Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
CopyUtils.cuh
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #pragma once
10 
11 #include "DeviceTensor.cuh"
12 #include "HostTensor.cuh"
13 
14 namespace faiss { namespace gpu {
15 
16 /// Ensure the memory at `p` is either on the given device, or copy it
17 /// to the device in a new allocation.
18 /// If `resources` is provided, then we will perform a temporary
19 /// memory allocation if needed. Otherwise, we will call cudaMalloc if
20 /// needed.
21 template <typename T, int Dim>
22 DeviceTensor<T, Dim, true> toDevice(GpuResources* resources,
23  int dstDevice,
24  T* src,
25  cudaStream_t stream,
26  std::initializer_list<int> sizes) {
27  int dev = getDeviceForAddress(src);
28 
29  if (dev == dstDevice) {
30  // On device we expect
31  return DeviceTensor<T, Dim, true>(src, sizes);
32  } else {
33  // On different device or on host
34  DeviceScope scope(dstDevice);
35 
36  Tensor<T, Dim, true> oldT(src, sizes);
37 
38  if (resources) {
39  DeviceTensor<T, Dim, true> newT(resources->getMemoryManager(dstDevice),
40  sizes,
41  stream);
42 
43  newT.copyFrom(oldT, stream);
44  return newT;
45  } else {
46  DeviceTensor<T, Dim, true> newT(sizes);
47 
48  newT.copyFrom(oldT, stream);
49  return newT;
50  }
51  }
52 }
53 
54 /// Copies a device array's allocation to an address, if necessary
55 template <typename T>
56 inline void fromDevice(T* src, T* dst, size_t num, cudaStream_t stream) {
57  // It is possible that the array already represents memory at `p`,
58  // in which case no copy is needed
59  if (src == dst) {
60  return;
61  }
62 
63  int dev = getDeviceForAddress(dst);
64 
65  if (dev == -1) {
66  CUDA_VERIFY(cudaMemcpyAsync(dst,
67  src,
68  num * sizeof(T),
69  cudaMemcpyDeviceToHost,
70  stream));
71  } else {
72  CUDA_VERIFY(cudaMemcpyAsync(dst,
73  src,
74  num * sizeof(T),
75  cudaMemcpyDeviceToDevice,
76  stream));
77  }
78 }
79 
80 /// Copies a device array's allocation to an address, if necessary
81 template <typename T, int Dim>
82 void fromDevice(Tensor<T, Dim, true>& src, T* dst, cudaStream_t stream) {
83  FAISS_ASSERT(src.isContiguous());
84  fromDevice(src.data(), dst, src.numElements(), stream);
85 }
86 
87 } } // namespace