Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
CopyUtils.cuh
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #pragma once
11 
12 #include "DeviceTensor.cuh"
13 #include "HostTensor.cuh"
14 
15 namespace faiss { namespace gpu {
16 
/// Returns a device tensor of shape `sizes` for the data at `src` that
/// is usable on device `dstDevice`.
///
/// When `getDeviceForAddress(src)` already reports `dstDevice`, the
/// returned tensor is built directly from `src` and no copy is issued.
/// Otherwise (the data is on another device or on the host) a new
/// tensor is allocated and filled from `src` on `stream`.
///
/// If `resources` is non-null, the new tensor is a temporary memory
/// allocation obtained through `resources->getMemoryManager(dstDevice)`;
/// if it is null, the allocation falls back to cudaMalloc.
template <typename T, int Dim>
DeviceTensor<T, Dim, true> toDevice(GpuResources* resources,
                                    int dstDevice,
                                    T* src,
                                    cudaStream_t stream,
                                    std::initializer_list<int> sizes) {
  using ResultTensor = DeviceTensor<T, Dim, true>;

  // Fast path: the data already lives where the caller wants it
  if (getDeviceForAddress(src) == dstDevice) {
    return ResultTensor(src, sizes);
  }

  // Slow path: the source is on a different device or on the host.
  // The scope object is created before any allocation or copy below
  // (presumably so that they target `dstDevice` -- confirm against
  // DeviceScope).
  DeviceScope deviceGuard(dstDevice);

  // View over the caller's memory, used only as the copy source
  Tensor<T, Dim, true> srcView(src, sizes);

  if (!resources) {
    // No resource object available: plain allocation
    ResultTensor copied(sizes);

    copied.copyFrom(srcView, stream);
    return copied;
  }

  // Allocation via the memory manager for the destination device
  ResultTensor copied(resources->getMemoryManager(dstDevice),
                      sizes,
                      stream);

  copied.copyFrom(srcView, stream);
  return copied;
}
54 
/// Copies `num` elements of type `T` from the device array `src` to
/// `dst`, unless both pointers are the same address.
///
/// The copy is enqueued on `stream` with cudaMemcpyAsync. The copy
/// direction is device-to-host when `getDeviceForAddress(dst)` is -1,
/// and device-to-device otherwise.
template <typename T>
inline void fromDevice(T* src, T* dst, size_t num, cudaStream_t stream) {
  // The device array may already be backed by the destination memory;
  // nothing to transfer in that case
  if (dst == src) {
    return;
  }

  // A reported device of -1 is treated as a host destination
  const bool dstOnHost = (getDeviceForAddress(dst) == -1);

  const auto copyKind =
    dstOnHost ? cudaMemcpyDeviceToHost : cudaMemcpyDeviceToDevice;

  CUDA_VERIFY(cudaMemcpyAsync(dst,
                              src,
                              num * sizeof(T),
                              copyKind,
                              stream));
}
80 
/// Tensor overload: copies all elements of `src` to `dst` on `stream`,
/// if necessary, by forwarding to the raw-pointer `fromDevice`.
/// Asserts that `src` is contiguous.
template <typename T, int Dim>
void fromDevice(Tensor<T, Dim, true>& src, T* dst, cudaStream_t stream) {
  // The copy covers numElements() elements starting at data(), so the
  // tensor is required to be contiguous
  FAISS_ASSERT(src.isContiguous());

  auto srcBase = src.data();
  auto elementCount = src.numElements();

  fromDevice(srcBase, dst, elementCount, stream);
}
87 
88 } } // namespace