Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
CopyUtils.cuh
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #pragma once
13 
14 #include "DeviceTensor.cuh"
15 #include "HostTensor.cuh"
16 
17 namespace faiss { namespace gpu {
18 
19 /// Ensure the memory at `p` is either on the given device, or copy it
20 /// to the device in a new allocation.
21 /// If `resources` is provided, then we will perform a temporary
22 /// memory allocation if needed. Otherwise, we will call cudaMalloc if
23 /// needed.
/// Ensure that the memory referenced by `src` lives on device
/// `dstDevice`, copying it into a fresh device allocation when it does
/// not already reside there.
/// If `resources` is non-null, the allocation is drawn from its
/// temporary memory manager for `dstDevice`; otherwise a directly
/// allocated DeviceTensor (cudaMalloc) is used.
template <typename T, int Dim>
DeviceTensor<T, Dim, true> toDevice(GpuResources* resources,
                                    int dstDevice,
                                    T* src,
                                    cudaStream_t stream,
                                    std::initializer_list<int> sizes) {
  if (getDeviceForAddress(src) == dstDevice) {
    // Already resident on the target device; just wrap the pointer
    return DeviceTensor<T, Dim, true>(src, sizes);
  }

  // Data lives on the host or on a different device; copy it over
  DeviceScope scope(dstDevice);

  Tensor<T, Dim, true> srcView(src, sizes);

  if (resources) {
    // Use a temporary allocation from the memory manager
    DeviceTensor<T, Dim, true> out(resources->getMemoryManager(dstDevice),
                                   sizes,
                                   stream);
    out.copyFrom(srcView, stream);
    return out;
  }

  // No resource manager available; fall back to a direct allocation
  DeviceTensor<T, Dim, true> out(sizes);
  out.copyFrom(srcView, stream);
  return out;
}
56 
57 /// Copies a device array's allocation to an address, if necessary
/// Copies `num` elements of device memory at `src` to `dst` (host or
/// device address), if necessary
template <typename T>
inline void fromDevice(T* src, T* dst, size_t num, cudaStream_t stream) {
  // The destination may already alias the source allocation, in which
  // case there is nothing to copy
  if (src == dst) {
    return;
  }

  // getDeviceForAddress reports -1 for a host address; choose the
  // transfer direction accordingly (the source is device memory)
  cudaMemcpyKind kind = (getDeviceForAddress(dst) == -1)
      ? cudaMemcpyDeviceToHost
      : cudaMemcpyDeviceToDevice;

  CUDA_VERIFY(cudaMemcpyAsync(dst, src, num * sizeof(T), kind, stream));
}
82 
83 /// Copies a device array's allocation to an address, if necessary
/// Copies a device tensor's contents to the address `dst`, if necessary
template <typename T, int Dim>
void fromDevice(Tensor<T, Dim, true>& src, T* dst, cudaStream_t stream) {
  // A flat element-count copy is only valid for gap-free storage
  FAISS_ASSERT(src.isContiguous());

  size_t numElems = src.numElements();
  fromDevice(src.data(), dst, numElems, stream);
}
89 
90 } } // namespace