Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
CopyUtils.cuh
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #pragma once
12 
13 #include "DeviceTensor.cuh"
14 #include "HostTensor.cuh"
15 
16 namespace faiss { namespace gpu {
17 
/// Ensure the memory at `src` is resident on device `dstDevice`,
/// copying it into a fresh device allocation if it is not.
/// If `resources` is non-null, the temporary allocation is drawn from
/// its memory manager; otherwise a plain cudaMalloc-backed
/// DeviceTensor is used. The returned tensor does not own `src` when
/// no copy was required.
template <typename T, int Dim>
DeviceTensor<T, Dim, true> toDevice(GpuResources* resources,
                                    int dstDevice,
                                    T* src,
                                    cudaStream_t stream,
                                    std::initializer_list<int> sizes) {
  // Already resident on the destination device: wrap without copying
  if (getDeviceForAddress(src) == dstDevice) {
    return DeviceTensor<T, Dim, true>(src, sizes);
  }

  // `src` lives on the host or on a different device; allocate on the
  // destination device and copy across on `stream`
  DeviceScope scope(dstDevice);
  Tensor<T, Dim, true> srcView(src, sizes);

  if (resources) {
    // Temporary allocation via the resources' memory manager
    DeviceTensor<T, Dim, true> dst(resources->getMemoryManager(dstDevice),
                                   sizes,
                                   stream);
    dst.copyFrom(srcView, stream);
    return dst;
  }

  // No resources provided: fall back to a directly-allocated tensor
  DeviceTensor<T, Dim, true> dst(sizes);
  dst.copyFrom(srcView, stream);
  return dst;
}
55 
/// Copies `num` elements from the device array `src` to `dst`, which
/// may be a host or a device address, asynchronously on `stream`.
/// No-op when `src` and `dst` are the same pointer.
template <typename T>
inline void fromDevice(T* src, T* dst, size_t num, cudaStream_t stream) {
  // The destination may already be the source allocation, in which
  // case there is nothing to copy
  if (src == dst) {
    return;
  }

  // getDeviceForAddress returns -1 for host memory; choose the copy
  // direction accordingly
  auto kind = (getDeviceForAddress(dst) == -1)
                  ? cudaMemcpyDeviceToHost
                  : cudaMemcpyDeviceToDevice;

  CUDA_VERIFY(cudaMemcpyAsync(dst, src, num * sizeof(T), kind, stream));
}
81 
/// Copies the full contents of a contiguous device tensor `src` to
/// the address `dst` (host or device), asynchronously on `stream`
template <typename T, int Dim>
void fromDevice(Tensor<T, Dim, true>& src, T* dst, cudaStream_t stream) {
  // A flat element-count copy is only valid for contiguous storage
  FAISS_ASSERT(src.isContiguous());

  size_t numElems = src.numElements();
  fromDevice(src.data(), dst, numElems, stream);
}
88 
89 } } // namespace