12 #include "DeviceTensor.cuh"
13 #include "HostTensor.cuh"
15 namespace faiss {
namespace gpu {
// Produces a DeviceTensor<T, Dim, true> of extent `sizes` for the data at
// `src`, relative to the device identified by `dstDevice`.
//
// NOTE(review): this listing appears to have dropped lines. `dstDevice`,
// `src` and `stream` are used below, but their declarations (presumably
// additional parameters between `resources` and `sizes`) are not visible,
// and neither are the else-branches / closing braces. Confirm against the
// full file before relying on these notes.
22 template <
typename T,
int Dim>
23 DeviceTensor<T, Dim, true> toDevice(GpuResources* resources,
27 std::initializer_list<int> sizes) {
// Ask which device `src` is associated with (exact return convention of
// getDeviceForAddress is defined elsewhere -- TODO confirm).
28 int dev = getDeviceForAddress(src);
// Same device as requested: a DeviceTensor is constructed directly from
// `src` and `sizes` and returned; no copyFrom is issued on this path.
30 if (dev == dstDevice) {
32 return DeviceTensor<T, Dim, true>(src, sizes);
// Otherwise: a DeviceScope is opened for dstDevice (presumably making it the
// current device for the rest of the scope -- verify in DeviceScope).
35 DeviceScope scope(dstDevice);
// Tensor over the original `src` memory; used as the source of the copy.
37 Tensor<T, Dim, true> oldT(src, sizes);
// Destination tensor built from the memory manager that `resources` provides
// for dstDevice (remaining constructor arguments are not visible here), then
// filled from oldT on `stream`.
40 DeviceTensor<T, Dim, true> newT(resources->getMemoryManager(dstDevice),
44 newT.copyFrom(oldT, stream);
// Second `newT` built from `sizes` alone, filled the same way.
// NOTE(review): two declarations of `newT` imply these live in separate
// branches; the selecting condition is not visible in this listing.
47 DeviceTensor<T, Dim, true> newT(sizes);
49 newT.copyFrom(oldT, stream);
// Raw-pointer overload: copies into `dst` via cudaMemcpyAsync, with every
// call wrapped in CUDA_VERIFY.
//
// NOTE(review): this listing is missing lines -- the `template` header that
// introduces T, the condition that selects between the two copies below, and
// most of the cudaMemcpyAsync arguments (source pointer, byte count, stream)
// are not visible. The notes here cover only what can be seen.
57 inline void fromDevice(T* src, T* dst,
size_t num, cudaStream_t stream) {
// Device lookup is done on the destination address, not the source.
64 int dev = getDeviceForAddress(dst);
// First visible copy: direction is device -> host.
67 CUDA_VERIFY(cudaMemcpyAsync(dst,
70 cudaMemcpyDeviceToHost,
// Second visible copy: direction is device -> device.
// Presumably chosen based on `dev` -- TODO confirm against the full source.
73 CUDA_VERIFY(cudaMemcpyAsync(dst,
76 cudaMemcpyDeviceToDevice,
// Tensor overload: forwards the contents of `src` to the raw-pointer
// fromDevice overload, passing src.data(), `dst`, src.numElements() and
// `stream`.
//
// NOTE(review): the closing brace of this function is not visible in this
// listing.
82 template <
typename T,
int Dim>
83 void fromDevice(Tensor<T, Dim, true>& src, T* dst, cudaStream_t stream) {
// The copy below is described by one pointer plus an element count, so the
// tensor is asserted to be contiguous first.
84 FAISS_ASSERT(src.isContiguous());
85 fromDevice(src.data(), dst, src.numElements(), stream);