14 #include "DeviceTensor.cuh"
15 #include "HostTensor.cuh"
17 namespace faiss {
namespace gpu {
/// Produces a DeviceTensor<T, Dim, true> describing the data at `src` with
/// the extents given by `sizes`.
///
/// Visible behavior:
///  - the device associated with `src` is looked up via getDeviceForAddress;
///  - if it equals `dstDevice`, the result is constructed directly from
///    (`src`, `sizes`) and no copyFrom is issued on that path;
///  - otherwise a new DeviceTensor `newT` is constructed and filled with
///    `newT.copyFrom(oldT, stream)`, where `oldT` is a Tensor built over
///    (`src`, `sizes`).
///
/// NOTE(review): `src`, `dstDevice` and `stream` are used below but their
/// declarations are not visible in this excerpt; presumably they are
/// parameters on lines that were dropped -- confirm against the full file.
/// NOTE(review): the leading integers on the code lines look like original
/// line numbers fused into the text, and the gaps between them (e.g.
/// 25 -> 29, 42 -> 46, 51 -> 59) suggest missing lines, including the closing
/// braces and whatever selects between the two `newT` constructions.
24 template <
typename T,
int Dim>
25 DeviceTensor<T, Dim, true> toDevice(GpuResources* resources,
29 std::initializer_list<int> sizes) {
// Determine which device (if any) the source address is associated with.
30 int dev = getDeviceForAddress(src);
// Source is already associated with the requested device: build the result
// straight from the existing pointer.
32 if (dev == dstDevice) {
34 return DeviceTensor<T, Dim, true>(src, sizes);
// Remaining path: the source is not on dstDevice.
// presumably DeviceScope makes dstDevice current for the rest of the scope --
// TODO confirm against DeviceScope's definition.
37 DeviceScope scope(dstDevice);
// View over the caller's source data, used as the copy source below.
39 Tensor<T, Dim, true> oldT(src, sizes);
// Variant 1: destination tensor constructed with the memory manager that
// `resources` reports for dstDevice (remaining constructor arguments are not
// visible in this excerpt), then filled from oldT.
42 DeviceTensor<T, Dim, true> newT(resources->getMemoryManager(dstDevice),
46 newT.copyFrom(oldT, stream);
// Variant 2: destination tensor constructed from `sizes` alone, then filled
// from oldT. NOTE(review): the condition choosing variant 1 vs. variant 2 is
// not visible here.
49 DeviceTensor<T, Dim, true> newT(sizes);
51 newT.copyFrom(oldT, stream);
/// Pointer overload: issues a cudaMemcpyAsync whose destination is `dst`.
///
/// Two calls are visible, one using cudaMemcpyDeviceToHost and one using
/// cudaMemcpyDeviceToDevice; both are wrapped in CUDA_VERIFY. The device
/// associated with `dst` is queried first via getDeviceForAddress.
///
/// NOTE(review): the template header for `T`, the branch on `dev` that picks
/// the copy kind, and the remaining cudaMemcpyAsync arguments are not visible
/// in this excerpt. Presumably `num` elements of `T` are copied from `src` on
/// `stream` -- confirm against the full file.
/// NOTE(review): because the copy is issued with cudaMemcpyAsync, callers
/// likely need to synchronize before reading `dst` on the host -- confirm.
59 inline void fromDevice(T* src, T* dst,
size_t num, cudaStream_t stream) {
// Look up the device associated with the destination address.
66 int dev = getDeviceForAddress(dst);
// Copy variant using the device-to-host kind.
69 CUDA_VERIFY(cudaMemcpyAsync(dst,
72 cudaMemcpyDeviceToHost,
// Copy variant using the device-to-device kind.
75 CUDA_VERIFY(cudaMemcpyAsync(dst,
78 cudaMemcpyDeviceToDevice,
/// Tensor overload: copies the contents of `src` to the address `dst` by
/// forwarding to the pointer-based fromDevice with `src.data()`,
/// `src.numElements()` and `stream`.
///
/// Precondition (checked with FAISS_ASSERT): `src.isContiguous()` must hold,
/// since the forwarded call describes the tensor only by a base pointer and
/// an element count.
84 template <
typename T,
int Dim>
85 void fromDevice(Tensor<T, Dim, true>& src, T* dst, cudaStream_t stream) {
// A flat copy of numElements() items is only passed on for contiguous tensors.
86 FAISS_ASSERT(src.isContiguous());
87 fromDevice(src.data(), dst, src.numElements(), stream);