13 #include "DeviceTensor.cuh"
14 #include "HostTensor.cuh"
16 namespace faiss {
namespace gpu {
// Produces a DeviceTensor<T, Dim, true> of the given `sizes` for the data at
// `src`, targeting device `dstDevice`.
// NOTE(review): the declarations of `src`, `stream` and `dstDevice`, the
// else-branches and the closing braces are not visible in this excerpt;
// confirm the comments below against the complete file.
23 template <
typename T,
int Dim>
24 DeviceTensor<T, Dim, true> toDevice(GpuResources* resources,
28 std::initializer_list<int> sizes) {
// Look up the device associated with the `src` address.
29 int dev = getDeviceForAddress(src);
31 if (dev == dstDevice) {
// `src` is already on the requested device: build the result directly over
// `src` (presumably a non-owning view with no copy -- TODO confirm).
33 return DeviceTensor<T, Dim, true>(src, sizes);
// Remaining path: `src` is not on `dstDevice`, so a copy is made.
// DeviceScope presumably makes `dstDevice` current for the rest of the scope
// -- TODO confirm.
36 DeviceScope scope(dstDevice);
// Wrap the source pointer in a Tensor so it can be passed to copyFrom().
38 Tensor<T, Dim, true> oldT(src, sizes);
// Variant 1: destination tensor constructed with the memory manager that
// `resources` provides for `dstDevice`, then filled from `oldT` on `stream`.
// NOTE(review): the condition selecting this variant over the one below is
// not visible here (presumably a null check on `resources`).
41 DeviceTensor<T, Dim, true> newT(resources->getMemoryManager(dstDevice),
45 newT.copyFrom(oldT, stream);
// Variant 2: destination tensor constructed from `sizes` alone (no memory
// manager), then filled from `oldT` on `stream`.
48 DeviceTensor<T, Dim, true> newT(sizes);
50 newT.copyFrom(oldT, stream);
// Copies data to `dst` with cudaMemcpyAsync, choosing the copy kind
// (device-to-host or device-to-device) based on where `dst` lives.
// Parameters: `src` source pointer, `dst` destination pointer, `num` element
// count, `stream` CUDA stream.
// NOTE(review): the template header, the branch condition and the remaining
// cudaMemcpyAsync arguments are not visible in this excerpt; presumably `num`
// is scaled by sizeof(T) and the copy is enqueued on `stream` -- confirm.
58 inline void fromDevice(T* src, T* dst,
size_t num, cudaStream_t stream) {
// Look up the device associated with the destination address.
65 int dev = getDeviceForAddress(dst);
// Device-to-host copy (presumably taken when `dst` is not device memory --
// TODO confirm the condition).
68 CUDA_VERIFY(cudaMemcpyAsync(dst,
71 cudaMemcpyDeviceToHost,
// Device-to-device copy (presumably taken when `dst` is device memory).
74 CUDA_VERIFY(cudaMemcpyAsync(dst,
77 cudaMemcpyDeviceToDevice,
// Tensor overload of fromDevice: copies every element of `src` to `dst` by
// forwarding to the pointer-based fromDevice with `stream`.
// `dst` must have room for src.numElements() elements of T.
83 template <
typename T,
int Dim>
84 void fromDevice(Tensor<T, Dim, true>& src, T* dst, cudaStream_t stream) {
// The copy below treats the tensor as one flat run of numElements() values
// starting at data(), so the tensor is required to be contiguous.
85 FAISS_ASSERT(src.isContiguous());
86 fromDevice(src.data(), dst, src.numElements(), stream);