11 #include "DeviceTensor.cuh"
12 #include "HostTensor.cuh"
14 namespace faiss {
namespace gpu {
// toDevice: produces a DeviceTensor<T, Dim, true> of shape `sizes` for the
// data at `src`, targeting device `dstDevice`.
//
// NOTE(review): this view of the function is incomplete. The body uses `src`,
// `dstDevice` and `stream`, but their declarations fall on lines that are not
// shown here, as do the conditions separating the two copy paths below, the
// remaining constructor arguments, the return statements of those paths and
// the closing braces. Confirm all of these against the full file.
21 template <
typename T,
int Dim>
22 DeviceTensor<T, Dim, true> toDevice(GpuResources* resources,
26 std::initializer_list<int> sizes) {
// Ask which device the `src` address belongs to (exact return convention of
// getDeviceForAddress is defined elsewhere -- TODO confirm).
27 int dev = getDeviceForAddress(src);
29 if (dev == dstDevice) {
// `src` is already reported as living on the requested device: build the
// result directly from `src` and `sizes`; no copyFrom call is made on this path.
31 return DeviceTensor<T, Dim, true>(src, sizes);
// Remaining paths operate with `dstDevice` selected via a DeviceScope object
// (presumably a scoped device switch -- verify against DeviceScope).
34 DeviceScope scope(dstDevice);
// Tensor over the caller's data; used as the source of the copies below.
36 Tensor<T, Dim, true> oldT(src, sizes);
// Path 1: destination tensor constructed with the memory manager obtained from
// `resources` for `dstDevice` (further constructor arguments are not visible
// here), then filled from `oldT` on `stream`.
39 DeviceTensor<T, Dim, true> newT(resources->getMemoryManager(dstDevice),
43 newT.copyFrom(oldT, stream);
// Path 2: destination tensor constructed from `sizes` alone (no memory manager
// involved), then filled from `oldT` on `stream`.
// NOTE(review): presumably this is the branch taken when `resources` is null;
// the selecting condition is not visible -- confirm.
46 DeviceTensor<T, Dim, true> newT(sizes);
48 newT.copyFrom(oldT, stream);
// fromDevice (raw-pointer overload): issues an asynchronous copy into `dst`
// on `stream`, wrapped in CUDA_VERIFY.
//
// Parameters visible in the signature:
//   src    - source pointer
//   dst    - destination pointer
//   num    - element count (presumably converted to bytes via sizeof(T) in the
//            non-visible cudaMemcpyAsync arguments -- TODO confirm)
//   stream - CUDA stream the copy is enqueued on
//
// NOTE(review): the template header declaring T, any early-exit checks, the
// condition that selects between the two copies below, and the middle
// arguments of each cudaMemcpyAsync call are on lines not shown in this view.
56 inline void fromDevice(T* src, T* dst,
size_t num, cudaStream_t stream) {
// Determine which device (if any) owns the destination address; presumably
// this result decides which copy kind is used below -- confirm.
63 int dev = getDeviceForAddress(dst);
// Variant 1: copy with kind cudaMemcpyDeviceToHost.
66 CUDA_VERIFY(cudaMemcpyAsync(dst,
69 cudaMemcpyDeviceToHost,
// Variant 2: copy with kind cudaMemcpyDeviceToDevice.
72 CUDA_VERIFY(cudaMemcpyAsync(dst,
75 cudaMemcpyDeviceToDevice,
// fromDevice (Tensor overload): copies the contents of tensor `src` to the
// raw address `dst` using `stream`.
//
// The tensor must be contiguous (enforced with FAISS_ASSERT), because the
// copy is delegated to a four-argument fromDevice call that receives only the
// tensor's base pointer (src.data()) and its total element count
// (src.numElements()).
//
// NOTE(review): the closing brace of this function lies beyond the visible
// lines.
81 template <
typename T,
int Dim>
82 void fromDevice(Tensor<T, Dim, true>& src, T* dst, cudaStream_t stream) {
// A flat pointer + count copy is only valid for contiguous storage.
83 FAISS_ASSERT(src.isContiguous());
84 fromDevice(src.data(), dst, src.numElements(), stream);