14 namespace faiss {
namespace gpu {
// Default constructor: an empty tensor with null data that owns no
// memory (state_ = NotOwner), so the destructor frees nothing.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor() :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(),
    state_(AllocState::NotOwner) {
}
// Move constructor: start as an empty non-owner, then steal the other
// tensor's data/ownership via move assignment.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&& t) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(),
    state_(AllocState::NotOwner) {
  // Delegate to move assignment, which transfers data_, state_ and
  // any memory reservation, and marks `t` as NotOwner.
  this->operator=(std::move(t));
}
// Move assignment: release any memory we currently own, then take over
// the other tensor's data, ownership state and reservation.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(
  DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&& t) {
  // If we previously allocated our own device memory, free it before
  // adopting the incoming pointer; otherwise we would leak it.
  if (this->state_ == AllocState::Owner) {
    CUDA_VERIFY(cudaFree(this->data_));
  }

  // Move the base Tensor members (data pointer, sizes, strides)
  this->Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(std::move(t));

  // Transfer ownership; `t` must no longer free/return the memory
  this->state_ = t.state_; t.state_ = AllocState::NotOwner;
  this->reservation_ = std::move(t.reservation_);

  return *this;
}
// Destructor: only frees device memory that this tensor allocated
// itself (Owner). Reservation-backed memory is handed back by the
// reservation_ member's own destructor; NotOwner frees nothing.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::~DeviceTensor() {
  if (state_ == AllocState::Owner) {
    // An owner with a non-empty size must have a live allocation
    FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
    CUDA_VERIFY(cudaFree(this->data_));
    this->data_ = nullptr;
  }

  // Otherwise, if we have a temporary memory reservation, its
  // destructor will return the reservation to the memory manager
}
// Allocating constructor (C array of sizes): cudaMallocs memory for the
// given extents; this tensor owns the allocation and frees it on
// destruction.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  const IndexT sizes[Dim]) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
    state_(AllocState::Owner) {
  CUDA_VERIFY(cudaMalloc(&this->data_, this->getSizeInBytes()));
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
}
// Allocating constructor (initializer_list of sizes): cudaMallocs
// memory for the given extents; this tensor owns the allocation.
// Reports the requested byte count on stderr if the allocation fails.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  std::initializer_list<IndexT> sizes) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
    state_(AllocState::Owner) {
  cudaError_t err = cudaMalloc(&this->data_, this->getSizeInBytes());
  if (err != cudaSuccess) {
    // Surface the size that failed before asserting below
    fprintf(stderr,
            "could not cudaMalloc %ld bytes!\n", this->getSizeInBytes());
  }
  FAISS_ASSERT(err == cudaSuccess);
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
}
// Reservation constructor (C array of sizes): obtains temporary device
// memory from the given DeviceMemory manager on `stream`. The memory is
// returned to the manager when reservation_ is destroyed, not freed via
// cudaFree.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DeviceMemory& m,
  const IndexT sizes[Dim],
  cudaStream_t stream) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
    state_(AllocState::Reservation) {
  auto memory = m.getMemory(stream, this->getSizeInBytes());

  this->data_ = (T*) memory.get();
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
  // Keep the reservation alive for the lifetime of this tensor
  reservation_ = std::move(memory);
}
// Reservation constructor (initializer_list of sizes): obtains
// temporary device memory from the given DeviceMemory manager on
// `stream`. The memory is returned to the manager when reservation_ is
// destroyed, not freed via cudaFree.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DeviceMemory& m,
  std::initializer_list<IndexT> sizes,
  cudaStream_t stream) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
    state_(AllocState::Reservation) {
  auto memory = m.getMemory(stream, this->getSizeInBytes());

  this->data_ = (T*) memory.get();
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
  // Keep the reservation alive for the lifetime of this tensor
  reservation_ = std::move(memory);
}
// Non-owning view constructor (C array of sizes): wraps caller-provided
// device memory; this tensor never frees `data`.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DataPtrType data,
  const IndexT sizes[Dim]) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes),
    state_(AllocState::NotOwner) {
}
// Non-owning view constructor (initializer_list of sizes): wraps
// caller-provided device memory; this tensor never frees `data`.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DataPtrType data,
  std::initializer_list<IndexT> sizes) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes),
    state_(AllocState::NotOwner) {
}
// Non-owning view constructor with explicit strides: wraps
// caller-provided device memory with a custom (possibly
// non-contiguous) layout; this tensor never frees `data`.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DataPtrType data,
  const IndexT sizes[Dim],
  const IndexT strides[Dim]) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes, strides),
    state_(AllocState::NotOwner) {
}
// Copying constructor: allocates our own device memory matching the
// source tensor's sizes/strides, then copies its contents on `stream`.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, t.sizes(), t.strides()),
    state_(AllocState::Owner) {
  CUDA_VERIFY(cudaMalloc(&this->data_, this->getSizeInBytes()));
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
  this->copyFrom(t, stream);
}
// Copying reservation constructor: obtains temporary memory from the
// DeviceMemory manager matching the source tensor's sizes/strides,
// then copies its contents on `stream`. The memory is returned to the
// manager when reservation_ is destroyed.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DeviceMemory& m,
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, t.sizes(), t.strides()),
    state_(AllocState::Reservation) {
  auto memory = m.getMemory(stream, this->getSizeInBytes());

  this->data_ = (T*) memory.get();
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
  // Keep the reservation alive for the lifetime of this tensor
  reservation_ = std::move(memory);

  this->copyFrom(t, stream);
}
// Zeroes out the tensor's memory asynchronously on `stream`. Returns
// *this so calls can be chained. cudaMemsetAsync writes one contiguous
// byte span, so the tensor must be contiguous.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::zero(
  cudaStream_t stream) {
  // A strided/sliced view cannot be zeroed with a single memset
  FAISS_ASSERT(this->isContiguous());

  CUDA_VERIFY(cudaMemsetAsync(
                this->data_, 0, this->getSizeInBytes(), stream));

  return *this;
}
__host__ DeviceTensor< T, Dim, Contig, IndexT, PtrTraits > & operator=(DeviceTensor< T, Dim, Contig, IndexT, PtrTraits > &&t)
Move assignment.
__host__ DeviceTensor()
Default constructor.
DataPtrType data_
Raw pointer to where the tensor data begins.
__host__ __device__ Tensor< T, Dim, Contig, IndexT, PtrTraits > & operator=(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t)=default
Assignment.
__host__ DeviceTensor< T, Dim, Contig, IndexT, PtrTraits > & zero(cudaStream_t stream)
Call to zero out memory.
virtual DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size)=0
__host__ ~DeviceTensor()
Destructor.
__host__ __device__ size_t getSizeInBytes() const
__host__ void copyFrom(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies a tensor into ourselves; sizes must match.
Manages temporary memory allocations on a GPU device.