13 namespace faiss {
namespace gpu {
15 template <
typename T,
int Dim,
bool Contig,
16 typename IndexT,
template <
typename U>
class PtrTraits>
19 Tensor<T, Dim, Contig, IndexT, PtrTraits>(),
20 state_(AllocState::NotOwner),
21 space_(MemorySpace::Device) {
24 template <
typename T,
int Dim,
bool Contig,
25 typename IndexT,
template <
typename U>
class PtrTraits>
29 Tensor<T, Dim, Contig, IndexT, PtrTraits>(),
30 state_(AllocState::NotOwner),
31 space_(MemorySpace::Device) {
35 template <
typename T,
int Dim,
bool Contig,
36 typename IndexT,
template <
typename U>
class PtrTraits>
41 if (this->state_ == AllocState::Owner) {
42 CUDA_VERIFY(cudaFree(this->data_));
48 this->state_ = t.state_; t.state_ = AllocState::NotOwner;
49 this->space_ = t.space_;
50 this->reservation_ = std::move(t.reservation_);
55 template <
typename T,
int Dim,
bool Contig,
56 typename IndexT,
template <
typename U>
class PtrTraits>
59 if (state_ == AllocState::Owner) {
60 FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
61 CUDA_VERIFY(cudaFree(this->data_));
62 this->data_ =
nullptr;
69 template <
typename T,
int Dim,
bool Contig,
70 typename IndexT,
template <
typename U>
class PtrTraits>
73 const IndexT sizes[Dim],
75 Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
76 state_(AllocState::Owner),
83 template <
typename T,
int Dim,
bool Contig,
84 typename IndexT,
template <
typename U>
class PtrTraits>
87 std::initializer_list<IndexT> sizes,
89 Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
90 state_(AllocState::Owner),
98 template <
typename T,
int Dim,
bool Contig,
99 typename IndexT,
template <
typename U>
class PtrTraits>
103 const IndexT sizes[Dim],
106 Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
107 state_(AllocState::Reservation),
113 this->
data_ = (T*) memory.get();
115 reservation_ = std::move(memory);
119 template <
typename T,
int Dim,
bool Contig,
120 typename IndexT,
template <
typename U>
class PtrTraits>
124 std::initializer_list<IndexT> sizes,
127 Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
128 state_(AllocState::Reservation),
134 this->
data_ = (T*) memory.get();
136 reservation_ = std::move(memory);
139 template <
typename T,
int Dim,
bool Contig,
140 typename IndexT,
template <
typename U>
class PtrTraits>
144 const IndexT sizes[Dim],
146 Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes),
147 state_(AllocState::NotOwner),
151 template <
typename T,
int Dim,
bool Contig,
152 typename IndexT,
template <
typename U>
class PtrTraits>
156 std::initializer_list<IndexT> sizes,
158 Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes),
159 state_(AllocState::NotOwner),
163 template <
typename T,
int Dim,
bool Contig,
164 typename IndexT,
template <
typename U>
class PtrTraits>
168 const IndexT sizes[Dim],
169 const IndexT strides[Dim],
171 Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes, strides),
172 state_(AllocState::NotOwner),
176 template <
typename T,
int Dim,
bool Contig,
177 typename IndexT,
template <
typename U>
class PtrTraits>
183 Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, t.sizes(), t.strides()),
184 state_(AllocState::Owner),
192 template <
typename T,
int Dim,
bool Contig,
193 typename IndexT,
template <
typename U>
class PtrTraits>
200 Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, t.sizes(), t.strides()),
201 state_(AllocState::Reservation),
207 this->
data_ = (T*) memory.get();
209 reservation_ = std::move(memory);
214 template <
typename T,
int Dim,
bool Contig,
215 typename IndexT,
template <
typename U>
class PtrTraits>
218 cudaStream_t stream) {
221 FAISS_ASSERT(this->isContiguous());
223 CUDA_VERIFY(cudaMemsetAsync(
224 this->data_, 0, this->getSizeInBytes(), stream));
__host__ DeviceTensor< T, Dim, Contig, IndexT, PtrTraits > & operator=(DeviceTensor< T, Dim, Contig, IndexT, PtrTraits > &&t)
Move assignment.
__host__ DeviceTensor()
Default constructor.
DataPtrType data_
Raw pointer to where the tensor data begins.
__host__ __device__ Tensor< T, Dim, Contig, IndexT, PtrTraits > & operator=(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t)=default
Assignment.
__host__ DeviceTensor< T, Dim, Contig, IndexT, PtrTraits > & zero(cudaStream_t stream)
Zeroes out the tensor's memory asynchronously on the given stream; the tensor must be contiguous.
virtual DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size)=0
__host__ ~DeviceTensor()
Destructor.
__host__ __device__ size_t getSizeInBytes() const
__host__ void copyFrom(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies a tensor into ourselves; sizes must match.
Manages temporary memory allocations on a GPU device.