14 namespace faiss {
namespace gpu {
// Default constructor: an empty tensor with null data that owns no
// memory (state_ = NotOwner), so the destructor frees nothing.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor() :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(),
    state_(AllocState::NotOwner) {
}
// Move constructor: start as an empty non-owner, then steal the other
// tensor's data/ownership via move assignment.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&& t) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(),
    state_(AllocState::NotOwner) {
  // Delegate to move assignment, which transfers data_, state_ and
  // any memory reservation, and marks `t` as NotOwner.
  this->operator=(std::move(t));
}
// Move assignment: release any memory we currently own, then take over
// the other tensor's data, ownership state and reservation.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(
  DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&& t) {
  // If we previously allocated our own device memory, free it before
  // adopting the incoming pointer; otherwise we would leak it.
  if (this->state_ == AllocState::Owner) {
    CUDA_VERIFY(cudaFree(this->data_));
  }

  // Move the base Tensor members (data pointer, sizes, strides)
  this->Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(std::move(t));

  // Transfer ownership; `t` must no longer free/return the memory
  this->state_ = t.state_; t.state_ = AllocState::NotOwner;
  this->reservation_ = std::move(t.reservation_);

  return *this;
}
// Destructor: only frees device memory that this tensor allocated
// itself (Owner). Reservation-backed memory is handed back by the
// reservation_ member's own destructor; NotOwner frees nothing.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::~DeviceTensor() {
  if (state_ == AllocState::Owner) {
    // An owner with a non-empty size must have a live allocation
    FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
    CUDA_VERIFY(cudaFree(this->data_));
    this->data_ = nullptr;
  }

  // Otherwise, if we have a temporary memory reservation, its
  // destructor will return the reservation to the memory manager
}
// Allocating constructor (C array of sizes): cudaMallocs memory for the
// given extents; this tensor owns the allocation and frees it on
// destruction.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  const IndexT sizes[Dim]) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
    state_(AllocState::Owner) {
  CUDA_VERIFY(cudaMalloc(&this->data_, this->getSizeInBytes()));
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
}
// Allocating constructor (initializer_list of sizes): cudaMallocs
// memory for the given extents; this tensor owns the allocation.
// Reports the requested byte count on stderr if the allocation fails.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  std::initializer_list<IndexT> sizes) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
    state_(AllocState::Owner) {
  cudaError_t err = cudaMalloc(&this->data_, this->getSizeInBytes());
  if (err != cudaSuccess) {
    // Surface the size that failed before asserting below
    fprintf(stderr,
            "could not cudaMalloc %ld bytes!\n", this->getSizeInBytes());
  }
  FAISS_ASSERT(err == cudaSuccess);
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
}
// Reservation constructor (C array of sizes): obtains temporary device
// memory from the given DeviceMemory manager on `stream`. The memory is
// returned to the manager when reservation_ is destroyed, not freed via
// cudaFree.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DeviceMemory& m,
  const IndexT sizes[Dim],
  cudaStream_t stream) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
    state_(AllocState::Reservation) {
  auto memory = m.getMemory(stream, this->getSizeInBytes());

  this->data_ = (T*) memory.get();
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
  // Keep the reservation alive for the lifetime of this tensor
  reservation_ = std::move(memory);
}
// Reservation constructor (initializer_list of sizes): obtains
// temporary device memory from the given DeviceMemory manager on
// `stream`. The memory is returned to the manager when reservation_ is
// destroyed, not freed via cudaFree.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DeviceMemory& m,
  std::initializer_list<IndexT> sizes,
  cudaStream_t stream) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
    state_(AllocState::Reservation) {
  auto memory = m.getMemory(stream, this->getSizeInBytes());

  this->data_ = (T*) memory.get();
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
  // Keep the reservation alive for the lifetime of this tensor
  reservation_ = std::move(memory);
}
// Non-owning view constructor (C array of sizes): wraps caller-provided
// device memory; this tensor never frees `data`.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DataPtrType data,
  const IndexT sizes[Dim]) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes),
    state_(AllocState::NotOwner) {
}
// Non-owning view constructor (initializer_list of sizes): wraps
// caller-provided device memory; this tensor never frees `data`.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DataPtrType data,
  std::initializer_list<IndexT> sizes) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes),
    state_(AllocState::NotOwner) {
}
// Non-owning view constructor with explicit strides: wraps
// caller-provided device memory with a custom (possibly
// non-contiguous) layout; this tensor never frees `data`.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DataPtrType data,
  const IndexT sizes[Dim],
  const IndexT strides[Dim]) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes, strides),
    state_(AllocState::NotOwner) {
}
// Copying constructor: allocates our own device memory matching the
// source tensor's sizes/strides, then copies its contents on `stream`.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, t.sizes(), t.strides()),
    state_(AllocState::Owner) {
  CUDA_VERIFY(cudaMalloc(&this->data_, this->getSizeInBytes()));
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
  this->copyFrom(t, stream);
}
// Copying reservation constructor: obtains temporary memory from the
// DeviceMemory manager matching the source tensor's sizes/strides,
// then copies its contents on `stream`. The memory is returned to the
// manager when reservation_ is destroyed.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
  DeviceMemory& m,
  Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
  cudaStream_t stream) :
    Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, t.sizes(), t.strides()),
    state_(AllocState::Reservation) {
  auto memory = m.getMemory(stream, this->getSizeInBytes());

  this->data_ = (T*) memory.get();
  // A zero-sized tensor may legitimately have a null pointer
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
  // Keep the reservation alive for the lifetime of this tensor
  reservation_ = std::move(memory);

  this->copyFrom(t, stream);
}
// Zeroes out the tensor's memory asynchronously on `stream`. Returns
// *this so calls can be chained. cudaMemsetAsync writes one contiguous
// byte span, so the tensor must be contiguous.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::zero(
  cudaStream_t stream) {
  // A strided/sliced view cannot be zeroed with a single memset
  FAISS_ASSERT(this->isContiguous());

  CUDA_VERIFY(cudaMemsetAsync(
                this->data_, 0, this->getSizeInBytes(), stream));

  return *this;
}
__host__ DeviceTensor< T, Dim, Contig, IndexT, PtrTraits > & operator=(DeviceTensor< T, Dim, Contig, IndexT, PtrTraits > &&t)
Move assignment.
__host__ DeviceTensor()
Default constructor.
DataPtrType data_
Raw pointer to where the tensor data begins.
__host__ __device__ Tensor< T, Dim, Contig, IndexT, PtrTraits > & operator=(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t)=default
Assignment.
__host__ DeviceTensor< T, Dim, Contig, IndexT, PtrTraits > & zero(cudaStream_t stream)
Call to zero out memory.
virtual DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size)=0
__host__ ~DeviceTensor()
Destructor.
__host__ __device__ size_t getSizeInBytes() const
__host__ void copyFrom(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies a tensor into ourselves; sizes must match.
Manages temporary memory allocations on a GPU device.