/** * Copyright (c) Facebook, Inc. and its affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include // std::move namespace faiss { namespace gpu { template class PtrTraits> __host__ DeviceTensor::DeviceTensor() : Tensor(), state_(AllocState::NotOwner), space_(MemorySpace::Device) { } template class PtrTraits> __host__ DeviceTensor::DeviceTensor( DeviceTensor&& t) : Tensor(), state_(AllocState::NotOwner), space_(MemorySpace::Device) { this->operator=(std::move(t)); } template class PtrTraits> __host__ DeviceTensor& DeviceTensor::operator=( DeviceTensor&& t) { if (this->state_ == AllocState::Owner) { CUDA_VERIFY(cudaFree(this->data_)); } this->Tensor::operator=( std::move(t)); this->state_ = t.state_; t.state_ = AllocState::NotOwner; this->space_ = t.space_; this->reservation_ = std::move(t.reservation_); return *this; } template class PtrTraits> __host__ DeviceTensor::~DeviceTensor() { if (state_ == AllocState::Owner) { FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0)); CUDA_VERIFY(cudaFree(this->data_)); this->data_ = nullptr; } // Otherwise, if we have a temporary memory reservation, then its // destructor will return the reservation } template class PtrTraits> __host__ DeviceTensor::DeviceTensor( const IndexT sizes[Dim], MemorySpace space) : Tensor(nullptr, sizes), state_(AllocState::Owner), space_(space) { allocMemorySpace(space, &this->data_, this->getSizeInBytes()); FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0)); } template class PtrTraits> __host__ DeviceTensor::DeviceTensor( std::initializer_list sizes, MemorySpace space) : Tensor(nullptr, sizes), state_(AllocState::Owner), space_(space) { allocMemorySpace(space, &this->data_, this->getSizeInBytes()); FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0)); } // memory reservation constructor template class PtrTraits> __host__ DeviceTensor::DeviceTensor( DeviceMemory& m, const IndexT sizes[Dim], cudaStream_t stream, MemorySpace space) : Tensor(nullptr, sizes), state_(AllocState::Reservation), space_(space) { // FIXME: add MemorySpace to DeviceMemory auto memory = m.getMemory(stream, this->getSizeInBytes()); this->data_ = (T*) memory.get(); FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0)); reservation_ = std::move(memory); } // memory reservation constructor template class PtrTraits> __host__ DeviceTensor::DeviceTensor( DeviceMemory& m, std::initializer_list sizes, cudaStream_t stream, MemorySpace space) : Tensor(nullptr, sizes), state_(AllocState::Reservation), space_(space) { // FIXME: add MemorySpace to DeviceMemory auto memory = m.getMemory(stream, this->getSizeInBytes()); this->data_ = (T*) memory.get(); FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0)); reservation_ = std::move(memory); } template class PtrTraits> __host__ DeviceTensor::DeviceTensor( DataPtrType data, const IndexT sizes[Dim], MemorySpace space) : Tensor(data, sizes), state_(AllocState::NotOwner), space_(space) { } template class PtrTraits> __host__ DeviceTensor::DeviceTensor( DataPtrType data, std::initializer_list sizes, MemorySpace space) : Tensor(data, sizes), state_(AllocState::NotOwner), space_(space) { } template class PtrTraits> __host__ DeviceTensor::DeviceTensor( DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim], MemorySpace space) : Tensor(data, sizes, strides), state_(AllocState::NotOwner), space_(space) { } template class PtrTraits> __host__ DeviceTensor::DeviceTensor( Tensor& t, cudaStream_t stream, MemorySpace space) : Tensor(nullptr, t.sizes(), t.strides()), state_(AllocState::Owner), space_(space) { allocMemorySpace(space_, &this->data_, this->getSizeInBytes()); FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0)); this->copyFrom(t, stream); } template class PtrTraits> __host__ DeviceTensor::DeviceTensor( DeviceMemory& m, Tensor& t, cudaStream_t stream, MemorySpace space) : Tensor(nullptr, t.sizes(), t.strides()), state_(AllocState::Reservation), space_(space) { // FIXME: add MemorySpace to DeviceMemory auto memory = m.getMemory(stream, this->getSizeInBytes()); this->data_ = (T*) memory.get(); FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0)); reservation_ = std::move(memory); this->copyFrom(t, stream); } template class PtrTraits> __host__ DeviceTensor& DeviceTensor::zero( cudaStream_t stream) { if (this->data_) { // Region must be contiguous FAISS_ASSERT(this->isContiguous()); CUDA_VERIFY(cudaMemsetAsync( this->data_, 0, this->getSizeInBytes(), stream)); } return *this; } } } // namespace