faiss/gpu/utils/DeviceTensor-inl.cuh

213 lines
6.7 KiB
Plaintext

/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the CC-by-NC license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include <utility> // std::move
namespace faiss { namespace gpu {
// Default constructor.
// Default-constructs the base Tensor and records that this object does not
// own an allocation, so the destructor will not call cudaFree.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor()
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(),
      state_(AllocState::NotOwner) {}
// Move constructor.
// Starts out as a default-constructed, non-owning tensor and then hands the
// actual transfer of `t`'s contents to the move assignment operator.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&& t)
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(),
      state_(AllocState::NotOwner) {
  // state_ is NotOwner at this point, so the assignment below has no
  // previously owned buffer to release.
  *this = std::move(t);
}
// Move assignment.
//
// Releases the buffer this tensor currently owns (if any), delegates the
// base-class part of the move to Tensor's move assignment, and then takes
// over `t`'s allocation state and memory reservation. `t` is left in the
// NotOwner state so that its destructor will not cudaFree the buffer that
// was just transferred.
//
// Returns a reference to *this.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(
    DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&& t) {
  // Self-move must be a no-op. Without this guard an owning tensor would
  // cudaFree its own buffer below before the base-class move and the state
  // transfer run, silently destroying the data it was supposed to keep.
  if (this == &t) {
    return *this;
  }

  // Drop whatever we own before overwriting data_ with t's pointer.
  if (this->state_ == AllocState::Owner) {
    CUDA_VERIFY(cudaFree(this->data_));
  }

  this->Tensor<T, Dim, Contig, IndexT, PtrTraits>::operator=(
      std::move(t));

  // Ownership follows the data: we adopt t's state, t gives it up.
  this->state_ = t.state_;
  t.state_ = AllocState::NotOwner;

  // Take over t's temporary-memory reservation as well; any reservation we
  // held before is replaced by this move assignment.
  this->reservation_ = std::move(t.reservation_);

  return *this;
}
// Destructor.
// Only a tensor in the Owner state frees its buffer with cudaFree. In any
// other state there is nothing to free here; a temporary memory reservation,
// if held, is returned by the reservation member's own destructor.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::~DeviceTensor() {
  if (state_ != AllocState::Owner) {
    return;
  }

  // An owning tensor may only have a null pointer if it holds zero bytes.
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
  CUDA_VERIFY(cudaFree(this->data_));
  this->data_ = nullptr;
}
// cudaMalloc constructor (array-of-sizes form).
// Builds the base Tensor with a null data pointer and the given sizes, then
// allocates getSizeInBytes() bytes with cudaMalloc. The tensor is marked as
// Owner, so the destructor releases the buffer with cudaFree.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    const IndexT sizes[Dim])
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
      state_(AllocState::Owner) {
  CUDA_VERIFY(cudaMalloc(&this->data_, this->getSizeInBytes()));

  // A null pointer is acceptable only for a zero-byte tensor.
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));
}
// cudaMalloc constructor (initializer-list form).
// Builds the base Tensor with a null data pointer and the given sizes, then
// allocates getSizeInBytes() bytes with cudaMalloc. The tensor is marked as
// Owner, so the destructor releases the buffer with cudaFree.
//
// If the allocation left the data pointer null for a non-zero size, a
// diagnostic with the requested byte count is written to stderr and the
// process is aborted.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    std::initializer_list<IndexT> sizes)
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
      state_(AllocState::Owner) {
  CUDA_VERIFY(cudaMalloc(&this->data_, this->getSizeInBytes()));

  if (!(this->data_ || this->getSizeInBytes() == 0)) {
    // The byte count is cast to size_t and printed with %zu so that the
    // format specifier and the argument type agree on every platform
    // (the previous %ld mismatches wherever size_t is not `long`).
    fprintf(stderr,
            "could not cudaMalloc %zu bytes!\n",
            static_cast<size_t>(this->getSizeInBytes()));
    abort();
  }
}
// Memory reservation constructor (array-of-sizes form).
// Instead of calling cudaMalloc, requests getSizeInBytes() bytes from the
// DeviceMemory `m` for `stream` and uses the returned pointer as the data
// pointer. The reservation object is kept as a member and the tensor is
// marked as Reservation, so the destructor does not cudaFree the pointer.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    DeviceMemory& m,
    const IndexT sizes[Dim],
    cudaStream_t stream)
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
      state_(AllocState::Reservation) {
  auto reserved = m.getMemory(stream, this->getSizeInBytes());

  this->data_ = (T*) reserved.get();
  // A null pointer is acceptable only for a zero-byte tensor.
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));

  // Keep the reservation as a member of this tensor.
  reservation_ = std::move(reserved);
}
// Memory reservation constructor (initializer-list form).
// Instead of calling cudaMalloc, requests getSizeInBytes() bytes from the
// DeviceMemory `m` for `stream` and uses the returned pointer as the data
// pointer. The reservation object is kept as a member and the tensor is
// marked as Reservation, so the destructor does not cudaFree the pointer.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    DeviceMemory& m,
    std::initializer_list<IndexT> sizes,
    cudaStream_t stream)
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(nullptr, sizes),
      state_(AllocState::Reservation) {
  auto reserved = m.getMemory(stream, this->getSizeInBytes());

  this->data_ = (T*) reserved.get();
  // A null pointer is acceptable only for a zero-byte tensor.
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));

  // Keep the reservation as a member of this tensor.
  reservation_ = std::move(reserved);
}
// Non-owning constructor (array-of-sizes form).
// Passes the existing pointer `data` and the given sizes to the base Tensor.
// The state is NotOwner, so the destructor will not cudaFree `data`.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    DataPtrType data,
    const IndexT sizes[Dim])
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes),
      state_(AllocState::NotOwner) {}
// Non-owning constructor (initializer-list form).
// Passes the existing pointer `data` and the given sizes to the base Tensor.
// The state is NotOwner, so the destructor will not cudaFree `data`.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    DataPtrType data,
    std::initializer_list<IndexT> sizes)
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes),
      state_(AllocState::NotOwner) {}
// Non-owning constructor with explicit strides.
// Passes the existing pointer `data` together with the given sizes and
// strides to the base Tensor. The state is NotOwner, so the destructor will
// not cudaFree `data`.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    DataPtrType data,
    const IndexT sizes[Dim],
    const IndexT strides[Dim])
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(data, sizes, strides),
      state_(AllocState::NotOwner) {}
// Copying constructor backed by cudaMalloc.
// Adopts the sizes and strides of `t`, allocates getSizeInBytes() bytes with
// cudaMalloc, and then fills the new buffer via copyFrom(t, stream). The
// tensor is marked as Owner, so the destructor releases the buffer with
// cudaFree.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
    cudaStream_t stream)
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(
          nullptr, t.sizes(), t.strides()),
      state_(AllocState::Owner) {
  CUDA_VERIFY(cudaMalloc(&this->data_, this->getSizeInBytes()));

  // A null pointer is acceptable only for a zero-byte tensor.
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));

  this->copyFrom(t, stream);
}
// Copying constructor backed by a memory reservation.
// Adopts the sizes and strides of `t`, requests getSizeInBytes() bytes from
// the DeviceMemory `m` for `stream`, and then fills the buffer via
// copyFrom(t, stream). The reservation object is kept as a member and the
// tensor is marked as Reservation, so the destructor does not cudaFree the
// pointer.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::DeviceTensor(
    DeviceMemory& m,
    Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
    cudaStream_t stream)
    : Tensor<T, Dim, Contig, IndexT, PtrTraits>(
          nullptr, t.sizes(), t.strides()),
      state_(AllocState::Reservation) {
  auto reserved = m.getMemory(stream, this->getSizeInBytes());

  this->data_ = (T*) reserved.get();
  // A null pointer is acceptable only for a zero-byte tensor.
  FAISS_ASSERT(this->data_ || (this->getSizeInBytes() == 0));

  // Keep the reservation as a member of this tensor.
  reservation_ = std::move(reserved);

  this->copyFrom(t, stream);
}
// Sets every byte of the tensor's buffer to zero using cudaMemsetAsync on
// `stream`. Does nothing when the data pointer is null. The tensor must be
// contiguous, which is asserted before the memset is issued.
//
// Returns a reference to *this so calls can be chained.
template <typename T,
          int Dim,
          bool Contig,
          typename IndexT,
          template <typename U> class PtrTraits>
__host__ DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>&
DeviceTensor<T, Dim, Contig, IndexT, PtrTraits>::zero(cudaStream_t stream) {
  if (!this->data_) {
    // No buffer, nothing to clear.
    return *this;
  }

  // A single memset over getSizeInBytes() bytes is only valid when the
  // region is contiguous.
  FAISS_ASSERT(this->isContiguous());

  CUDA_VERIFY(cudaMemsetAsync(
      this->data_, 0, this->getSizeInBytes(), stream));

  return *this;
}
} } // namespace