13 #include "../../FaissAssert.h"
14 #include "DeviceUtils.h"
15 #include "MemorySpace.h"
16 #include "StaticUtils.h"
21 namespace faiss {
namespace gpu {
43 CUDA_VERIFY(cudaFree(data_));
// Returns num_, the number of T elements currently stored in the vector.
49 size_t size()
const {
return num_; }
// Returns capacity_, the number of T elements the current allocation was
// sized for (realloc_ allocates newCapacity * sizeof(T) bytes and records
// newCapacity here).
50 size_t capacity()
const {
return capacity_; }
// Returns the raw, mutable data_ pointer.
// realloc_ frees the buffer this points to, so a pointer obtained here must
// not be used after any call that reallocates.
51 T* data() {
return data_; }
// Returns the raw data_ pointer for read-only access.
// realloc_ frees the buffer this points to, so a pointer obtained here must
// not be used after any call that reallocates.
52 const T* data()
const {
return data_; }
// Copies the vector's contents into a host-side std::vector<OutT>.
// The num_ * sizeof(T) bytes at data_ are reinterpreted as OutT elements, so
// the total byte count must be an exact multiple of sizeof(OutT).
// The transfer is issued with cudaMemcpyAsync on `stream`.
54 template <
typename OutT>
55 std::vector<OutT> copyToHost(cudaStream_t stream)
const {
// `*` and `%` share precedence and group left to right, so this checks
// (num_ * sizeof(T)) % sizeof(OutT) == 0.
56 FAISS_ASSERT(num_ *
sizeof(T) %
sizeof(OutT) == 0);
// Output element count is the total byte count divided by sizeof(OutT).
58 std::vector<OutT> out((num_ *
sizeof(T)) /
sizeof(OutT));
// Device-to-host transfer of the full byte count.
59 CUDA_VERIFY(cudaMemcpyAsync(out.data(), data_, num_ *
sizeof(T),
60 cudaMemcpyDeviceToHost, stream));
// Appends n elements read from d to the end of the vector: n * sizeof(T)
// bytes are copied to data_ + num_ on `stream`, after making sure the buffer
// can hold num_ + n elements.
// The source pointer d may be host or device memory; a different
// cudaMemcpyAsync direction is used for each case.
68 bool append(
const T* d,
71 bool reserveExact =
false) {
// Minimum number of elements the buffer must hold after the append.
75 size_t reserveSize = num_ + n;
// Round the request up through getNewCapacity_.
// NOTE(review): presumably skipped when reserveExact is true -- confirm.
77 reserveSize = getNewCapacity_(reserveSize);
// Result of reserve() is kept in `mem`.
80 mem = reserve(reserveSize, stream);
// NOTE(review): presumably `dev` tells whether d is a device address and
// selects between the two copies below -- confirm against
// getDeviceForAddress.
82 int dev = getDeviceForAddress(d);
// Source treated as host memory: host-to-device copy into the tail.
84 CUDA_VERIFY(cudaMemcpyAsync(data_ + num_, d, n *
sizeof(T),
85 cudaMemcpyHostToDevice, stream));
// Source treated as device memory: device-to-device copy into the tail.
87 CUDA_VERIFY(cudaMemcpyAsync(data_ + num_, d, n *
sizeof(T),
88 cudaMemcpyDeviceToDevice, stream));
// Resize entry point taking the requested element count and the stream to
// use for any reallocation.
97 bool resize(
size_t newSize, cudaStream_t stream) {
// Growing: reserve room first, with the request rounded up by
// getNewCapacity_; the result of reserve() is kept in `mem`.
100 if (num_ < newSize) {
101 mem = reserve(getNewCapacity_(newSize), stream);
// Shrinks the allocation to give back unused capacity.
// The values returned are byte counts: the number of elements released
// multiplied by sizeof(T).
115 size_t reclaim(
bool exact, cudaStream_t stream) {
// Number of allocated-but-unused elements.
116 size_t free = capacity_ - num_;
// Shrink to exactly the elements in use; all unused space is released.
// NOTE(review): presumably this is the `exact` path -- confirm.
119 realloc_(num_, stream);
120 return free *
sizeof(T);
// Shrink only when more than a quarter of the capacity is unused.
127 if (free > (capacity_ / 4)) {
// Keep capacity_ / 8 elements of headroom beyond the elements in use.
128 size_t newFree = capacity_ / 8;
129 size_t newCapacity = num_ + newFree;
// Remember the old capacity: realloc_ overwrites capacity_.
131 size_t oldCapacity = capacity_;
// Sanity check: the new size must be a strict shrink.
132 FAISS_ASSERT(newCapacity < oldCapacity);
134 realloc_(newCapacity, stream);
// Bytes released by the shrink.
136 return (oldCapacity - newCapacity) *
sizeof(T);
// Ensures the buffer can hold newCapacity elements, reallocating on `stream`
// when the request exceeds capacity_.
// NOTE(review): the bool result presumably reports whether a reallocation
// happened -- confirm.
143 bool reserve(
size_t newCapacity, cudaStream_t stream) {
// Request already fits in the current allocation.
144 if (newCapacity <= capacity_) {
// Request exceeds the current allocation: move to a larger buffer.
149 realloc_(newCapacity, stream);
// Moves the contents to a newly allocated buffer of newCapacity elements:
// allocate, copy the num_ live elements on `stream`, free the old buffer,
// and record the new capacity.
154 void realloc_(
size_t newCapacity, cudaStream_t stream) {
// The live elements must fit in the new buffer.
155 FAISS_ASSERT(num_ <= newCapacity);
157 T* newData =
nullptr;
// Allocate newCapacity * sizeof(T) bytes in memory space space_.
158 allocMemorySpace(space_, (
void**) &newData, newCapacity *
sizeof(T));
// Copy only the num_ live elements into the new buffer.
159 CUDA_VERIFY(cudaMemcpyAsync(newData, data_, num_ *
sizeof(T),
160 cudaMemcpyDeviceToDevice, stream));
// Release the old buffer; pointers previously obtained from data() dangle
// from here on.
162 CUDA_VERIFY(cudaFree(data_));
165 capacity_ = newCapacity;
// Growth policy: maps a requested element count to the capacity to reserve,
// as computed by utils::nextHighestPowerOf2.
168 size_t getNewCapacity_(
size_t preferredSize) {
169 return utils::nextHighestPowerOf2(preferredSize);