14 #include "../../FaissAssert.h"
15 #include "DeviceUtils.h"
16 #include "StaticUtils.h"
21 namespace faiss {
namespace gpu {
42 CUDA_VERIFY(cudaFree(data_));
// Returns num_, the number of T elements currently stored (num_ is multiplied
// by sizeof(T) wherever this file computes a byte count for the contents).
48 size_t size()
const {
return num_; }
// Returns capacity_, the number of T elements the current allocation was sized
// for (realloc_ allocates newCapacity * sizeof(T) bytes and then records
// newCapacity in capacity_).
49 size_t capacity()
const {
return capacity_; }
// Returns the raw, mutable buffer pointer data_.
// data_ is used in this file as the device-side operand of cudaMemcpyAsync
// (e.g. the destination of a cudaMemcpyHostToDevice copy) and is released with
// cudaFree, so the returned pointer is not meant to be dereferenced on the host.
50 T* data() {
return data_; }
// Const overload: returns the same buffer pointer data_ as a pointer-to-const.
// As with the mutable overload, data_ is handled as device memory by the
// cudaMemcpyAsync / cudaFree calls in this file.
51 const T* data()
const {
return data_; }
// Copies the stored contents into a newly constructed host std::vector<OutT>.
// The num_ * sizeof(T) bytes are copied verbatim; OutT only determines how the
// destination vector is typed and sized.
//   stream - CUDA stream the device-to-host copy is issued on.
// NOTE(review): the end of this function (presumably `return out;`) is not
// visible in this excerpt.
53 template <
typename OutT>
54 std::vector<OutT> copyToHost(cudaStream_t stream)
const {
// The total byte count must be an exact multiple of sizeof(OutT). `*` and `%`
// have equal precedence and group left to right, so this checks
// (num_ * sizeof(T)) % sizeof(OutT) == 0.
55 FAISS_ASSERT(num_ *
sizeof(T) %
sizeof(OutT) == 0);
// Size the host vector so that it holds exactly the same number of bytes.
57 std::vector<OutT> out((num_ *
sizeof(T)) /
sizeof(OutT));
// Device-to-host copy of the raw bytes, enqueued on `stream`.
// NOTE(review): no explicit stream synchronization is visible before `out`
// would be handed back; `out` is ordinary std::vector storage — confirm the
// cudaMemcpyAsync completion semantics this relies on.
58 CUDA_VERIFY(cudaMemcpyAsync(out.data(), data_, num_ *
sizeof(T),
59 cudaMemcpyDeviceToHost, stream));
// Appends n elements read from d after the existing contents, i.e. at
// data_ + num_.
//   d            - source elements; the two copies below use the
//                  cudaMemcpyHostToDevice and cudaMemcpyDeviceToDevice kinds,
//                  so d is apparently allowed to be a host or a device pointer.
//   reserveExact - defaults to false.
// NOTE(review): this excerpt omits lines of the function: the remaining
// parameters (n and stream are used below but their declarations are not
// shown), the conditions that select between the statements below, the update
// of num_, and the return statement. Confirm against the full source.
67 bool append(
const T* d,
70 bool reserveExact =
false) {
// Element count needed once the n new elements are added.
74 size_t reserveSize = num_ + n;
// Replace the exact requirement with the size chosen by getNewCapacity_.
// NOTE(review): presumably skipped when reserveExact is true — the guarding
// condition is not visible here.
76 reserveSize = getNewCapacity_(reserveSize);
// Make sure the allocation can hold reserveSize elements; the result of
// reserve is kept in mem.
79 mem = reserve(reserveSize, stream);
// Ask getDeviceForAddress about d.
// NOTE(review): presumably its result decides which of the two copies below
// runs; the branch itself is not visible.
81 int dev = getDeviceForAddress(d);
// Copy used when d is host memory.
83 CUDA_VERIFY(cudaMemcpyAsync(data_ + num_, d, n *
sizeof(T),
84 cudaMemcpyHostToDevice, stream));
// Copy used when d is device memory.
86 CUDA_VERIFY(cudaMemcpyAsync(data_ + num_, d, n *
sizeof(T),
87 cudaMemcpyDeviceToDevice, stream));
// Resizes the container to newSize elements.
//   newSize - requested element count.
//   stream  - CUDA stream forwarded to reserve.
// NOTE(review): only the capacity-growing call is visible in this excerpt; the
// condition guarding it, the update of num_ and the return statement are not
// shown — confirm against the full source.
96 bool resize(
size_t newSize, cudaStream_t stream) {
// Reserve the capacity that getNewCapacity_ chooses for newSize; the result of
// reserve is kept in mem.
100 mem = reserve(getNewCapacity_(newSize), stream);
// Shrinks the allocation to give back unused capacity and returns a byte
// count: the number of elements released multiplied by sizeof(T).
//   exact  - NOTE(review): presumably selects the first path below (shrink to
//            exactly num_); the condition is not visible in this excerpt.
//   stream - CUDA stream forwarded to realloc_.
// NOTE(review): lines are missing between the statements shown (branch
// conditions, closing braces, the final fall-through return).
114 size_t reclaim(
bool exact, cudaStream_t stream) {
// Unused slots: allocated element capacity minus elements in use.
115 size_t free = capacity_ - num_;
// Path 1: shrink the allocation to exactly num_ elements and report all of the
// previously unused space, in bytes.
118 realloc_(num_, stream);
119 return free *
sizeof(T);
// Path 2: only shrink when more than a quarter of the capacity is unused.
126 if (free > (capacity_ / 4)) {
// Keep capacity_ / 8 elements of slack beyond the elements in use.
127 size_t newFree = capacity_ / 8;
128 size_t newCapacity = num_ + newFree;
// Remember the capacity before realloc_ overwrites capacity_.
130 size_t oldCapacity = capacity_;
// Since free > capacity_ / 4, num_ + capacity_ / 8 is below the old capacity,
// so this really is a shrink.
131 FAISS_ASSERT(newCapacity < oldCapacity);
133 realloc_(newCapacity, stream);
// Bytes given back by the shrink.
135 return (oldCapacity - newCapacity) *
sizeof(T);
// Ensures the allocation can hold newCapacity elements.
//   newCapacity - requested capacity, in elements.
//   stream      - CUDA stream forwarded to realloc_.
// NOTE(review): the return statements are not visible in this excerpt, so the
// meaning of the bool result cannot be confirmed from here.
142 bool reserve(
size_t newCapacity, cudaStream_t stream) {
// Case where the current capacity already covers the request.
// NOTE(review): the body of this branch is not shown; presumably it returns
// without reallocating.
143 if (newCapacity <= capacity_) {
// Otherwise move the contents into an allocation of newCapacity elements.
148 realloc_(newCapacity, stream);
// Moves the contents into a fresh allocation of newCapacity elements.
//   newCapacity - new capacity in elements; must be at least num_.
//   stream      - CUDA stream the copy of the existing contents is issued on.
// NOTE(review): the closing lines of this function are not all visible in this
// excerpt (see the note before the capacity_ update).
153 void realloc_(
size_t newCapacity, cudaStream_t stream) {
// The new allocation must be able to hold every element currently stored.
154 FAISS_ASSERT(num_ <= newCapacity);
156 T* newData =
nullptr;
// Allocate newCapacity elements of device memory.
157 CUDA_VERIFY(cudaMalloc(&newData, newCapacity *
sizeof(T)));
// Copy the num_ elements in use from the old buffer to the new one.
158 CUDA_VERIFY(cudaMemcpyAsync(newData, data_, num_ *
sizeof(T),
159 cudaMemcpyDeviceToDevice, stream));
// Release the old buffer.
// NOTE(review): this is issued right after the asynchronous copy above is
// enqueued, with no explicit stream synchronization in between — confirm that
// cudaFree's synchronization behavior makes this safe.
161 CUDA_VERIFY(cudaFree(data_));
// NOTE(review): the statement that repoints data_ at newData is not visible in
// this excerpt (presumably it sits just before this line) — confirm.
164 capacity_ = newCapacity;
// Capacity policy: maps a requested element count to the capacity that should
// actually be allocated, by delegating to utils::nextHighestPowerOf2.
//   preferredSize - requested element count.
// NOTE(review): how the helper treats 0 or a value that is already a power of
// two is not visible from this file — check utils::nextHighestPowerOf2.
167 size_t getNewCapacity_(
size_t preferredSize) {
168 return utils::nextHighestPowerOf2(preferredSize);