12 #include "StackDeviceMemory.h"
13 #include "DeviceUtils.h"
14 #include "StaticUtils.h"
15 #include "../../FaissAssert.h"
19 namespace faiss {
namespace gpu {
29 highWaterMemoryUsed_(0),
34 FAISS_ASSERT(err == cudaSuccess);
44 end_(((char*) p) + sz),
49 StackDeviceMemory::Stack::~Stack() {
53 cudaError_t err = cudaFree(start_);
54 FAISS_ASSERT(err == cudaSuccess);
60 return (end_ - head_);
65 if (size > (end_ - head_)) {
70 auto err = cudaMalloc(&p, size);
71 FAISS_ASSERT(err == cudaSuccess);
73 mallocCurrent_ += size;
74 highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
76 fprintf(stderr,
"WARN: increase temp memory to avoid cudaMalloc, "
77 "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
78 size, highWaterMalloc_);
85 char* startAlloc = head_;
86 char* endAlloc = head_ + size;
88 while (lastUsers_.size() > 0) {
89 auto& prevUser = lastUsers_.back();
92 FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
94 if (stream != prevUser.stream_) {
100 if (endAlloc < prevUser.end_) {
102 prevUser.start_ = endAlloc;
109 bool done = (prevUser.end_ == endAlloc);
111 lastUsers_.pop_back();
119 FAISS_ASSERT(head_ <= end_);
121 highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
122 (
size_t) (head_ - start_));
130 cudaStream_t stream) {
131 if (p < start_ || p >= end_) {
135 auto err = cudaFree(p);
136 FAISS_ASSERT(err == cudaSuccess);
138 FAISS_ASSERT(mallocCurrent_ >= size);
139 mallocCurrent_ -= size;
143 FAISS_ASSERT(p + size == head_);
146 lastUsers_.push_back(
Range(p, p + size, stream));
154 s <<
"SDM device " <<
device_ <<
": Total memory " << size_ <<
" ["
155 << (
void*) start_ <<
", " << (
void*) end_ <<
")\n";
156 s <<
" Available memory " << (size_t) (end_ - head_)
157 <<
" [" << (
void*) head_ <<
", " << (
void*) end_ <<
")\n";
158 s <<
" High water temp alloc " << highWaterMemoryUsed_ <<
"\n";
159 s <<
" High water cudaMalloc " << highWaterMalloc_ <<
"\n";
161 int i = lastUsers_.size();
162 for (
auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
163 s << i-- <<
": size " << (size_t) (it->end_ - it->start_)
164 <<
" stream " << it->stream_
165 <<
" [" << (
void*) it->start_ <<
", " << (
void*) it->end_ <<
")\n";
173 return highWaterMalloc_;
178 stack_(device, allocPerDevice) {
182 void* p,
size_t size,
bool isOwner)
184 stack_(device, p, size, isOwner) {
187 StackDeviceMemory::~StackDeviceMemory() {
199 size = utils::roundUp(size, (
size_t) 16);
225 FAISS_ASSERT(m.get());
226 FAISS_ASSERT(
device_ == m.device());
DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size) override
size_t getHighWaterCudaMalloc() const
Returns the high-water mark of cudaMalloc activity.
size_t size_
Total size of the managed region in bytes (end_ - start_).
Stack(int device, size_t size)
Constructor that allocates memory via cudaMalloc.
Stack stack_
Memory stack.
void returnAlloc(char *p, size_t size, cudaStream_t stream)
Returns an allocation.
char * head_
Stack head within [start_, end_]; equals end_ when the stack is fully allocated.
size_t getSizeAvailable() const override
Returns the current size available without calling cudaMalloc.
size_t getSizeAvailable() const
int device_
Device this allocation is on.
std::string toString() const override
Returns a string containing our current memory manager state.
std::string toString() const
Returns the stack state as a human-readable string (region bounds, available memory, high-water marks and last users).
size_t getHighWaterCudaMalloc() const override
char * getAlloc(size_t size, cudaStream_t stream)
int getDevice() const override
Returns the device we are managing memory for.
StackDeviceMemory(int device, size_t allocPerDevice)
Allocate a new region of memory that we manage.