11 #include "StackDeviceMemory.h"
12 #include "DeviceUtils.h"
13 #include "StaticUtils.h"
14 #include "../../FaissAssert.h"
18 namespace faiss {
namespace gpu {
28 highWaterMemoryUsed_(0),
33 FAISS_ASSERT(err == cudaSuccess);
43 end_(((char*) p) + sz),
48 StackDeviceMemory::Stack::~Stack() {
52 cudaError_t err = cudaFree(start_);
53 FAISS_ASSERT(err == cudaSuccess);
59 return (end_ - head_);
64 if (size > (end_ - head_)) {
69 fprintf(stderr,
"WARN: increase temp memory to avoid cudaMalloc, "
70 "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
71 size, highWaterMalloc_);
74 auto err = cudaMalloc(&p, size);
75 FAISS_ASSERT_FMT(err == cudaSuccess,
76 "cudaMalloc error %d on alloc size %zu",
79 mallocCurrent_ += size;
80 highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
87 char* startAlloc = head_;
88 char* endAlloc = head_ + size;
90 while (lastUsers_.size() > 0) {
91 auto& prevUser = lastUsers_.back();
94 FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
96 if (stream != prevUser.stream_) {
102 if (endAlloc < prevUser.end_) {
104 prevUser.start_ = endAlloc;
111 bool done = (prevUser.end_ == endAlloc);
113 lastUsers_.pop_back();
121 FAISS_ASSERT(head_ <= end_);
123 highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
124 (
size_t) (head_ - start_));
132 cudaStream_t stream) {
133 if (p < start_ || p >= end_) {
137 auto err = cudaFree(p);
138 FAISS_ASSERT_FMT(err == cudaSuccess,
139 "cudaFree error %d (addr %p size %zu)",
142 FAISS_ASSERT(mallocCurrent_ >= size);
143 mallocCurrent_ -= size;
147 FAISS_ASSERT(p + size == head_);
150 lastUsers_.push_back(
Range(p, p + size, stream));
158 s <<
"SDM device " <<
device_ <<
": Total memory " << size_ <<
" ["
159 << (
void*) start_ <<
", " << (
void*) end_ <<
")\n";
160 s <<
" Available memory " << (size_t) (end_ - head_)
161 <<
" [" << (
void*) head_ <<
", " << (
void*) end_ <<
")\n";
162 s <<
" High water temp alloc " << highWaterMemoryUsed_ <<
"\n";
163 s <<
" High water cudaMalloc " << highWaterMalloc_ <<
"\n";
165 int i = lastUsers_.size();
166 for (
auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
167 s << i-- <<
": size " << (size_t) (it->end_ - it->start_)
168 <<
" stream " << it->stream_
169 <<
" [" << (
void*) it->start_ <<
", " << (
void*) it->end_ <<
")\n";
177 return highWaterMalloc_;
182 stack_(device, allocPerDevice) {
186 void* p,
size_t size,
bool isOwner)
188 stack_(device, p, size, isOwner) {
191 StackDeviceMemory::~StackDeviceMemory() {
203 size = utils::roundUp(size, (
size_t) 16);
229 FAISS_ASSERT(m.get());
230 FAISS_ASSERT(
device_ == m.device());
DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size) override
size_t getHighWaterCudaMalloc() const
Returns the high-water mark of cudaMalloc usage: the peak number of bytes simultaneously outstanding from allocations that did not fit in the stack.
size_t size_
Total size of the managed region in bytes, i.e. end_ - start_.
Stack(int device, size_t size)
Constructor that allocates memory via cudaMalloc.
Stack stack_
Memory stack.
void returnAlloc(char *p, size_t size, cudaStream_t stream)
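Returns an allocation; memory outside [start_, end_) is released with cudaFree, otherwise it must be the most recent stack allocation (p + size == head_).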
char * head_
Current stack head; lies within [start_, end_] and equals end_ when no stack space remains.
size_t getSizeAvailable() const override
Returns the current size available without calling cudaMalloc.
size_t getSizeAvailable() const
int device_
Device this allocation is on.
std::string toString() const override
Returns a string containing our current memory manager state.
std::string toString() const
Returns the stack state.
size_t getHighWaterCudaMalloc() const override
char * getAlloc(size_t size, cudaStream_t stream)
int getDevice() const override
Returns the device we are managing memory for.
StackDeviceMemory(int device, size_t allocPerDevice)
Allocate a new region of memory that we manage.