10 #include "StackDeviceMemory.h"
11 #include "DeviceUtils.h"
12 #include "StaticUtils.h"
13 #include "../../FaissAssert.h"
17 namespace faiss {
namespace gpu {
27 highWaterMemoryUsed_(0),
29 cudaMallocWarning_(true) {
33 FAISS_ASSERT(err == cudaSuccess);
43 end_(((char*) p) + sz),
47 highWaterMemoryUsed_(0),
49 cudaMallocWarning_(true) {
52 StackDeviceMemory::Stack::~Stack() {
56 cudaError_t err = cudaFree(start_);
57 FAISS_ASSERT(err == cudaSuccess);
63 return (end_ - head_);
68 cudaStream_t stream) {
69 if (size > (end_ - head_)) {
73 if (cudaMallocWarning_) {
75 fprintf(stderr,
"WARN: increase temp memory to avoid cudaMalloc, "
76 "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
77 size, highWaterMalloc_);
81 auto err = cudaMalloc(&p, size);
82 FAISS_ASSERT_FMT(err == cudaSuccess,
83 "cudaMalloc error %d on alloc size %zu",
86 mallocCurrent_ += size;
87 highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
94 char* startAlloc = head_;
95 char* endAlloc = head_ + size;
97 while (lastUsers_.size() > 0) {
98 auto& prevUser = lastUsers_.back();
101 FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
103 if (stream != prevUser.stream_) {
109 if (endAlloc < prevUser.end_) {
111 prevUser.start_ = endAlloc;
118 bool done = (prevUser.end_ == endAlloc);
120 lastUsers_.pop_back();
128 FAISS_ASSERT(head_ <= end_);
130 highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
131 (
size_t) (head_ - start_));
139 cudaStream_t stream) {
140 if (p < start_ || p >= end_) {
144 auto err = cudaFree(p);
145 FAISS_ASSERT_FMT(err == cudaSuccess,
146 "cudaFree error %d (addr %p size %zu)",
149 FAISS_ASSERT(mallocCurrent_ >= size);
150 mallocCurrent_ -= size;
154 FAISS_ASSERT(p + size == head_);
157 lastUsers_.push_back(
Range(p, p + size, stream));
165 s <<
"SDM device " <<
device_ <<
": Total memory " << size_ <<
" ["
166 << (
void*) start_ <<
", " << (
void*) end_ <<
")\n";
167 s <<
" Available memory " << (size_t) (end_ - head_)
168 <<
" [" << (
void*) head_ <<
", " << (
void*) end_ <<
")\n";
169 s <<
" High water temp alloc " << highWaterMemoryUsed_ <<
"\n";
170 s <<
" High water cudaMalloc " << highWaterMalloc_ <<
"\n";
172 int i = lastUsers_.size();
173 for (
auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
174 s << i-- <<
": size " << (size_t) (it->end_ - it->start_)
175 <<
" stream " << it->stream_
176 <<
" [" << (
void*) it->start_ <<
", " << (
void*) it->end_ <<
")\n";
184 return highWaterMalloc_;
189 stack_(device, allocPerDevice) {
193 void* p,
size_t size,
bool isOwner)
195 stack_(device, p, size, isOwner) {
198 StackDeviceMemory::~StackDeviceMemory() {
215 size = utils::roundUp(size, (
size_t) 16);
241 FAISS_ASSERT(m.get());
242 FAISS_ASSERT(
device_ == m.device());
DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size) override
size_t getHighWaterCudaMalloc() const
Returns the high-water mark of cudaMalloc activity.
bool cudaMallocWarning_
Whether or not a warning upon cudaMalloc is generated.
size_t size_
Total size in bytes (end_ - start_).
Stack(int device, size_t size)
Constructor that allocates memory via cudaMalloc.
void setCudaMallocWarning(bool b)
Stack stack_
Memory stack.
void returnAlloc(char *p, size_t size, cudaStream_t stream)
Gives the allocation [p, p + size) back to the memory stack; the function itself returns nothing.
char * head_
Stack head within [start_, end_]; it equals end_ when no stack space is available.
size_t getSizeAvailable() const override
Returns the current size available without calling cudaMalloc.
size_t getSizeAvailable() const
int device_
Device this allocation is on.
std::string toString() const override
Returns a string containing our current memory manager state.
std::string toString() const
Returns the stack state.
size_t getHighWaterCudaMalloc() const override
char * getAlloc(size_t size, cudaStream_t stream)
int getDevice() const override
Returns the device we are managing memory for.
StackDeviceMemory(int device, size_t allocPerDevice)
Allocate a new region of memory that we manage.