9 #include "StackDeviceMemory.h"
10 #include "DeviceUtils.h"
11 #include "MemorySpace.h"
12 #include "StaticUtils.h"
13 #include "../../FaissAssert.h"
17 namespace faiss {
namespace gpu {
27 highWaterMemoryUsed_(0),
29 cudaMallocWarning_(true) {
32 allocMemorySpace(MemorySpace::Device, &
start_,
size_);
42 end_(((char*) p) + sz),
46 highWaterMemoryUsed_(0),
48 cudaMallocWarning_(true) {
51 StackDeviceMemory::Stack::~Stack() {
55 freeMemorySpace(MemorySpace::Device, start_);
61 return (end_ - head_);
66 cudaStream_t stream) {
67 if (size > (end_ - head_)) {
71 if (cudaMallocWarning_) {
73 fprintf(stderr,
"WARN: increase temp memory to avoid cudaMalloc, "
74 "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
75 size, highWaterMalloc_);
79 allocMemorySpace(MemorySpace::Device, &p, size);
81 mallocCurrent_ += size;
82 highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
89 char* startAlloc = head_;
90 char* endAlloc = head_ + size;
92 while (lastUsers_.size() > 0) {
93 auto& prevUser = lastUsers_.back();
96 FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
98 if (stream != prevUser.stream_) {
104 if (endAlloc < prevUser.end_) {
106 prevUser.start_ = endAlloc;
113 bool done = (prevUser.end_ == endAlloc);
115 lastUsers_.pop_back();
123 FAISS_ASSERT(head_ <= end_);
125 highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
126 (
size_t) (head_ - start_));
134 cudaStream_t stream) {
135 if (p < start_ || p >= end_) {
139 freeMemorySpace(MemorySpace::Device, p);
141 FAISS_ASSERT(mallocCurrent_ >= size);
142 mallocCurrent_ -= size;
146 FAISS_ASSERT(p + size == head_);
149 lastUsers_.push_back(
Range(p, p + size, stream));
157 s <<
"SDM device " <<
device_ <<
": Total memory " << size_ <<
" ["
158 << (
void*) start_ <<
", " << (
void*) end_ <<
")\n";
159 s <<
" Available memory " << (size_t) (end_ - head_)
160 <<
" [" << (
void*) head_ <<
", " << (
void*) end_ <<
")\n";
161 s <<
" High water temp alloc " << highWaterMemoryUsed_ <<
"\n";
162 s <<
" High water cudaMalloc " << highWaterMalloc_ <<
"\n";
164 int i = lastUsers_.size();
165 for (
auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
166 s << i-- <<
": size " << (size_t) (it->end_ - it->start_)
167 <<
" stream " << it->stream_
168 <<
" [" << (
void*) it->start_ <<
", " << (
void*) it->end_ <<
")\n";
176 return highWaterMalloc_;
181 stack_(device, allocPerDevice) {
185 void* p,
size_t size,
bool isOwner)
187 stack_(device, p, size, isOwner) {
190 StackDeviceMemory::~StackDeviceMemory() {
207 size = utils::roundUp(size, (
size_t) 16);
233 FAISS_ASSERT(m.get());
234 FAISS_ASSERT(
device_ == m.device());
DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size) override
size_t getHighWaterCudaMalloc() const
Returns the high-water mark of cudaMalloc activity.
bool cudaMallocWarning_
Whether a warning is printed to stderr when a request does not fit in the remaining stack space and is served by cudaMalloc instead.
size_t size_
Total size of the stack's memory region in bytes, i.e. end_ - start_.
Stack(int device, size_t size)
Constructor that allocates memory via cudaMalloc.
void setCudaMallocWarning(bool b)
Stack stack_
Memory stack.
void returnAlloc(char *p, size_t size, cudaStream_t stream)
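Gives an allocation back: a pointer outside [start_, end_) is freed directly, otherwise it must be the most recent stack allocation (p + size == head_).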
char * head_
Stack head within [start, end)
size_t getSizeAvailable() const override
Returns the current size available without calling cudaMalloc.
size_t getSizeAvailable() const
int device_
Device this allocation is on.
std::string toString() const override
Returns a string containing our current memory manager state.
std::string toString() const
Returns the stack state.
size_t getHighWaterCudaMalloc() const override
char * getAlloc(size_t size, cudaStream_t stream)
int getDevice() const override
Returns the device we are managing memory for.
StackDeviceMemory(int device, size_t allocPerDevice)
Allocate a new region of memory that we manage.