Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
StackDeviceMemory.cpp
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #include "StackDeviceMemory.h"
10 #include "DeviceUtils.h"
11 #include "MemorySpace.h"
12 #include "StaticUtils.h"
13 #include "../../FaissAssert.h"
14 #include <stdio.h>
15 #include <sstream>
16 
17 namespace faiss { namespace gpu {
18 
20  : device_(d),
21  isOwner_(true),
22  start_(nullptr),
23  end_(nullptr),
24  size_(sz),
25  head_(nullptr),
26  mallocCurrent_(0),
27  highWaterMemoryUsed_(0),
28  highWaterMalloc_(0),
29  cudaMallocWarning_(true) {
31 
32  allocMemorySpace(MemorySpace::Device, &start_, size_);
33 
34  head_ = start_;
35  end_ = start_ + size_;
36 }
37 
38 StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
39  : device_(d),
40  isOwner_(isOwner),
41  start_((char*) p),
42  end_(((char*) p) + sz),
43  size_(sz),
44  head_((char*) p),
45  mallocCurrent_(0),
46  highWaterMemoryUsed_(0),
47  highWaterMalloc_(0),
48  cudaMallocWarning_(true) {
49 }
50 
51 StackDeviceMemory::Stack::~Stack() {
52  if (isOwner_) {
54 
55  freeMemorySpace(MemorySpace::Device, start_);
56  }
57 }
58 
59 size_t
61  return (end_ - head_);
62 }
63 
64 char*
66  cudaStream_t stream) {
67  if (size > (end_ - head_)) {
68  // Too large for our stack
70 
71  if (cudaMallocWarning_) {
72  // Print our requested size before we attempt the allocation
73  fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
74  "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
75  size, highWaterMalloc_);
76  }
77 
78  char* p = nullptr;
79  allocMemorySpace(MemorySpace::Device, &p, size);
80 
81  mallocCurrent_ += size;
82  highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
83 
84  return p;
85  } else {
86  // We can make the allocation out of our stack
87  // Find all the ranges that we overlap that may have been
88  // previously allocated; our allocation will be [head, endAlloc)
89  char* startAlloc = head_;
90  char* endAlloc = head_ + size;
91 
92  while (lastUsers_.size() > 0) {
93  auto& prevUser = lastUsers_.back();
94 
95  // Because there is a previous user, we must overlap it
96  FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
97 
98  if (stream != prevUser.stream_) {
99  // Synchronization required
100  // FIXME
101  FAISS_ASSERT(false);
102  }
103 
104  if (endAlloc < prevUser.end_) {
105  // Update the previous user info
106  prevUser.start_ = endAlloc;
107 
108  break;
109  }
110 
111  // If we're the exact size of the previous request, then we
112  // don't need to continue
113  bool done = (prevUser.end_ == endAlloc);
114 
115  lastUsers_.pop_back();
116 
117  if (done) {
118  break;
119  }
120  }
121 
122  head_ = endAlloc;
123  FAISS_ASSERT(head_ <= end_);
124 
125  highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
126  (size_t) (head_ - start_));
127  return startAlloc;
128  }
129 }
130 
131 void
133  size_t size,
134  cudaStream_t stream) {
135  if (p < start_ || p >= end_) {
136  // This is not on our stack; it was a one-off allocation
137  DeviceScope s(device_);
138 
139  freeMemorySpace(MemorySpace::Device, p);
140 
141  FAISS_ASSERT(mallocCurrent_ >= size);
142  mallocCurrent_ -= size;
143  } else {
144  // This is on our stack
145  // Allocations should be freed in the reverse order they are made
146  FAISS_ASSERT(p + size == head_);
147 
148  head_ = p;
149  lastUsers_.push_back(Range(p, p + size, stream));
150  }
151 }
152 
153 std::string
155  std::stringstream s;
156 
157  s << "SDM device " << device_ << ": Total memory " << size_ << " ["
158  << (void*) start_ << ", " << (void*) end_ << ")\n";
159  s << " Available memory " << (size_t) (end_ - head_)
160  << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
161  s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
162  s << " High water cudaMalloc " << highWaterMalloc_ << "\n";
163 
164  int i = lastUsers_.size();
165  for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
166  s << i-- << ": size " << (size_t) (it->end_ - it->start_)
167  << " stream " << it->stream_
168  << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
169  }
170 
171  return s.str();
172 }
173 
174 size_t
176  return highWaterMalloc_;
177 }
178 
179 StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
180  : device_(device),
181  stack_(device, allocPerDevice) {
182 }
183 
185  void* p, size_t size, bool isOwner)
186  : device_(device),
187  stack_(device, p, size, isOwner) {
188 }
189 
190 StackDeviceMemory::~StackDeviceMemory() {
191 }
192 
193 void
196 }
197 
198 int
200  return device_;
201 }
202 
204 StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
205  // We guarantee 16 byte alignment for allocations, so bump up `size`
206  // to the next highest multiple of 16
207  size = utils::roundUp(size, (size_t) 16);
208 
209  return DeviceMemoryReservation(this,
210  device_,
211  stack_.getAlloc(size, stream),
212  size,
213  stream);
214 }
215 
216 size_t
218  return stack_.getSizeAvailable();
219 }
220 
221 std::string
223  return stack_.toString();
224 }
225 
226 size_t
229 }
230 
231 void
232 StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
233  FAISS_ASSERT(m.get());
234  FAISS_ASSERT(device_ == m.device());
235 
236  stack_.returnAlloc((char*) m.get(), m.size(), m.stream());
237 }
238 
239 } } // namespace
DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size) override
size_t getHighWaterCudaMalloc() const
Returns the high-water mark of cudaMalloc activity.
bool cudaMallocWarning_
Whether or not a warning upon cudaMalloc is generated.
size_t size_
Total size end_ - start_.
Stack(int device, size_t size)
Constructor that allocates memory via cudaMalloc.
void returnAlloc(char *p, size_t size, cudaStream_t stream)
Returns an allocation.
char * head_
Stack head within [start, end)
size_t getSizeAvailable() const override
Returns the current size available without calling cudaMalloc.
int device_
Device this allocation is on.
std::string toString() const override
Returns a string containing our current memory manager state.
std::string toString() const
Returns the stack state.
size_t getHighWaterCudaMalloc() const override
char * getAlloc(size_t size, cudaStream_t stream)
int getDevice() const override
Returns the device we are managing memory for.
StackDeviceMemory(int device, size_t allocPerDevice)
Allocate a new region of memory that we manage.