Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
StackDeviceMemory.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #include "StackDeviceMemory.h"
11 #include "DeviceUtils.h"
12 #include "StaticUtils.h"
13 #include "../../FaissAssert.h"
14 #include <stdio.h>
15 #include <sstream>
16 
17 namespace faiss { namespace gpu {
18 
20  : device_(d),
21  isOwner_(true),
22  start_(nullptr),
23  end_(nullptr),
24  size_(sz),
25  head_(nullptr),
26  mallocCurrent_(0),
27  highWaterMemoryUsed_(0),
28  highWaterMalloc_(0),
29  cudaMallocWarning_(true) {
31 
32  cudaError_t err = cudaMalloc(&start_, size_);
33  FAISS_ASSERT(err == cudaSuccess);
34 
35  head_ = start_;
36  end_ = start_ + size_;
37 }
38 
39 StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
40  : device_(d),
41  isOwner_(isOwner),
42  start_((char*) p),
43  end_(((char*) p) + sz),
44  size_(sz),
45  head_((char*) p),
46  mallocCurrent_(0),
47  highWaterMemoryUsed_(0),
48  highWaterMalloc_(0),
49  cudaMallocWarning_(true) {
50 }
51 
52 StackDeviceMemory::Stack::~Stack() {
53  if (isOwner_) {
55 
56  cudaError_t err = cudaFree(start_);
57  FAISS_ASSERT(err == cudaSuccess);
58  }
59 }
60 
61 size_t
63  return (end_ - head_);
64 }
65 
66 char*
68  cudaStream_t stream) {
69  if (size > (end_ - head_)) {
70  // Too large for our stack
72 
73  if (cudaMallocWarning_) {
74  // Print our requested size before we attempt the allocation
75  fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
76  "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
77  size, highWaterMalloc_);
78  }
79 
80  char* p = nullptr;
81  auto err = cudaMalloc(&p, size);
82  FAISS_ASSERT_FMT(err == cudaSuccess,
83  "cudaMalloc error %d on alloc size %zu",
84  (int) err, size);
85 
86  mallocCurrent_ += size;
87  highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
88 
89  return p;
90  } else {
91  // We can make the allocation out of our stack
92  // Find all the ranges that we overlap that may have been
93  // previously allocated; our allocation will be [head, endAlloc)
94  char* startAlloc = head_;
95  char* endAlloc = head_ + size;
96 
97  while (lastUsers_.size() > 0) {
98  auto& prevUser = lastUsers_.back();
99 
100  // Because there is a previous user, we must overlap it
101  FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
102 
103  if (stream != prevUser.stream_) {
104  // Synchronization required
105  // FIXME
106  FAISS_ASSERT(false);
107  }
108 
109  if (endAlloc < prevUser.end_) {
110  // Update the previous user info
111  prevUser.start_ = endAlloc;
112 
113  break;
114  }
115 
116  // If we're the exact size of the previous request, then we
117  // don't need to continue
118  bool done = (prevUser.end_ == endAlloc);
119 
120  lastUsers_.pop_back();
121 
122  if (done) {
123  break;
124  }
125  }
126 
127  head_ = endAlloc;
128  FAISS_ASSERT(head_ <= end_);
129 
130  highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
131  (size_t) (head_ - start_));
132  return startAlloc;
133  }
134 }
135 
136 void
138  size_t size,
139  cudaStream_t stream) {
140  if (p < start_ || p >= end_) {
141  // This is not on our stack; it was a one-off allocation
142  DeviceScope s(device_);
143 
144  auto err = cudaFree(p);
145  FAISS_ASSERT_FMT(err == cudaSuccess,
146  "cudaFree error %d (addr %p size %zu)",
147  (int) err, p, size);
148 
149  FAISS_ASSERT(mallocCurrent_ >= size);
150  mallocCurrent_ -= size;
151  } else {
152  // This is on our stack
153  // Allocations should be freed in the reverse order they are made
154  FAISS_ASSERT(p + size == head_);
155 
156  head_ = p;
157  lastUsers_.push_back(Range(p, p + size, stream));
158  }
159 }
160 
161 std::string
163  std::stringstream s;
164 
165  s << "SDM device " << device_ << ": Total memory " << size_ << " ["
166  << (void*) start_ << ", " << (void*) end_ << ")\n";
167  s << " Available memory " << (size_t) (end_ - head_)
168  << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
169  s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
170  s << " High water cudaMalloc " << highWaterMalloc_ << "\n";
171 
172  int i = lastUsers_.size();
173  for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
174  s << i-- << ": size " << (size_t) (it->end_ - it->start_)
175  << " stream " << it->stream_
176  << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
177  }
178 
179  return s.str();
180 }
181 
182 size_t
184  return highWaterMalloc_;
185 }
186 
187 StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
188  : device_(device),
189  stack_(device, allocPerDevice) {
190 }
191 
193  void* p, size_t size, bool isOwner)
194  : device_(device),
195  stack_(device, p, size, isOwner) {
196 }
197 
198 StackDeviceMemory::~StackDeviceMemory() {
199 }
200 
201 void
204 }
205 
206 int
208  return device_;
209 }
210 
212 StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
213  // We guarantee 16 byte alignment for allocations, so bump up `size`
214  // to the next highest multiple of 16
215  size = utils::roundUp(size, (size_t) 16);
216 
217  return DeviceMemoryReservation(this,
218  device_,
219  stack_.getAlloc(size, stream),
220  size,
221  stream);
222 }
223 
224 size_t
226  return stack_.getSizeAvailable();
227 }
228 
229 std::string
231  return stack_.toString();
232 }
233 
234 size_t
237 }
238 
239 void
240 StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
241  FAISS_ASSERT(m.get());
242  FAISS_ASSERT(device_ == m.device());
243 
244  stack_.returnAlloc((char*) m.get(), m.size(), m.stream());
245 }
246 
247 } } // namespace
DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size) override
size_t getHighWaterCudaMalloc() const
Returns the high-water mark of cudaMalloc activity.
bool cudaMallocWarning_
Whether or not a warning upon cudaMalloc is generated.
size_t size_
Total size of the stack region in bytes (end_ - start_).
Stack(int device, size_t size)
Constructor that allocates memory via cudaMalloc.
void returnAlloc(char *p, size_t size, cudaStream_t stream)
Returns an allocation.
char * head_
Current stack head, within [start_, end_]; it equals end_ when the stack is full.
size_t getSizeAvailable() const override
Returns the current size available without calling cudaMalloc.
int device_
Device this allocation is on.
std::string toString() const override
Returns a string containing our current memory manager state.
std::string toString() const
Returns the stack state.
size_t getHighWaterCudaMalloc() const override
char * getAlloc(size_t size, cudaStream_t stream)
int getDevice() const override
Returns the device we are managing memory for.
StackDeviceMemory(int device, size_t allocPerDevice)
Allocate a new region of memory that we manage.