Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
StackDeviceMemory.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "StackDeviceMemory.h"
12 #include "DeviceUtils.h"
13 #include "StaticUtils.h"
14 #include "../../FaissAssert.h"
15 #include <stdio.h>
16 #include <sstream>
17 
18 namespace faiss { namespace gpu {
19 
21  : device_(d),
22  isOwner_(true),
23  start_(nullptr),
24  end_(nullptr),
25  size_(sz),
26  head_(nullptr),
27  mallocCurrent_(0),
28  highWaterMemoryUsed_(0),
29  highWaterMalloc_(0) {
31 
32  cudaError_t err = cudaMalloc(&start_, size_);
33  FAISS_ASSERT(err == cudaSuccess);
34 
35  head_ = start_;
36  end_ = start_ + size_;
37 }
38 
39 StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
40  : device_(d),
41  isOwner_(isOwner),
42  start_((char*) p),
43  end_(((char*) p) + sz),
44  size_(sz),
45  head_((char*) p) {
46 }
47 
48 StackDeviceMemory::Stack::~Stack() {
49  if (isOwner_) {
51 
52  cudaError_t err = cudaFree(start_);
53  FAISS_ASSERT(err == cudaSuccess);
54  }
55 }
56 
57 size_t
59  return (end_ - head_);
60 }
61 
62 char*
63 StackDeviceMemory::Stack::getAlloc(size_t size, cudaStream_t stream) {
64  if (size > (end_ - head_)) {
65  // Too large for our stack
67 
68  // Print our requested size before we attempt the allocation
69  fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
70  "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
71  size, highWaterMalloc_);
72 
73  char* p = nullptr;
74  auto err = cudaMalloc(&p, size);
75  FAISS_ASSERT_FMT(err == cudaSuccess,
76  "cudaMalloc error %d on alloc size %zu",
77  (int) err, size);
78 
79  mallocCurrent_ += size;
80  highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
81 
82  return p;
83  } else {
84  // We can make the allocation out of our stack
85  // Find all the ranges that we overlap that may have been
86  // previously allocated; our allocation will be [head, endAlloc)
87  char* startAlloc = head_;
88  char* endAlloc = head_ + size;
89 
90  while (lastUsers_.size() > 0) {
91  auto& prevUser = lastUsers_.back();
92 
93  // Because there is a previous user, we must overlap it
94  FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
95 
96  if (stream != prevUser.stream_) {
97  // Synchronization required
98  // FIXME
99  FAISS_ASSERT(false);
100  }
101 
102  if (endAlloc < prevUser.end_) {
103  // Update the previous user info
104  prevUser.start_ = endAlloc;
105 
106  break;
107  }
108 
109  // If we're the exact size of the previous request, then we
110  // don't need to continue
111  bool done = (prevUser.end_ == endAlloc);
112 
113  lastUsers_.pop_back();
114 
115  if (done) {
116  break;
117  }
118  }
119 
120  head_ = endAlloc;
121  FAISS_ASSERT(head_ <= end_);
122 
123  highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
124  (size_t) (head_ - start_));
125  return startAlloc;
126  }
127 }
128 
129 void
131  size_t size,
132  cudaStream_t stream) {
133  if (p < start_ || p >= end_) {
134  // This is not on our stack; it was a one-off allocation
135  DeviceScope s(device_);
136 
137  auto err = cudaFree(p);
138  FAISS_ASSERT_FMT(err == cudaSuccess,
139  "cudaFree error %d (addr %p size %zu)",
140  (int) err, p, size);
141 
142  FAISS_ASSERT(mallocCurrent_ >= size);
143  mallocCurrent_ -= size;
144  } else {
145  // This is on our stack
146  // Allocations should be freed in the reverse order they are made
147  FAISS_ASSERT(p + size == head_);
148 
149  head_ = p;
150  lastUsers_.push_back(Range(p, p + size, stream));
151  }
152 }
153 
154 std::string
156  std::stringstream s;
157 
158  s << "SDM device " << device_ << ": Total memory " << size_ << " ["
159  << (void*) start_ << ", " << (void*) end_ << ")\n";
160  s << " Available memory " << (size_t) (end_ - head_)
161  << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
162  s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
163  s << " High water cudaMalloc " << highWaterMalloc_ << "\n";
164 
165  int i = lastUsers_.size();
166  for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
167  s << i-- << ": size " << (size_t) (it->end_ - it->start_)
168  << " stream " << it->stream_
169  << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
170  }
171 
172  return s.str();
173 }
174 
175 size_t
177  return highWaterMalloc_;
178 }
179 
180 StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
181  : device_(device),
182  stack_(device, allocPerDevice) {
183 }
184 
186  void* p, size_t size, bool isOwner)
187  : device_(device),
188  stack_(device, p, size, isOwner) {
189 }
190 
191 StackDeviceMemory::~StackDeviceMemory() {
192 }
193 
194 int
196  return device_;
197 }
198 
200 StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
201  // We guarantee 16 byte alignment for allocations, so bump up `size`
202  // to the next highest multiple of 16
203  size = utils::roundUp(size, (size_t) 16);
204 
205  return DeviceMemoryReservation(this,
206  device_,
207  stack_.getAlloc(size, stream),
208  size,
209  stream);
210 }
211 
212 size_t
214  return stack_.getSizeAvailable();
215 }
216 
217 std::string
219  return stack_.toString();
220 }
221 
222 size_t
225 }
226 
227 void
228 StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
229  FAISS_ASSERT(m.get());
230  FAISS_ASSERT(device_ == m.device());
231 
232  stack_.returnAlloc((char*) m.get(), m.size(), m.stream());
233 }
234 
235 } } // namespace
DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size) override
size_t getHighWaterCudaMalloc() const
Returns the high-water mark of cudaMalloc activity.
size_t size_
Total size end_ - start_.
Stack(int device, size_t size)
Constructor that allocates memory via cudaMalloc.
void returnAlloc(char *p, size_t size, cudaStream_t stream)
Returns an allocation.
char * head_
Stack head within [start, end)
size_t getSizeAvailable() const override
Returns the current size available without calling cudaMalloc.
int device_
Device this allocation is on.
std::string toString() const override
Returns a string containing our current memory manager state.
std::string toString() const
Returns the stack state.
size_t getHighWaterCudaMalloc() const override
char * getAlloc(size_t size, cudaStream_t stream)
int getDevice() const override
Returns the device we are managing memory for.
StackDeviceMemory(int device, size_t allocPerDevice)
Allocate a new region of memory that we manage.