Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
StackDeviceMemory.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #include "StackDeviceMemory.h"
13 #include "DeviceUtils.h"
14 #include "StaticUtils.h"
15 #include "../../FaissAssert.h"
16 #include <stdio.h>
17 #include <sstream>
18 
19 namespace faiss { namespace gpu {
20 
22  : device_(d),
23  isOwner_(true),
24  start_(nullptr),
25  end_(nullptr),
26  size_(sz),
27  head_(nullptr),
28  mallocCurrent_(0),
29  highWaterMemoryUsed_(0),
30  highWaterMalloc_(0) {
32 
33  cudaError_t err = cudaMalloc(&start_, size_);
34  FAISS_ASSERT(err == cudaSuccess);
35 
36  head_ = start_;
37  end_ = start_ + size_;
38 }
39 
40 StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
41  : device_(d),
42  isOwner_(isOwner),
43  start_((char*) p),
44  end_(((char*) p) + sz),
45  size_(sz),
46  head_((char*) p) {
47 }
48 
49 StackDeviceMemory::Stack::~Stack() {
50  if (isOwner_) {
52 
53  cudaError_t err = cudaFree(start_);
54  FAISS_ASSERT(err == cudaSuccess);
55  }
56 }
57 
58 size_t
60  return (end_ - head_);
61 }
62 
63 char*
64 StackDeviceMemory::Stack::getAlloc(size_t size, cudaStream_t stream) {
65  if (size > (end_ - head_)) {
66  // Too large for our stack
68 
69  char* p = nullptr;
70  auto err = cudaMalloc(&p, size);
71  FAISS_ASSERT(err == cudaSuccess);
72 
73  mallocCurrent_ += size;
74  highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
75 
76  fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
77  "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
78  size, highWaterMalloc_);
79 
80  return p;
81  } else {
82  // We can make the allocation out of our stack
83  // Find all the ranges that we overlap that may have been
84  // previously allocated; our allocation will be [head, endAlloc)
85  char* startAlloc = head_;
86  char* endAlloc = head_ + size;
87 
88  while (lastUsers_.size() > 0) {
89  auto& prevUser = lastUsers_.back();
90 
91  // Because there is a previous user, we must overlap it
92  FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
93 
94  if (stream != prevUser.stream_) {
95  // Synchronization required
96  // FIXME
97  FAISS_ASSERT(false);
98  }
99 
100  if (endAlloc < prevUser.end_) {
101  // Update the previous user info
102  prevUser.start_ = endAlloc;
103 
104  break;
105  }
106 
107  // If we're the exact size of the previous request, then we
108  // don't need to continue
109  bool done = (prevUser.end_ == endAlloc);
110 
111  lastUsers_.pop_back();
112 
113  if (done) {
114  break;
115  }
116  }
117 
118  head_ = endAlloc;
119  FAISS_ASSERT(head_ <= end_);
120 
121  highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
122  (size_t) (head_ - start_));
123  return startAlloc;
124  }
125 }
126 
127 void
129  size_t size,
130  cudaStream_t stream) {
131  if (p < start_ || p >= end_) {
132  // This is not on our stack; it was a one-off allocation
133  DeviceScope s(device_);
134 
135  auto err = cudaFree(p);
136  FAISS_ASSERT(err == cudaSuccess);
137 
138  FAISS_ASSERT(mallocCurrent_ >= size);
139  mallocCurrent_ -= size;
140  } else {
141  // This is on our stack
142  // Allocations should be freed in the reverse order they are made
143  FAISS_ASSERT(p + size == head_);
144 
145  head_ = p;
146  lastUsers_.push_back(Range(p, p + size, stream));
147  }
148 }
149 
150 std::string
152  std::stringstream s;
153 
154  s << "SDM device " << device_ << ": Total memory " << size_ << " ["
155  << (void*) start_ << ", " << (void*) end_ << ")\n";
156  s << " Available memory " << (size_t) (end_ - head_)
157  << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
158  s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
159  s << " High water cudaMalloc " << highWaterMalloc_ << "\n";
160 
161  int i = lastUsers_.size();
162  for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
163  s << i-- << ": size " << (size_t) (it->end_ - it->start_)
164  << " stream " << it->stream_
165  << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
166  }
167 
168  return s.str();
169 }
170 
171 size_t
173  return highWaterMalloc_;
174 }
175 
176 StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
177  : device_(device),
178  stack_(device, allocPerDevice) {
179 }
180 
182  void* p, size_t size, bool isOwner)
183  : device_(device),
184  stack_(device, p, size, isOwner) {
185 }
186 
187 StackDeviceMemory::~StackDeviceMemory() {
188 }
189 
190 int
192  return device_;
193 }
194 
196 StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
197  // We guarantee 16 byte alignment for allocations, so bump up `size`
198  // to the next highest multiple of 16
199  size = utils::roundUp(size, (size_t) 16);
200 
201  return DeviceMemoryReservation(this,
202  device_,
203  stack_.getAlloc(size, stream),
204  size,
205  stream);
206 }
207 
208 size_t
210  return stack_.getSizeAvailable();
211 }
212 
213 std::string
215  return stack_.toString();
216 }
217 
218 size_t
221 }
222 
223 void
224 StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
225  FAISS_ASSERT(m.get());
226  FAISS_ASSERT(device_ == m.device());
227 
228  stack_.returnAlloc((char*) m.get(), m.size(), m.stream());
229 }
230 
231 } } // namespace
DeviceMemoryReservation getMemory(cudaStream_t stream, size_t size) override
size_t getHighWaterCudaMalloc() const
Returns the high-water mark of cudaMalloc activity.
size_t size_
Total size of the stack region in bytes (end_ - start_).
Stack(int device, size_t size)
Constructor that allocates memory via cudaMalloc.
void returnAlloc(char *p, size_t size, cudaStream_t stream)
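Returns an allocation: stack allocations are popped (they must be returned in reverse order of allocation), while allocations outside the stack region are released with cudaFree.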
char * head_
Current stack head, always within [start_, end_]; it equals end_ when the stack is full.
size_t getSizeAvailable() const override
Returns the current size available without calling cudaMalloc.
int device_
Device this allocation is on.
std::string toString() const override
Returns a string containing our current memory manager state.
std::string toString() const
Returns the stack state.
size_t getHighWaterCudaMalloc() const override
char * getAlloc(size_t size, cudaStream_t stream)
int getDevice() const override
Returns the device we are managing memory for.
StackDeviceMemory(int device, size_t allocPerDevice)
Allocate a new region of memory that we manage.