Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceVector.cuh
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #pragma once
10 
11 #include "../../FaissAssert.h"
12 #include "DeviceUtils.h"
13 #include "MemorySpace.h"
14 #include "StaticUtils.h"
15 #include <algorithm>
16 #include <cuda.h>
17 #include <vector>
18 
19 namespace faiss { namespace gpu {
20 
21 /// A simple version of thrust::device_vector<T>, but has more control
22 /// over whether resize() initializes new space with T() (which we
23 /// don't want), and control on how much the reserved space grows by
24 /// upon resize/reserve. It is also meant for POD types only.
/// A simple version of thrust::device_vector<T>, but has more control
/// over whether resize() initializes new space with T() (which we
/// don't want), and control on how much the reserved space grows by
/// upon resize/reserve. It is also meant for POD types only.
template <typename T>
class DeviceVector {
 public:
  /// Creates an empty vector whose allocations are made in `space`
  DeviceVector(MemorySpace space = MemorySpace::Device)
      : data_(nullptr),
        num_(0),
        capacity_(0),
        space_(space) {
  }

  /// We own a raw device allocation (`data_`) that the destructor
  /// frees; the implicitly-generated copy operations would duplicate
  /// the pointer and cause a double free, so forbid copying.
  /// (Declaring the copy constructor also suppresses implicit moves,
  /// which would have the same double-free problem.)
  DeviceVector(const DeviceVector&) = delete;
  DeviceVector& operator=(const DeviceVector&) = delete;

  ~DeviceVector() {
    clear();
  }

  // Clear all allocated memory; reset to zero size
  void clear() {
    freeMemorySpace(space_, data_);
    data_ = nullptr;
    num_ = 0;
    capacity_ = 0;
  }

  /// Returns the number of valid elements
  size_t size() const { return num_; }
  /// Returns the number of elements for which space is reserved
  size_t capacity() const { return capacity_; }
  /// Returns the raw device pointer (nullptr if nothing is allocated)
  T* data() { return data_; }
  const T* data() const { return data_; }

  /// Copies our contents back to the host, reinterpreting the bytes
  /// as OutT; the total byte size must divide evenly by sizeof(OutT).
  /// NOTE(review): the copy is asynchronous on `stream`; the caller
  /// presumably must synchronize the stream before reading `out` —
  /// confirm against call sites.
  template <typename OutT>
  std::vector<OutT> copyToHost(cudaStream_t stream) const {
    FAISS_ASSERT(num_ * sizeof(T) % sizeof(OutT) == 0);

    std::vector<OutT> out((num_ * sizeof(T)) / sizeof(OutT));
    CUDA_VERIFY(cudaMemcpyAsync(out.data(), data_, num_ * sizeof(T),
                                cudaMemcpyDeviceToHost, stream));

    return out;
  }

  // Appends `n` elements from `d` (host or device memory; detected
  // via the pointer's resident device) on `stream`.
  // Returns true if we actually reallocated memory.
  // If `reserveExact` is true, then we reserve only the memory that
  // we need for what we're appending; otherwise capacity grows to
  // the next power of 2 to amortize future appends.
  bool append(const T* d,
              size_t n,
              cudaStream_t stream,
              bool reserveExact = false) {
    bool mem = false;

    if (n > 0) {
      size_t reserveSize = num_ + n;
      if (!reserveExact) {
        reserveSize = getNewCapacity_(reserveSize);
      }

      mem = reserve(reserveSize, stream);

      // getDeviceForAddress returns -1 for host-resident pointers
      int dev = getDeviceForAddress(d);
      if (dev == -1) {
        CUDA_VERIFY(cudaMemcpyAsync(data_ + num_, d, n * sizeof(T),
                                    cudaMemcpyHostToDevice, stream));
      } else {
        CUDA_VERIFY(cudaMemcpyAsync(data_ + num_, d, n * sizeof(T),
                                    cudaMemcpyDeviceToDevice, stream));
      }
      num_ += n;
    }

    return mem;
  }

  // Resizes to `newSize` elements; returns true if we actually
  // reallocated memory. Shrinking only adjusts `num_` and keeps the
  // allocation.
  bool resize(size_t newSize, cudaStream_t stream) {
    bool mem = false;

    if (num_ < newSize) {
      mem = reserve(getNewCapacity_(newSize), stream);
    }

    // Don't bother zero initializing the newly accessible memory
    // (unlike thrust::device_vector)
    num_ = newSize;

    return mem;
  }

  // Clean up after oversized allocations, while leaving some space to
  // remain for subsequent allocations (if `exact` false) or to
  // exactly the space we need (if `exact` true); returns space
  // reclaimed in bytes
  size_t reclaim(bool exact, cudaStream_t stream) {
    size_t free = capacity_ - num_;

    if (exact) {
      realloc_(num_, stream);
      return free * sizeof(T);
    }

    // If more than 1/4th of the space is free, then we want to
    // truncate to only having 1/8th of the space free; this still
    // preserves some space for new elements, but won't force us to
    // double our size right away
    if (free > (capacity_ / 4)) {
      size_t newFree = capacity_ / 8;
      size_t newCapacity = num_ + newFree;

      size_t oldCapacity = capacity_;
      FAISS_ASSERT(newCapacity < oldCapacity);

      realloc_(newCapacity, stream);

      return (oldCapacity - newCapacity) * sizeof(T);
    }

    return 0;
  }

  // Ensures capacity for at least `newCapacity` elements.
  // Returns true if we actually reallocated memory
  bool reserve(size_t newCapacity, cudaStream_t stream) {
    if (newCapacity <= capacity_) {
      return false;
    }

    // Otherwise, we need new space.
    realloc_(newCapacity, stream);
    return true;
  }

 private:
  // Allocates a new buffer of `newCapacity` elements in our memory
  // space, copies the `num_` live elements over on `stream`, and
  // frees the old buffer. Requires num_ <= newCapacity.
  void realloc_(size_t newCapacity, cudaStream_t stream) {
    FAISS_ASSERT(num_ <= newCapacity);

    T* newData = nullptr;
    allocMemorySpace(space_, &newData, newCapacity * sizeof(T));
    CUDA_VERIFY(cudaMemcpyAsync(newData, data_, num_ * sizeof(T),
                                cudaMemcpyDeviceToDevice, stream));
    freeMemorySpace(space_, data_);

    data_ = newData;
    capacity_ = newCapacity;
  }

  // Growth policy: round the requested size up to the next power of 2
  size_t getNewCapacity_(size_t preferredSize) {
    return utils::nextHighestPowerOf2(preferredSize);
  }

  /// Device (or unified, per space_) allocation holding our elements
  T* data_;
  /// Number of valid elements
  size_t num_;
  /// Number of elements for which space is allocated
  size_t capacity_;
  /// Where our allocations are made
  MemorySpace space_;
};
174 
175 } } // namespace