Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuIndexBinaryFlat.cu
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */
8 
#include "GpuIndexBinaryFlat.h"

#include "GpuResources.h"
#include "impl/BinaryFlatIndex.cuh"
#include "utils/ConversionOperators.cuh"
#include "utils/CopyUtils.cuh"
#include "utils/DeviceUtils.h"

#include <thrust/execution_policy.h>
#include <thrust/transform.h>

#include <algorithm>
#include <limits>
19 
20 namespace faiss { namespace gpu {
21 
/// Default CPU search size (in bytes) at or above which we switch to
/// paged copies of query data from host to device (256 MiB).
constexpr size_t kMinPageSize = (size_t) 256 * 1024 * 1024;
24 
/// Construct from a pre-populated CPU binary flat index; copies all
/// vectors to the GPU named in `config`.
GpuIndexBinaryFlat::GpuIndexBinaryFlat(GpuResources* resources,
                                       const faiss::IndexBinaryFlat* index,
                                       GpuIndexBinaryFlatConfig config)
    : IndexBinary(index->d),
      resources_(resources),
      config_(std::move(config)),
      data_(nullptr) {
  // Binary vectors are stored as bytes; a dimension not divisible by 8
  // cannot be represented
  FAISS_THROW_IF_NOT_FMT(this->d % 8 == 0,
                         "vector dimension (number of bits) "
                         "must be divisible by 8 (passed %d)",
                         this->d);

  // Flat index doesn't need training
  this->is_trained = true;

  // copyFrom allocates data_ and copies the CPU index's vectors over
  copyFrom(index);
}
42 
43 
/// Construct an empty binary flat index of dimension `dims` (in bits)
/// on the GPU named in `config`.
GpuIndexBinaryFlat::GpuIndexBinaryFlat(GpuResources* resources,
                                       int dims,
                                       GpuIndexBinaryFlatConfig config)
    : IndexBinary(dims),
      resources_(resources),
      config_(std::move(config)),
      data_(nullptr) {
  // Binary vectors are stored as bytes; a dimension not divisible by 8
  // cannot be represented
  FAISS_THROW_IF_NOT_FMT(this->d % 8 == 0,
                         "vector dimension (number of bits) "
                         "must be divisible by 8 (passed %d)",
                         this->d);

  // Flat index doesn't need training
  this->is_trained = true;

  // Construct index
  DeviceScope scope(config_.device);
  data_ = new BinaryFlatIndex(resources,
                              this->d,
                              config_.memorySpace);
}
65 
GpuIndexBinaryFlat::~GpuIndexBinaryFlat() {
  // data_ may still be nullptr if construction threw before allocation;
  // delete of nullptr is a no-op
  delete data_;
}
69 
/// Copy the contents of the given CPU binary flat index to ourselves,
/// replacing any vectors we currently hold.
void
GpuIndexBinaryFlat::copyFrom(const faiss::IndexBinaryFlat* index) {
  DeviceScope scope(config_.device);

  this->d = index->d;

  // GPU code has 32 bit indices
  FAISS_THROW_IF_NOT_FMT(index->ntotal <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices; "
                         "attempting to copy CPU index with %zu parameters",
                         (size_t) std::numeric_limits<int>::max(),
                         (size_t) index->ntotal);
  this->ntotal = index->ntotal;

  // Release any prior storage and reallocate for the new dimension
  delete data_;
  data_ = new BinaryFlatIndex(resources_,
                              this->d,
                              config_.memorySpace);

  // The index could be empty
  if (index->ntotal > 0) {
    data_->add(index->xb.data(),
               index->ntotal,
               resources_->getDefaultStream(config_.device));
  }
}
97 
/// Copy our contents out to the given CPU binary flat index, replacing
/// whatever it currently holds.
void
GpuIndexBinaryFlat::copyTo(faiss::IndexBinaryFlat* index) const {
  DeviceScope scope(config_.device);

  index->d = this->d;
  index->ntotal = this->ntotal;

  FAISS_ASSERT(data_);
  FAISS_ASSERT(data_->getSize() == this->ntotal);
  // CPU-side storage is ntotal vectors of d/8 bytes each
  index->xb.resize(this->ntotal * (this->d / 8));

  if (this->ntotal > 0) {
    fromDevice(data_->getVectorsRef(),
               index->xb.data(),
               resources_->getDefaultStream(config_.device));
  }
}
115 
/// Append `n` binary vectors (each d/8 bytes, host or device pointer)
/// to the index.
void
GpuIndexBinaryFlat::add(faiss::IndexBinary::idx_t n,
                        const uint8_t* x) {
  DeviceScope scope(config_.device);

  // To avoid multiple re-allocations, ensure we have enough storage
  // available
  data_->reserve(this->ntotal + n, resources_->getDefaultStream(config_.device));

  // Due to GPU indexing in int32, we can't store more than this
  // number of vectors on a GPU
  FAISS_THROW_IF_NOT_FMT(this->ntotal + n <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices",
                         (size_t) std::numeric_limits<int>::max());

  data_->add((const unsigned char*) x,
             n,
             resources_->getDefaultStream(config_.device));
  this->ntotal += n;
}
137 
/// Remove all vectors from the index.
void
GpuIndexBinaryFlat::reset() {
  DeviceScope scope(config_.device);

  // Free the underlying memory
  data_->reset();
  this->ntotal = 0;
}
146 
/// Find the `k` nearest neighbors (by Hamming distance) of the `n`
/// query vectors in `x` (each d/8 bytes, host or device pointer),
/// writing int32 distances and idx_t labels to the output arrays.
void
GpuIndexBinaryFlat::search(faiss::IndexBinary::idx_t n,
                           const uint8_t* x,
                           faiss::IndexBinary::idx_t k,
                           int32_t* distances,
                           faiss::IndexBinary::idx_t* labels) const {
  if (n == 0) {
    return;
  }

  // For now, only support <= max int results
  FAISS_THROW_IF_NOT_FMT(n <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices",
                         (size_t) std::numeric_limits<int>::max());
  FAISS_THROW_IF_NOT_FMT(k <= 1024,
                         "GPU only supports k <= 1024 (requested %d)",
                         (int) k); // select limitation

  DeviceScope scope(config_.device);
  auto stream = resources_->getDefaultStream(config_.device);

  // The input vectors may be too large for the GPU, but we still
  // assume that the output distances and labels are not.
  // Go ahead and make space for output distances and labels on the
  // GPU.
  // If we reach a point where all inputs are too big, we can add
  // another level of tiling.
  auto outDistances = toDevice<int32_t, 2>(resources_,
                                           config_.device,
                                           distances,
                                           stream,
                                           {(int) n, (int) k});

  // FlatIndex only supports an interface returning int indices
  DeviceTensor<int, 2, true> outIntIndices(
      resources_->getMemoryManagerCurrentDevice(),
      {(int) n, (int) k}, stream);

  bool usePaged = false;

  if (getDeviceForAddress(x) == -1) {
    // It is possible that the user is querying for a vector set size
    // `x` that won't fit on the GPU.
    // In this case, we will have to handle paging of the data from CPU
    // -> GPU.
    // Currently, we don't handle the case where the output data won't
    // fit on the GPU (e.g., n * k is too large for the GPU memory).
    size_t dataSize = (size_t) n * (this->d / 8) * sizeof(uint8_t);

    if (dataSize >= kMinPageSize) {
      searchFromCpuPaged_(n, x, k,
                          outDistances.data(),
                          outIntIndices.data());
      usePaged = true;
    }
  }

  if (!usePaged) {
    searchNonPaged_(n, x, k,
                    outDistances.data(),
                    outIntIndices.data());
  }

  // Convert and copy int indices out
  auto outIndices = toDevice<faiss::Index::idx_t, 2>(resources_,
                                                     config_.device,
                                                     labels,
                                                     stream,
                                                     {(int) n, (int) k});

  // Convert int to long
  thrust::transform(thrust::cuda::par.on(stream),
                    outIntIndices.data(),
                    outIntIndices.end(),
                    outIndices.data(),
                    IntToIdxType());

  // Copy back if necessary
  fromDevice<int32_t, 2>(outDistances, distances, stream);
  fromDevice<faiss::Index::idx_t, 2>(outIndices, labels, stream);
}
229 
/// Search implementation for query data that either already resides on
/// the device or is small enough to copy over in a single allocation.
/// Output pointers must reference device-resident memory.
void
GpuIndexBinaryFlat::searchNonPaged_(int n,
                                    const uint8_t* x,
                                    int k,
                                    int32_t* outDistancesData,
                                    int* outIndicesData) const {
  // Wrap the raw output pointers as (n, k) device tensors
  Tensor<int32_t, 2, true> outDistances(outDistancesData, {n, k});
  Tensor<int, 2, true> outIndices(outIndicesData, {n, k});

  auto stream = resources_->getDefaultStream(config_.device);

  // Make sure arguments are on the device we desire; use temporary
  // memory allocations to move it if necessary
  auto vecs = toDevice<uint8_t, 2>(resources_,
                                   config_.device,
                                   const_cast<uint8_t*>(x),
                                   stream,
                                   {n, (int) (this->d / 8)});

  data_->query(vecs, k, outDistances, outIndices);
}
251 
/// Search implementation for CPU-resident query data too large to copy
/// to the device in one shot: pages through `x` in fixed-size batches,
/// running a non-paged search per batch.
void
GpuIndexBinaryFlat::searchFromCpuPaged_(int n,
                                        const uint8_t* x,
                                        int k,
                                        int32_t* outDistancesData,
                                        int* outIndicesData) const {
  Tensor<int32_t, 2, true> outDistances(outDistancesData, {n, k});
  Tensor<int, 2, true> outIndices(outIndicesData, {n, k});

  // Bytes per query vector
  auto vectorSize = sizeof(uint8_t) * (this->d / 8);

  // Just page without overlapping copy with compute (as GpuIndexFlat does)
  int batchSize = utils::nextHighestPowerOf2(
      (int) ((size_t) kMinPageSize / vectorSize));

  for (int cur = 0; cur < n; cur += batchSize) {
    // The final batch may be smaller than batchSize
    int num = std::min(batchSize, n - cur);

    auto outDistancesSlice = outDistances.narrowOutermost(cur, num);
    auto outIndicesSlice = outIndices.narrowOutermost(cur, num);

    searchNonPaged_(num,
                    x + (size_t) cur * (this->d / 8),
                    k,
                    outDistancesSlice.data(),
                    outIndicesSlice.data());
  }
}
280 
/// Copy the stored binary vector at position `key` (d/8 bytes) from the
/// device into `out`.
void
GpuIndexBinaryFlat::reconstruct(faiss::IndexBinary::idx_t key,
                                uint8_t* out) const {
  DeviceScope scope(config_.device);

  FAISS_THROW_IF_NOT_MSG(key < this->ntotal, "index out of bounds");
  auto stream = resources_->getDefaultStream(config_.device);

  auto& vecs = data_->getVectorsRef();
  auto vec = vecs[key];

  // getSize(1) is the per-vector byte width (d / 8)
  fromDevice(vec.data(), out, vecs.getSize(1), stream);
}
294 
295 } } // namespace gpu
void reset()
Free all storage.
void reset() override
Removes all elements from the database.
Holder of GPU resources for a particular flat index.
void copyTo(faiss::IndexBinaryFlat *index) const
bool is_trained
set if the Index does not require training, or if training is done already
Definition: IndexBinary.h:46
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:26
virtual cudaStream_t getDefaultStream(int device)=0
int d
vector dimension
Definition: IndexBinary.h:40
void add(faiss::IndexBinary::idx_t n, const uint8_t *x) override
GpuIndexBinaryFlatConfig config_
Configuration options.
void search(faiss::IndexBinary::idx_t n, const uint8_t *x, faiss::IndexBinary::idx_t k, int32_t *distances, faiss::IndexBinary::idx_t *labels) const override
GpuResources * resources_
Manages streams, cuBLAS handles, and scratch memory for devices.
MemorySpace memorySpace
Definition: GpuIndex.h:31
long idx_t
all indices are this type
Definition: Index.h:64
Our tensor type.
Definition: Tensor.cuh:29
void searchFromCpuPaged_(int n, const uint8_t *x, int k, int32_t *outDistancesData, int *outIndicesData) const
idx_t ntotal
total nb of indexed vectors
Definition: IndexBinary.h:42
std::vector< uint8_t > xb
database vectors, size ntotal * d / 8
void reconstruct(faiss::IndexBinary::idx_t key, uint8_t *recons) const override
long idx_t
all indices are this type
Definition: IndexBinary.h:38
void copyFrom(const faiss::IndexBinaryFlat *index)
GpuIndexBinaryFlat(GpuResources *resources, const faiss::IndexBinaryFlat *index, GpuIndexBinaryFlatConfig config=GpuIndexBinaryFlatConfig())
int getSize() const
Returns the number of vectors we contain.
void reserve(size_t numVecs, cudaStream_t stream)
Reserve storage that can contain at least this many vectors.
Tensor< unsigned char, 2, true > & getVectorsRef()
Returns a reference to our vectors currently in use.
void add(const unsigned char *data, int numVecs, cudaStream_t stream)