Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuIndexBinaryFlat.cu
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #include "GpuIndexBinaryFlat.h"
9 
10 #include "GpuResources.h"
11 #include "impl/BinaryFlatIndex.cuh"
12 #include "utils/ConversionOperators.cuh"
13 #include "utils/CopyUtils.cuh"
14 #include "utils/DeviceUtils.h"
15 
16 #include <thrust/execution_policy.h>
17 #include <thrust/transform.h>
18 
19 namespace faiss { namespace gpu {
20 
21 /// Default CPU search size for which we use paged copies
22 constexpr size_t kMinPageSize = (size_t) 256 * 1024 * 1024;
23 
/// Construct a GPU binary flat index as a copy of an existing CPU
/// IndexBinaryFlat. Dimension is taken from the source index; the
/// vectors are copied to the configured device.
GpuIndexBinaryFlat::GpuIndexBinaryFlat(GpuResources* resources,
                                       const faiss::IndexBinaryFlat* index,
                                       GpuIndexBinaryFlatConfig config)
    : IndexBinary(index->d),
      resources_(resources),
      config_(std::move(config)),
      data_(nullptr) {
  // Binary vectors are stored packed 8 bits per byte, so the bit
  // dimension must be a whole number of bytes
  FAISS_THROW_IF_NOT_FMT(this->d % 8 == 0,
                         "vector dimension (number of bits) "
                         "must be divisible by 8 (passed %d)",
                         this->d);

  // Flat index doesn't need training
  this->is_trained = true;

  // Populate device storage from the CPU index
  copyFrom(index);
}
41 
42 
/// Construct an empty GPU binary flat index of the given bit dimension.
/// `dims` is the number of bits per vector and must be a multiple of 8.
GpuIndexBinaryFlat::GpuIndexBinaryFlat(GpuResources* resources,
                                       int dims,
                                       GpuIndexBinaryFlatConfig config)
    : IndexBinary(dims),
      resources_(resources),
      config_(std::move(config)),
      data_(nullptr) {
  FAISS_THROW_IF_NOT_FMT(this->d % 8 == 0,
                         "vector dimension (number of bits) "
                         "must be divisible by 8 (passed %d)",
                         this->d);

  // Flat index doesn't need training
  this->is_trained = true;

  // Construct the (empty) device-resident storage
  DeviceScope scope(config_.device);
  data_ = new BinaryFlatIndex(resources,
                              this->d,
                              config_.memorySpace);
}
64 
GpuIndexBinaryFlat::~GpuIndexBinaryFlat() {
  // Release device-side flat storage; deleting nullptr is a no-op, so
  // this is safe even if construction threw before data_ was assigned
  delete data_;
  data_ = nullptr;
}
68 
/// Replace our contents with a copy of the given CPU index. Any
/// previously held vectors are discarded.
void
GpuIndexBinaryFlat::copyFrom(const faiss::IndexBinaryFlat* index) {
  DeviceScope scope(config_.device);

  this->d = index->d;

  // GPU code has 32 bit indices
  FAISS_THROW_IF_NOT_FMT(index->ntotal <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices; "
                         "attempting to copy CPU index with %zu parameters",
                         (size_t) std::numeric_limits<int>::max(),
                         (size_t) index->ntotal);
  this->ntotal = index->ntotal;

  // Drop old storage and rebuild at the (possibly changed) dimension
  delete data_;
  data_ = new BinaryFlatIndex(resources_,
                              this->d,
                              config_.memorySpace);

  // The index could be empty
  if (index->ntotal > 0) {
    data_->add(index->xb.data(),
               index->ntotal,
               resources_->getDefaultStream(config_.device));
  }
}
96 
/// Copy our device-resident contents into the given CPU index,
/// overwriting whatever it previously held.
void
GpuIndexBinaryFlat::copyTo(faiss::IndexBinaryFlat* index) const {
  DeviceScope scope(config_.device);

  index->d = this->d;
  index->ntotal = this->ntotal;

  FAISS_ASSERT(data_);
  FAISS_ASSERT(data_->getSize() == this->ntotal);

  // xb holds packed bits: ntotal vectors of d/8 bytes each
  index->xb.resize(this->ntotal * (this->d / 8));

  // The index could be empty, in which case there is nothing to copy
  if (this->ntotal > 0) {
    fromDevice(data_->getVectorsRef(),
               index->xb.data(),
               resources_->getDefaultStream(config_.device));
  }
}
114 
/// Append n packed binary vectors (d/8 bytes each) to the index.
/// `x` may reside on the host or the device.
void
GpuIndexBinaryFlat::add(faiss::IndexBinary::idx_t n,
                        const uint8_t* x) {
  DeviceScope scope(config_.device);

  // Adding zero vectors is a no-op; return early so we never touch `x`
  // (which callers may legitimately pass as nullptr when n == 0)
  if (n == 0) {
    return;
  }

  // To avoid multiple re-allocations, ensure we have enough storage
  // available
  data_->reserve(n, resources_->getDefaultStream(config_.device));

  // Due to GPU indexing in int32, we can't store more than this
  // number of vectors on a GPU
  FAISS_THROW_IF_NOT_FMT(this->ntotal + n <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices",
                         (size_t) std::numeric_limits<int>::max());

  data_->add((const unsigned char*) x,
             n,
             resources_->getDefaultStream(config_.device));
  this->ntotal += n;
}
136 
/// Remove all vectors from the index, freeing their device memory.
void
GpuIndexBinaryFlat::reset() {
  DeviceScope scope(config_.device);

  // Free the underlying memory
  data_->reset();
  this->ntotal = 0;
}
145 
/// Find the k nearest (Hamming-distance) neighbors of n query vectors.
/// Queries may live on the host or the device; outputs `distances` and
/// `labels` likewise, and are copied back if needed.
void
GpuIndexBinaryFlat::search(faiss::IndexBinary::idx_t n,
                           const uint8_t* x,
                           faiss::IndexBinary::idx_t k,
                           int32_t* distances,
                           faiss::IndexBinary::idx_t* labels) const {
  if (n == 0) {
    return;
  }

  // For now, only support <= max int results
  FAISS_THROW_IF_NOT_FMT(n <= (Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices",
                         (size_t) std::numeric_limits<int>::max());
  FAISS_THROW_IF_NOT_FMT(k <= (Index::idx_t) getMaxKSelection(),
                         "GPU only supports k <= %d (requested %d)",
                         getMaxKSelection(),
                         (int) k); // select limitation

  DeviceScope scope(config_.device);
  auto stream = resources_->getDefaultStream(config_.device);

  // The input vectors may be too large for the GPU, but we still
  // assume that the output distances and labels are not.
  // Go ahead and make space for output distances and labels on the
  // GPU.
  // If we reach a point where all inputs are too big, we can add
  // another level of tiling.
  auto outDistances = toDevice<int32_t, 2>(resources_,
                                           config_.device,
                                           distances,
                                           stream,
                                           {(int) n, (int) k});

  // FlatIndex only supports an interface returning int indices
  DeviceTensor<int, 2, true> outIntIndices(
    resources_->getMemoryManagerCurrentDevice(),
    {(int) n, (int) k}, stream);

  bool usePaged = false;

  if (getDeviceForAddress(x) == -1) {
    // The queries are CPU-resident and may be too large to copy to
    // the GPU in one shot; page them in when past the threshold.
    // Currently, we don't handle the case where the output data won't
    // fit on the GPU (e.g., n * k is too large for the GPU memory).
    size_t dataSize = (size_t) n * (this->d / 8) * sizeof(uint8_t);

    if (dataSize >= kMinPageSize) {
      searchFromCpuPaged_(n, x, k,
                          outDistances.data(),
                          outIntIndices.data());
      usePaged = true;
    }
  }

  if (!usePaged) {
    searchNonPaged_(n, x, k,
                    outDistances.data(),
                    outIntIndices.data());
  }

  // Convert and copy int indices out
  auto outIndices = toDevice<faiss::Index::idx_t, 2>(resources_,
                                                     config_.device,
                                                     labels,
                                                     stream,
                                                     {(int) n, (int) k});

  // Widen the int32 indices to the user-visible idx_t type on device
  thrust::transform(thrust::cuda::par.on(stream),
                    outIntIndices.data(),
                    outIntIndices.end(),
                    outIndices.data(),
                    IntToIdxType());

  // Copy back if necessary
  fromDevice<int32_t, 2>(outDistances, distances, stream);
  fromDevice<faiss::Index::idx_t, 2>(outIndices, labels, stream);
}
228 
/// Single-shot search path: move the queries to the device (if they
/// are not already there) in one temporary allocation and query the
/// flat index directly into the provided output buffers.
void
GpuIndexBinaryFlat::searchNonPaged_(int n,
                                    const uint8_t* x,
                                    int k,
                                    int32_t* outDistancesData,
                                    int* outIndicesData) const {
  auto stream = resources_->getDefaultStream(config_.device);

  // Wrap the raw output pointers as 2-D tensors
  Tensor<int32_t, 2, true> distances(outDistancesData, {n, k});
  Tensor<int, 2, true> indices(outIndicesData, {n, k});

  // Make sure arguments are on the device we desire; use temporary
  // memory allocations to move them if necessary
  int bytesPerVec = (int) (this->d / 8);
  auto queries = toDevice<uint8_t, 2>(resources_,
                                      config_.device,
                                      const_cast<uint8_t*>(x),
                                      stream,
                                      {n, bytesPerVec});

  data_->query(queries, k, distances, indices);
}
250 
/// Paged search path for large CPU-resident query sets: process the
/// queries in fixed-size batches so each batch's copy fits comfortably
/// on the device.
void
GpuIndexBinaryFlat::searchFromCpuPaged_(int n,
                                        const uint8_t* x,
                                        int k,
                                        int32_t* outDistancesData,
                                        int* outIndicesData) const {
  Tensor<int32_t, 2, true> outDistances(outDistancesData, {n, k});
  Tensor<int, 2, true> outIndices(outIndicesData, {n, k});

  // Bytes per packed binary vector
  auto vectorSize = sizeof(uint8_t) * (this->d / 8);

  // Just page without overlapping copy with compute (as GpuIndexFlat does)
  int batchSize = utils::nextHighestPowerOf2(
    (int) ((size_t) kMinPageSize / vectorSize));

  for (int start = 0; start < n; start += batchSize) {
    int numInBatch = std::min(batchSize, n - start);

    auto distancesSlice = outDistances.narrowOutermost(start, numInBatch);
    auto indicesSlice = outIndices.narrowOutermost(start, numInBatch);

    searchNonPaged_(numInBatch,
                    x + (size_t) start * (this->d / 8),
                    k,
                    distancesSlice.data(),
                    indicesSlice.data());
  }
}
279 
/// Copy the stored vector at position `key` into `out` (d/8 packed
/// bytes). `out` may be a host or device pointer.
void
GpuIndexBinaryFlat::reconstruct(faiss::IndexBinary::idx_t key,
                                uint8_t* out) const {
  DeviceScope scope(config_.device);

  // Reject negative keys as well as too-large ones; the original check
  // only tested the upper bound, so a negative key would index before
  // the start of device storage
  FAISS_THROW_IF_NOT_MSG(key >= 0 && key < this->ntotal,
                         "index out of bounds");
  auto stream = resources_->getDefaultStream(config_.device);

  auto& vecs = data_->getVectorsRef();
  auto vec = vecs[key];

  // getSize(1) is the number of bytes per vector (d / 8)
  fromDevice(vec.data(), out, vecs.getSize(1), stream);
}
293 
294 } } // namespace gpu
void reset()
Free all storage.
void reset() override
Removes all elements from the database.
Holder of GPU resources for a particular flat index.
void copyTo(faiss::IndexBinaryFlat *index) const
bool is_trained
set if the Index does not require training, or if training is done already
Definition: IndexBinary.h:47
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:25
virtual cudaStream_t getDefaultStream(int device)=0
Index::idx_t idx_t
all indices are this type
Definition: IndexBinary.h:37
int d
vector dimension
Definition: IndexBinary.h:41
long idx_t
all indices are this type
Definition: Index.h:62
void add(faiss::IndexBinary::idx_t n, const uint8_t *x) override
GpuIndexBinaryFlatConfig config_
Configuration options.
void search(faiss::IndexBinary::idx_t n, const uint8_t *x, faiss::IndexBinary::idx_t k, int32_t *distances, faiss::IndexBinary::idx_t *labels) const override
GpuResources * resources_
Manages streams, cuBLAS handles and scratch memory for devices.
MemorySpace memorySpace
Definition: GpuIndex.h:30
Our tensor type.
Definition: Tensor.cuh:28
void searchFromCpuPaged_(int n, const uint8_t *x, int k, int32_t *outDistancesData, int *outIndicesData) const
idx_t ntotal
total nb of indexed vectors
Definition: IndexBinary.h:43
std::vector< uint8_t > xb
database vectors, size ntotal * d / 8
void reconstruct(faiss::IndexBinary::idx_t key, uint8_t *recons) const override
void copyFrom(const faiss::IndexBinaryFlat *index)
GpuIndexBinaryFlat(GpuResources *resources, const faiss::IndexBinaryFlat *index, GpuIndexBinaryFlatConfig config=GpuIndexBinaryFlatConfig())
int getSize() const
Returns the number of vectors we contain.
void reserve(size_t numVecs, cudaStream_t stream)
Reserve storage that can contain at least this many vectors.
Tensor< unsigned char, 2, true > & getVectorsRef()
Returns a reference to our vectors currently in use.
void add(const unsigned char *data, int numVecs, cudaStream_t stream)