GpuIndexIVFPQ.cu

/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "GpuIndexIVFPQ.h"
#include "../ProductQuantizer.h"
#include "GpuIndexFlat.h"
#include "GpuResources.h"
#include "impl/IVFPQ.cuh"
#include "utils/CopyUtils.cuh"
#include "utils/DeviceUtils.h"
#include "../IndexFlat.h"
#include "../IndexIVFPQ.h"

#include <limits>

namespace faiss { namespace gpu {

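// Construct from a pre-existing CPU faiss::IndexIVFPQ; the IVF parameters,
// coarse quantizer and (if the CPU index is trained) all inverted lists are
// copied onto the given GPU device via copyFrom() below.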
GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources,
                             int device,
                             IndicesOptions indicesOptions,
                             bool useFloat16LookupTables,
                             const faiss::IndexIVFPQ* index) :
    GpuIndexIVF(resources,
                device,
                indicesOptions,
                false, // FIXME: float 16 coarse quantizer
                index->d,
                index->metric_type,
                index->nlist),
    useFloat16LookupTables_(useFloat16LookupTables),
    subQuantizers_(0),
    bitsPerCode_(0),
    usePrecomputed_(false),
    reserveMemoryVecs_(0),
    index_(nullptr) {
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16LookupTables_);
#endif

  copyFrom(index);
}

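// Construct an empty GPU IVFPQ index with the given parameters; train() must
// be called before any vectors can be added.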
GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources,
                             int device,
                             int dims,
                             int nlist,
                             int subQuantizers,
                             int bitsPerCode,
                             bool usePrecomputed,
                             IndicesOptions indicesOptions,
                             bool useFloat16LookupTables,
                             faiss::MetricType metric) :
    GpuIndexIVF(resources,
                device,
                indicesOptions,
                false, // FIXME: float 16 coarse quantizer
                dims,
                metric,
                nlist),
    useFloat16LookupTables_(useFloat16LookupTables),
    subQuantizers_(subQuantizers),
    bitsPerCode_(bitsPerCode),
    usePrecomputed_(usePrecomputed),
    reserveMemoryVecs_(0),
    index_(nullptr) {
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16LookupTables_);
#endif

  assertSettings_();

  // FIXME make IP work fully
  FAISS_ASSERT(this->metric_type == faiss::METRIC_L2);

  // We haven't trained ourselves, so don't construct the PQ index yet
  this->is_trained = false;
}

GpuIndexIVFPQ::~GpuIndexIVFPQ() {
  delete index_;
}

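// Initialize ourselves from a CPU IndexIVFPQ: copy the IVF parameters and
// coarse quantizer via GpuIndexIVF::copyFrom, then (if the CPU index is
// trained) rebuild the GPU PQ data structures and upload all inverted lists.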
void
GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) {
  DeviceScope scope(device_);

  // FIXME: support this
  FAISS_ASSERT(index->metric_type == faiss::METRIC_L2);
  GpuIndexIVF::copyFrom(index);

  // Clear out our old data
  delete index_;
  index_ = nullptr;

  subQuantizers_ = index->pq.M;
  bitsPerCode_ = index->pq.nbits;

  // We only support this
  FAISS_ASSERT(index->pq.byte_per_idx == 1);
  FAISS_ASSERT(index->by_residual);
  FAISS_ASSERT(index->polysemous_ht == 0);
  usePrecomputed_ = index->use_precomputed_table;

  assertSettings_();

  // The other index might not be trained
  if (!index->is_trained) {
    return;
  }

  // Otherwise, we can populate ourselves from the other index
  this->is_trained = true;

  // Copy our lists as well
  // The product quantizer must have data in it
  FAISS_ASSERT(index->pq.centroids.size() > 0);
  index_ = new IVFPQ(resources_,
                     quantizer_->getGpuData(),
                     subQuantizers_,
                     bitsPerCode_,
                     (float*) index->pq.centroids.data(),
                     indicesOptions_,
                     useFloat16LookupTables_);
  // Doesn't make sense to reserve memory here
  index_->setPrecomputedCodes(usePrecomputed_);

  // Copy database vectors, if any
  for (size_t i = 0; i < index->codes.size(); ++i) {
    auto& codes = index->codes[i];
    auto& indices = index->ids[i];

    FAISS_ASSERT(indices.size() * subQuantizers_ == codes.size());
    index_->addCodeVectorsFromCpu(i,
                                  codes.data(),
                                  indices.data(),
                                  indices.size());
  }
}

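// Copy our state back into a CPU faiss::IndexIVFPQ, including all inverted
// lists and the PQ centroids. Requires that indices were not stored on the
// GPU as INDICES_IVF, since we need them to reconstruct the CPU lists.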
void
GpuIndexIVFPQ::copyTo(faiss::IndexIVFPQ* index) const {
  DeviceScope scope(device_);

  // We must have the indices in order to copy to ourselves
  FAISS_ASSERT(indicesOptions_ != INDICES_IVF);

  GpuIndexIVF::copyTo(index);

  //
  // IndexIVFPQ information
  //
  index->by_residual = true;
  index->use_precomputed_table = 0;
  index->code_size = subQuantizers_;
  index->pq = faiss::ProductQuantizer(this->d, subQuantizers_, bitsPerCode_);

  index->do_polysemous_training = false;
  index->polysemous_training = nullptr;

  index->scan_table_threshold = 0;
  index->max_codes = 0;
  index->polysemous_ht = 0;
  index->codes.clear();
  index->codes.resize(nlist_);
  index->precomputed_table.clear();

  if (index_) {
    // Copy the inverted lists
    for (int i = 0; i < nlist_; ++i) {
      index->ids[i] = getListIndices(i);
      index->codes[i] = getListCodes(i);
    }

    // Copy PQ centroids
    auto devPQCentroids = index_->getPQCentroids();
    index->pq.centroids.resize(devPQCentroids.numElements());

    fromDevice<float, 3>(devPQCentroids,
                         index->pq.centroids.data(),
                         resources_->getDefaultStream(device_));

    if (usePrecomputed_) {
      index->precompute_table();
    }
  }
}

void
GpuIndexIVFPQ::reserveMemory(size_t numVecs) {
  reserveMemoryVecs_ = numVecs;
  if (index_) {
    DeviceScope scope(device_);
    index_->reserveMemory(numVecs);
  }
}

void
GpuIndexIVFPQ::setPrecomputedCodes(bool enable) {
  usePrecomputed_ = enable;
  if (index_) {
    DeviceScope scope(device_);
    index_->setPrecomputedCodes(enable);
  }

  assertSettings_();
}

bool
GpuIndexIVFPQ::getPrecomputedCodes() const {
  return usePrecomputed_;
}

bool
GpuIndexIVFPQ::getFloat16LookupTables() const {
  return useFloat16LookupTables_;
}

int
GpuIndexIVFPQ::getNumSubQuantizers() const {
  return subQuantizers_;
}

int
GpuIndexIVFPQ::getBitsPerCode() const {
  return bitsPerCode_;
}

int
GpuIndexIVFPQ::getCentroidsPerSubQuantizer() const {
  return utils::pow2(bitsPerCode_);
}

size_t
GpuIndexIVFPQ::reclaimMemory() {
  if (index_) {
    DeviceScope scope(device_);
    return index_->reclaimMemory();
  }

  return 0;
}

void
GpuIndexIVFPQ::reset() {
  if (index_) {
    DeviceScope scope(device_);

    index_->reset();
    this->ntotal = 0;
  } else {
    FAISS_ASSERT(this->ntotal == 0);
  }
}

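// Train the product quantizer on CPU-computed residuals (x minus the
// assigned coarse centroid), then build the GPU IVFPQ structure from the
// trained sub-quantizer centroids.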
void
GpuIndexIVFPQ::trainResidualQuantizer_(Index::idx_t n, const float* x) {
  // Code largely copied from faiss::IndexIVFPQ
  // FIXME: GPUize more of this
  n = std::min(n, (Index::idx_t) (1 << bitsPerCode_) * 64);

  if (this->verbose) {
    printf("computing residuals\n");
  }

  std::vector<Index::idx_t> assign(n);
  quantizer_->assign(n, x, assign.data());

  std::vector<float> residuals(n * d);

  for (idx_t i = 0; i < n; i++) {
    quantizer_->compute_residual(x + i * d, &residuals[i * d], assign[i]);
  }

  if (this->verbose) {
    printf("training %d x %d product quantizer on %ld vectors in %dD\n",
           subQuantizers_, getCentroidsPerSubQuantizer(), n, this->d);
  }

  // Just use the CPU product quantizer to determine sub-centroids
  faiss::ProductQuantizer pq(this->d, subQuantizers_, bitsPerCode_);
  pq.verbose = this->verbose;
  pq.train(n, residuals.data());

  index_ = new IVFPQ(resources_,
                     quantizer_->getGpuData(),
                     subQuantizers_,
                     bitsPerCode_,
                     pq.centroids.data(),
                     indicesOptions_,
                     useFloat16LookupTables_);
  if (reserveMemoryVecs_) {
    index_->reserveMemory(reserveMemoryVecs_);
  }

  index_->setPrecomputedCodes(usePrecomputed_);
}

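// Train the coarse quantizer and then the product quantizer on residuals;
// this is a no-op if the index is already trained.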
void
GpuIndexIVFPQ::train(Index::idx_t n, const float* x) {
  DeviceScope scope(device_);

  if (this->is_trained) {
    FAISS_ASSERT(quantizer_->is_trained);
    FAISS_ASSERT(quantizer_->ntotal == nlist_);
    FAISS_ASSERT(index_);
    return;
  }

  FAISS_ASSERT(!index_);

  trainQuantizer_(n, x);
  trainResidualQuantizer_(n, x);

  this->is_trained = true;
}

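// Add vectors with caller-provided ids: each vector is assigned to its
// closest coarse centroid on the GPU and PQ-encoded into that inverted list.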
void
GpuIndexIVFPQ::add_with_ids(Index::idx_t n,
                            const float* x,
                            const Index::idx_t* xids) {
  FAISS_ASSERT(this->is_trained);
  FAISS_ASSERT(index_);

  if (n == 0) {
    return;
  }

  DeviceScope scope(device_);
  auto stream = resources_->getDefaultStreamCurrentDevice();

  auto deviceVecs =
    toDevice<float, 2>(resources_,
                       device_,
                       const_cast<float*>(x),
                       stream,
                       {(int) n, index_->getDim()});

  auto deviceIndices =
    toDevice<Index::idx_t, 1>(resources_,
                              device_,
                              const_cast<Index::idx_t*>(xids),
                              stream,
                              {(int) n});

  // Not all vectors may be able to be added (some may contain NaNs
  // etc)
  ntotal += index_->classifyAndAddVectors(deviceVecs, deviceIndices);
}

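// Query the index for the k nearest neighbors of each of the n query vectors;
// inputs and outputs are staged to/from the GPU as needed, and each query
// probes nprobe_ inverted lists.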
void
GpuIndexIVFPQ::search(faiss::Index::idx_t n,
                      const float* x,
                      faiss::Index::idx_t k,
                      float* distances,
                      faiss::Index::idx_t* labels) const {
  FAISS_ASSERT(this->is_trained);
  FAISS_ASSERT(index_);

  if (n == 0) {
    return;
  }

  DeviceScope scope(device_);

  // Make sure arguments are on the device we desire; use temporary
  // memory allocations to move it if necessary
  auto devX =
    toDevice<float, 2>(resources_,
                       device_,
                       const_cast<float*>(x),
                       resources_->getDefaultStream(device_),
                       {(int) n, index_->getDim()});
  auto devDistances =
    toDevice<float, 2>(resources_,
                       device_,
                       distances,
                       resources_->getDefaultStream(device_),
                       {(int) n, (int) k});
  auto devLabels =
    toDevice<faiss::Index::idx_t, 2>(resources_,
                                     device_,
                                     labels,
                                     resources_->getDefaultStream(device_),
                                     {(int) n, (int) k});

  index_->query(devX,
                nprobe_,
                (int) k,
                devDistances,
                devLabels);

  // Copy back if necessary
  fromDevice<float, 2>(
    devDistances, distances, resources_->getDefaultStream(device_));
  fromDevice<faiss::Index::idx_t, 2>(
    devLabels, labels, resources_->getDefaultStream(device_));
}

void
GpuIndexIVFPQ::set_typename() {
  // FIXME: implement
  FAISS_ASSERT(false);
}

int
GpuIndexIVFPQ::getListLength(int listId) const {
  FAISS_ASSERT(index_);
  return index_->getListLength(listId);
}

std::vector<unsigned char>
GpuIndexIVFPQ::getListCodes(int listId) const {
  FAISS_ASSERT(index_);
  DeviceScope scope(device_);

  return index_->getListCodes(listId);
}

std::vector<long>
GpuIndexIVFPQ::getListIndices(int listId) const {
  FAISS_ASSERT(index_);
  DeviceScope scope(device_);

  return index_->getListIndices(listId);
}

void
GpuIndexIVFPQ::assertSettings_() const {
  // Our implementation has these restrictions:

  // Must have some number of lists
  FAISS_ASSERT(nlist_ > 0);

  // up to a single byte per code
  FAISS_ASSERT(bitsPerCode_ <= 8);

  // Sub-quantizers must evenly divide dimensions available
  FAISS_ASSERT(this->d % subQuantizers_ == 0);

  // The number of bytes per encoded vector must be one we support
  FAISS_ASSERT(IVFPQ::isSupportedPQCodeLength(subQuantizers_));

  // We must have enough shared memory on the current device to store
  // our lookup distances
  int lookupTableSize = sizeof(float);
#ifdef FAISS_USE_FLOAT16
  if (useFloat16LookupTables_) {
    lookupTableSize = sizeof(half);
  }
#endif

  // 64 bytes per code is only supported with usage of float16, at 2^8
  // codes per subquantizer
  FAISS_ASSERT(lookupTableSize * subQuantizers_ * utils::pow2(bitsPerCode_)
               <= getMaxSharedMemPerBlock(device_));

  // If precomputed codes are disabled, we have an extra limitation in
  // terms of the number of dimensions per subquantizer
  FAISS_ASSERT(usePrecomputed_ ||
               IVFPQ::isSupportedNoPrecomputedSubDimSize(
                 this->d / subQuantizers_));

  // TODO: fully implement METRIC_INNER_PRODUCT
  FAISS_ASSERT(this->metric_type == faiss::METRIC_L2);
}

} } // namespace
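
// Usage sketch (illustrative only, not part of the original file). It assumes
// the StandardGpuResources implementation, the setNumProbes() accessor on
// GpuIndexIVF, and that the host pointers below refer to valid float/id data:
//
//   faiss::gpu::StandardGpuResources res;
//
//   // 64-d vectors, 1024 coarse lists, 8 sub-quantizers at 8 bits each,
//   // precomputed codes on, 32-bit GPU-resident list indices, float32
//   // lookup tables, L2 metric, GPU device 0
//   faiss::gpu::GpuIndexIVFPQ index(&res, 0, 64, 1024, 8, 8,
//                                   true, faiss::gpu::INDICES_32_BIT,
//                                   false, faiss::METRIC_L2);
//
//   index.train(numTrain, trainVecs);            // coarse + product quantizer
//   index.add_with_ids(numBase, baseVecs, ids);  // encode into inverted lists
//   index.setNumProbes(32);                      // lists visited per query
//   index.search(numQueries, queries, 10, distances, labels);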