Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuIndexIVFPQ.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "GpuIndexIVFPQ.h"
12 #include "../IndexFlat.h"
13 #include "../IndexIVFPQ.h"
14 #include "../ProductQuantizer.h"
15 #include "GpuIndexFlat.h"
16 #include "GpuResources.h"
17 #include "impl/IVFPQ.cuh"
18 #include "utils/CopyUtils.cuh"
19 #include "utils/DeviceUtils.h"
20 
21 #include <limits>
22 
23 namespace faiss { namespace gpu {
24 
// Construct a GPU IVFPQ index by copying the state of an existing CPU
// faiss::IndexIVFPQ. Quantizer data, PQ parameters and (if the CPU index
// is trained) all inverted list contents are imported via copyFrom().
GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources,
                             const faiss::IndexIVFPQ* index,
                             GpuIndexIVFPQConfig config) :
    GpuIndexIVF(resources,
                index->d,
                index->metric_type,
                index->nlist,
                config),
    ivfpqConfig_(config),
    // PQ parameters are filled in by copyFrom() from index->pq
    subQuantizers_(0),
    bitsPerCode_(0),
    reserveMemoryVecs_(0),
    index_(nullptr) {
#ifndef FAISS_USE_FLOAT16
  // float16 lookup tables require a float16-enabled build
  FAISS_ASSERT(!ivfpqConfig_.useFloat16LookupTables);
#endif

  copyFrom(index);
}
44 
// Construct an empty (untrained) GPU IVFPQ index with the given
// coarse-list count and PQ parameters. train() must be called before
// vectors can be added.
GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources,
                             int dims,
                             int nlist,
                             int subQuantizers,
                             int bitsPerCode,
                             faiss::MetricType metric,
                             GpuIndexIVFPQConfig config) :
    GpuIndexIVF(resources,
                dims,
                metric,
                nlist,
                config),
    ivfpqConfig_(config),
    subQuantizers_(subQuantizers),
    bitsPerCode_(bitsPerCode),
    reserveMemoryVecs_(0),
    index_(nullptr) {
#ifndef FAISS_USE_FLOAT16
  // float16 lookup tables require a float16-enabled build
  FAISS_ASSERT(!config.useFloat16LookupTables);
#endif

  verifySettings_();

  // FIXME make IP work fully
  FAISS_ASSERT(this->metric_type == faiss::METRIC_L2);

  // We haven't trained ourselves, so don't construct the PQ index yet
  this->is_trained = false;
}
74 
GpuIndexIVFPQ::~GpuIndexIVFPQ() {
  // We own index_; it may be nullptr if the index was never trained
  delete index_;
}
78 
// Initialize ourselves from the given CPU index; overwrites any data
// we currently hold. Throws if the CPU index uses features we do not
// support (inner product metric, >1 byte per code component,
// non-residual encoding, polysemous filtering).
void
GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) {
  DeviceScope scope(device_);

  // FIXME: support this
  FAISS_THROW_IF_NOT_MSG(index->metric_type == faiss::METRIC_L2,
                         "inner product unsupported");
  GpuIndexIVF::copyFrom(index);

  // Clear out our old data
  delete index_;
  index_ = nullptr;

  subQuantizers_ = index->pq.M;
  bitsPerCode_ = index->pq.nbits;

  // We only support this
  FAISS_ASSERT(index->pq.byte_per_idx == 1);
  FAISS_ASSERT(index->by_residual);
  FAISS_ASSERT(index->polysemous_ht == 0);

  verifySettings_();

  // The other index might not be trained
  if (!index->is_trained) {
    return;
  }

  // Otherwise, we can populate ourselves from the other index
  this->is_trained = true;

  // Copy our lists as well
  // The product quantizer must have data in it
  FAISS_ASSERT(index->pq.centroids.size() > 0);
  index_ = new IVFPQ(resources_,
                     quantizer_->getGpuData(),
                     subQuantizers_,
                     bitsPerCode_,
                     (float*) index->pq.centroids.data(),
                     ivfpqConfig_.indicesOptions,
                     ivfpqConfig_.useFloat16LookupTables,
                     memorySpace_);
  // Doesn't make sense to reserve memory here
  index_->setPrecomputedCodes(ivfpqConfig_.usePrecomputedTables);

  // Copy database vectors, if any
  const InvertedLists* ivf = index->invlists;
  size_t nlist = ivf ? ivf->nlist : 0;

  for (size_t i = 0; i < nlist; ++i) {
    size_t list_size = ivf->list_size(i);

    // GPU index can only support max int entries per list
    FAISS_THROW_IF_NOT_FMT(list_size <=
                           (size_t) std::numeric_limits<int>::max(),
                           "GPU inverted list can only support "
                           "%zu entries; %zu found",
                           (size_t) std::numeric_limits<int>::max(),
                           list_size);

    index_->addCodeVectorsFromCpu(
      i, ivf->get_codes(i), ivf->get_ids(i), list_size);
  }
}
142 
// Copy our state to the given CPU index; overwrites all data in the CPU
// index. Requires that per-vector indices were retained on the GPU
// (i.e. indicesOptions != INDICES_IVF).
void
GpuIndexIVFPQ::copyTo(faiss::IndexIVFPQ* index) const {
  DeviceScope scope(device_);

  // We must have the indices in order to copy to ourselves
  FAISS_THROW_IF_NOT_MSG(ivfpqConfig_.indicesOptions != INDICES_IVF,
                         "Cannot copy to CPU as GPU index doesn't retain "
                         "indices (INDICES_IVF)");

  GpuIndexIVF::copyTo(index);

  //
  // IndexIVFPQ information
  //
  index->by_residual = true;
  index->use_precomputed_table = 0;
  index->code_size = subQuantizers_;
  index->pq = faiss::ProductQuantizer(this->d, subQuantizers_, bitsPerCode_);

  index->do_polysemous_training = false;
  index->polysemous_training = nullptr;

  index->scan_table_threshold = 0;
  index->max_codes = 0;
  index->polysemous_ht = 0;
  index->precomputed_table.clear();

  // Fresh CPU-side storage for the inverted lists; the CPU index takes
  // ownership (second argument of replace_invlists)
  InvertedLists* ivf = new ArrayInvertedLists(
    nlist_, index->code_size);

  index->replace_invlists(ivf, true);

  if (index_) {
    // Copy the inverted lists
    for (int i = 0; i < nlist_; ++i) {
      auto ids = getListIndices(i);
      auto codes = getListCodes(i);

      index->invlists->add_entries(i, ids.size(), ids.data(), codes.data());
    }

    // Copy PQ centroids
    auto devPQCentroids = index_->getPQCentroids();
    index->pq.centroids.resize(devPQCentroids.numElements());

    fromDevice<float, 3>(devPQCentroids,
                         index->pq.centroids.data(),
                         resources_->getDefaultStream(device_));

    if (ivfpqConfig_.usePrecomputedTables) {
      index->precompute_table();
    }
  }
}
196 
// Reserve GPU memory in our inverted lists for this number of vectors.
// If we are not yet trained, the reservation is remembered and applied
// when the IVFPQ index is constructed.
void
GpuIndexIVFPQ::reserveMemory(size_t numVecs) {
  reserveMemoryVecs_ = numVecs;
  if (index_) {
    DeviceScope scope(device_);
    index_->reserveMemory(numVecs);
  }
}
205 
// Enable or disable pre-computed distance tables. The new setting is
// re-validated against our PQ parameters via verifySettings_().
void
GpuIndexIVFPQ::setPrecomputedCodes(bool enable) {
  ivfpqConfig_.usePrecomputedTables = enable;
  if (index_) {
    DeviceScope scope(device_);
    index_->setPrecomputedCodes(enable);
  }

  verifySettings_();
}
216 
// Are pre-computed codes currently enabled?
bool
GpuIndexIVFPQ::getPrecomputedCodes() const {
  return ivfpqConfig_.usePrecomputedTables;
}
221 
// Return the number of PQ sub-quantizers we are using.
int
GpuIndexIVFPQ::getNumSubQuantizers() const {
  return subQuantizers_;
}
226 
// Return the number of bits per PQ code.
int
GpuIndexIVFPQ::getBitsPerCode() const {
  return bitsPerCode_;
}
231 
// Return the number of centroids per PQ sub-quantizer (2^bitsPerCode_).
int
GpuIndexIVFPQ::getCentroidsPerSubQuantizer() const {
  return utils::pow2(bitsPerCode_);
}
236 
// Release unused reserved GPU memory in the inverted lists; returns the
// number of bytes reclaimed (0 if we have no GPU index yet).
size_t
GpuIndexIVFPQ::reclaimMemory() {
  if (index_) {
    DeviceScope scope(device_);
    return index_->reclaimMemory();
  }

  return 0;
}
246 
// Remove all stored vectors; the index remains trained (PQ centroids
// and coarse quantizer are kept).
void
GpuIndexIVFPQ::reset() {
  if (index_) {
    DeviceScope scope(device_);

    index_->reset();
    this->ntotal = 0;
  } else {
    // Never trained: there should be nothing to clear
    FAISS_ASSERT(this->ntotal == 0);
  }
}
258 
// Train the product quantizer on coarse-quantizer residuals of (a subset
// of) the training vectors, then construct the GPU IVFPQ index from the
// resulting PQ centroids.
void
GpuIndexIVFPQ::trainResidualQuantizer_(Index::idx_t n, const float* x) {
  // Code largely copied from faiss::IndexIVFPQ
  // FIXME: GPUize more of this
  // Cap the training set: 64 points per PQ centroid is sufficient
  n = std::min(n, (Index::idx_t) (1 << bitsPerCode_) * 64);

  if (this->verbose) {
    printf("computing residuals\n");
  }

  std::vector<Index::idx_t> assign(n);
  quantizer_->assign(n, x, assign.data());

  std::vector<float> residuals(n * d);

  for (idx_t i = 0; i < n; i++) {
    quantizer_->compute_residual(x + i * d, &residuals[i * d], assign[i]);
  }

  if (this->verbose) {
    printf("training %d x %d product quantizer on %ld vectors in %dD\n",
           subQuantizers_, getCentroidsPerSubQuantizer(), n, this->d);
  }

  // Just use the CPU product quantizer to determine sub-centroids
  faiss::ProductQuantizer pq(this->d, subQuantizers_, bitsPerCode_);
  pq.verbose = this->verbose;
  pq.train(n, residuals.data());

  index_ = new IVFPQ(resources_,
                     quantizer_->getGpuData(),
                     subQuantizers_,
                     bitsPerCode_,
                     pq.centroids.data(),
                     ivfpqConfig_.indicesOptions,
                     ivfpqConfig_.useFloat16LookupTables,
                     memorySpace_);
  if (reserveMemoryVecs_) {
    // Apply any reservation requested before training
    index_->reserveMemory(reserveMemoryVecs_);
  }

  index_->setPrecomputedCodes(ivfpqConfig_.usePrecomputedTables);
}
302 
// Train the coarse quantizer and then the residual product quantizer on
// the given vectors. A no-op if we are already trained.
void
GpuIndexIVFPQ::train(Index::idx_t n, const float* x) {
  DeviceScope scope(device_);

  if (this->is_trained) {
    // Already trained: all trained state must be consistent
    FAISS_ASSERT(quantizer_->is_trained);
    FAISS_ASSERT(quantizer_->ntotal == nlist_);
    FAISS_ASSERT(index_);
    return;
  }

  FAISS_ASSERT(!index_);

  trainQuantizer_(n, x);
  trainResidualQuantizer_(n, x);

  this->is_trained = true;
}
321 
// Called from GpuIndex for add/add_with_ids. Moves vectors and ids to
// the device (if not already there) and adds them to the inverted lists.
void
GpuIndexIVFPQ::addImpl_(Index::idx_t n,
                        const float* x,
                        const Index::idx_t* xids) {
  // Device is already set in GpuIndex::addInternal_
  FAISS_ASSERT(index_);
  FAISS_ASSERT(n > 0);

  auto stream = resources_->getDefaultStreamCurrentDevice();

  auto deviceVecs =
    toDevice<float, 2>(resources_,
                       device_,
                       const_cast<float*>(x),
                       stream,
                       {(int) n, index_->getDim()});

  auto deviceIndices =
    toDevice<Index::idx_t, 1>(resources_,
                              device_,
                              const_cast<Index::idx_t*>(xids),
                              stream,
                              {(int) n});

  // Not all vectors may be able to be added (some may contain NaNs
  // etc)
  ntotal += index_->classifyAndAddVectors(deviceVecs, deviceIndices);
}
350 
// Called from GpuIndex for search. Stages queries and output buffers on
// the device as needed, runs the IVFPQ query, and copies results back.
void
GpuIndexIVFPQ::searchImpl_(faiss::Index::idx_t n,
                           const float* x,
                           faiss::Index::idx_t k,
                           float* distances,
                           faiss::Index::idx_t* labels) const {
  // Device is already set in GpuIndex::search

  FAISS_ASSERT(index_);
  FAISS_ASSERT(n > 0);

  // Make sure arguments are on the device we desire; use temporary
  // memory allocations to move it if necessary
  auto devX =
    toDevice<float, 2>(resources_,
                       device_,
                       const_cast<float*>(x),
                       resources_->getDefaultStream(device_),
                       {(int) n, index_->getDim()});
  auto devDistances =
    toDevice<float, 2>(resources_,
                       device_,
                       distances,
                       resources_->getDefaultStream(device_),
                       {(int) n, (int) k});
  auto devLabels =
    toDevice<faiss::Index::idx_t, 2>(resources_,
                                     device_,
                                     labels,
                                     resources_->getDefaultStream(device_),
                                     {(int) n, (int) k});

  index_->query(devX,
                nprobe_,
                (int) k,
                devDistances,
                devLabels);

  // Copy back if necessary
  fromDevice<float, 2>(
    devDistances, distances, resources_->getDefaultStream(device_));
  fromDevice<faiss::Index::idx_t, 2>(
    devLabels, labels, resources_->getDefaultStream(device_));
}
395 
// Return the number of vectors currently stored in a particular
// inverted list.
int
GpuIndexIVFPQ::getListLength(int listId) const {
  FAISS_ASSERT(index_);
  return index_->getListLength(listId);
}
401 
// Return the encoded vectors of a particular inverted list, copied back
// to the CPU.
std::vector<unsigned char>
GpuIndexIVFPQ::getListCodes(int listId) const {
  FAISS_ASSERT(index_);
  DeviceScope scope(device_);

  return index_->getListCodes(listId);
}
409 
// Return the user indices of a particular inverted list, copied back to
// the CPU.
std::vector<long>
GpuIndexIVFPQ::getListIndices(int listId) const {
  FAISS_ASSERT(index_);
  DeviceScope scope(device_);

  return index_->getListIndices(listId);
}
417 
// Validate the current IVFPQ parameters against what our GPU
// implementation supports; throws on any violation.
void
GpuIndexIVFPQ::verifySettings_() const {
  // Our implementation has these restrictions:

  // Must have some number of lists
  FAISS_THROW_IF_NOT_MSG(nlist_ > 0, "nlist must be >0");

  // up to a single byte per code
  FAISS_THROW_IF_NOT_FMT(bitsPerCode_ <= 8,
                     "Bits per code must be <= 8 (passed %d)", bitsPerCode_);

  // Sub-quantizers must evenly divide dimensions available
  FAISS_THROW_IF_NOT_FMT(this->d % subQuantizers_ == 0,
                     "Number of sub-quantizers (%d) must be an "
                     "even divisor of the number of dimensions (%d)",
                     subQuantizers_, this->d);

  // The number of bytes per encoded vector must be one we support
  FAISS_THROW_IF_NOT_FMT(IVFPQ::isSupportedPQCodeLength(subQuantizers_),
                     "Number of bytes per encoded vector / sub-quantizers (%d) "
                     "is not supported",
                     subQuantizers_);

  // We must have enough shared memory on the current device to store
  // our lookup distances
  int lookupTableSize = sizeof(float);
#ifdef FAISS_USE_FLOAT16
  if (ivfpqConfig_.useFloat16LookupTables) {
    lookupTableSize = sizeof(half);
  }
#endif

  // 64 bytes per code is only supported with usage of float16, at 2^8
  // codes per subquantizer
  size_t requiredSmemSize =
    lookupTableSize * subQuantizers_ * utils::pow2(bitsPerCode_);
  size_t smemPerBlock = getMaxSharedMemPerBlock(device_);

  FAISS_THROW_IF_NOT_FMT(requiredSmemSize
                     <= getMaxSharedMemPerBlock(device_),
                     "Device %d has %zu bytes of shared memory, while "
                     "%d bits per code and %d sub-quantizers requires %zu "
                     "bytes. Consider useFloat16LookupTables and/or "
                     "reduce parameters",
                     device_, smemPerBlock, bitsPerCode_, subQuantizers_,
                     requiredSmemSize);

  // If precomputed codes are disabled, we have an extra limitation in
  // terms of the number of dimensions per subquantizer
  FAISS_THROW_IF_NOT_FMT(ivfpqConfig_.usePrecomputedTables ||
                     IVFPQ::isSupportedNoPrecomputedSubDimSize(
                       this->d / subQuantizers_),
                     "Number of dimensions per sub-quantizer (%d) "
                     "is not currently supported without precomputed codes. "
                     "Only 1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32 dims "
                     "per sub-quantizer are currently supported with no "
                     "precomputed codes. "
                     "Precomputed codes supports any number of dimensions, but "
                     "will involve memory overheads.",
                     this->d / subQuantizers_);

  // TODO: fully implement METRIC_INNER_PRODUCT
  FAISS_THROW_IF_NOT_MSG(this->metric_type == faiss::METRIC_L2,
                         "METRIC_INNER_PRODUCT is currently unsupported");
}
483 
484 } } // namespace
std::vector< long > getListIndices(int listId) const
void searchImpl_(faiss::Index::idx_t n, const float *x, faiss::Index::idx_t k, float *distances, faiss::Index::idx_t *labels) const override
Called from GpuIndex for search.
void precompute_table()
build precomputed table
Definition: IndexIVFPQ.cpp:355
size_t nbits
number of bits per quantization index
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device.
PolysemousTraining * polysemous_training
if NULL, use default
Definition: IndexIVFPQ.h:35
size_t byte_per_idx
nb bytes per code component (1 or 2)
virtual const idx_t * get_ids(size_t list_no) const =0
GpuIndexIVFPQ(GpuResources *resources, const faiss::IndexIVFPQ *index, GpuIndexIVFPQConfig config=GpuIndexIVFPQConfig())
int getDim() const
Return the number of dimensions we are indexing.
Definition: IVFBase.cu:100
int getListLength(int listId) const
Definition: IVFBase.cu:200
FlatIndex * getGpuData()
For internal access.
Definition: GpuIndexFlat.h:120
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:34
virtual size_t list_size(size_t list_no) const =0
get the size of a list
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
Definition: IVFBase.cu:45
int getListLength(int listId) const
bool do_polysemous_training
reorder PQ centroids after training?
Definition: IndexIVFPQ.h:34
size_t scan_table_threshold
use table computation or on-the-fly?
Definition: IndexIVFPQ.h:38
std::vector< float > precomputed_table
Definition: IndexIVFPQ.h:44
int polysemous_ht
Hamming thresh for polysemous filtering.
Definition: IndexIVFPQ.h:39
int getBitsPerCode() const
Return the number of bits per PQ code.
virtual cudaStream_t getDefaultStream(int device)=0
int d
vector dimension
Definition: Index.h:64
void train(Index::idx_t n, const float *x) override
static bool isSupportedPQCodeLength(int size)
Returns true if we support PQ in this size.
Definition: IVFPQ.cu:72
int nprobe_
Number of inverted list probes per query.
Definition: GpuIndexIVF.h:91
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
int classifyAndAddVectors(Tensor< float, 2, true > &vecs, Tensor< long, 1, true > &indices)
Definition: IVFPQ.cu:120
void query(Tensor< float, 2, true > &queries, int nprobe, int k, Tensor< float, 2, true > &outDistances, Tensor< long, 2, true > &outIndices)
Definition: IVFPQ.cu:516
const int device_
The GPU device we are resident on.
Definition: GpuIndex.h:94
void copyFrom(const faiss::IndexIVFPQ *index)
Tensor< float, 3, true > getPQCentroids()
Definition: IVFPQ.cu:591
GpuResources * resources_
Manages streans, cuBLAS handles and scratch memory for devices.
Definition: GpuIndex.h:91
void copyTo(faiss::IndexIVF *index) const
Copy what we have to the CPU equivalent.
Definition: GpuIndexIVF.cu:149
long idx_t
all indices are this type
Definition: Index.h:62
int nlist_
Number of inverted lists that we manage.
Definition: GpuIndexIVF.h:88
void addImpl_(faiss::Index::idx_t n, const float *x, const faiss::Index::idx_t *ids) override
Called from GpuIndex for add/add_with_ids.
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
bool verbose
verbosity level
Definition: Index.h:66
void setPrecomputedCodes(bool enable)
Enable or disable pre-computed codes.
Definition: IVFPQ.cu:102
std::vector< unsigned char > getListCodes(int listId) const
Return the list codes of a particular list back to the CPU.
Definition: IVFPQ.cu:583
void copyTo(faiss::IndexIVFPQ *index) const
size_t nlist
number of possible key values
Definition: IndexIVF.h:70
const MemorySpace memorySpace_
The memory space of our primary storage on the GPU.
Definition: GpuIndex.h:97
bool by_residual
Encode residual or plain vector?
Definition: IndexIVFPQ.h:30
GpuIndexFlat * quantizer_
Quantizer for inverted lists.
Definition: GpuIndexIVF.h:94
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:32
InvertedLists * invlists
Acess to the actual data.
Definition: IndexIVF.h:168
size_t M
number of subquantizers
int getNumSubQuantizers() const
Return the number of sub-quantizers we are using.
std::vector< long > getListIndices(int listId) const
Return the list indices of a particular list back to the CPU.
Definition: IVFBase.cu:207
int getCentroidsPerSubQuantizer() const
Return the number of centroids per PQ code (2^bits per code)
void setPrecomputedCodes(bool enable)
Enable or disable pre-computed codes.
virtual const uint8_t * get_codes(size_t list_no) const =0
void copyFrom(const faiss::IndexIVF *index)
Copy what we need from the CPU equivalent.
Definition: GpuIndexIVF.cu:81
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
void compute_residual(const float *x, float *residual, idx_t key) const
Definition: Index.cpp:86
size_t max_codes
max nb of codes to visit to do a query
Definition: IndexIVF.h:174
bool getPrecomputedCodes() const
Are pre-computed codes enabled?
IndicesOptions indicesOptions
Index storage options for the GPU.
Definition: GpuIndexIVF.h:31
size_t reclaimMemory()
Definition: IVFBase.cu:105
Implementing class for IVFPQ on the GPU.
Definition: IVFPQ.cuh:19
size_t code_size
code size per vector in bytes
Definition: IndexIVF.h:171
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:43
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:31
std::vector< unsigned char > getListCodes(int listId) const
std::vector< float > centroids
Centroid table, size M * ksub * dsub.
static bool isSupportedNoPrecomputedSubDimSize(int dims)
Definition: IVFPQ.cu:97