Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuIndexIVFPQ.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #include "GpuIndexIVFPQ.h"
11 #include "../IndexFlat.h"
12 #include "../IndexIVFPQ.h"
13 #include "../ProductQuantizer.h"
14 #include "GpuIndexFlat.h"
15 #include "GpuResources.h"
16 #include "impl/IVFPQ.cuh"
17 #include "utils/CopyUtils.cuh"
18 #include "utils/DeviceUtils.h"
19 
20 #include <limits>
21 
22 namespace faiss { namespace gpu {
23 
/// Construct a GPU IVFPQ index by copying state from an existing,
/// trained CPU faiss::IndexIVFPQ. PQ parameters (sub-quantizers, bits
/// per code) are taken from the CPU index in copyFrom().
GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources,
                             const faiss::IndexIVFPQ* index,
                             GpuIndexIVFPQConfig config) :
    GpuIndexIVF(resources,
                index->d,
                index->metric_type,
                index->nlist,
                config),
    ivfpqConfig_(config),
    subQuantizers_(0),   // filled in by copyFrom()
    bitsPerCode_(0),     // filled in by copyFrom()
    reserveMemoryVecs_(0),
    index_(nullptr) {
#ifndef FAISS_USE_FLOAT16
  // float16 lookup tables require a float16-enabled build
  FAISS_ASSERT(!ivfpqConfig_.useFloat16LookupTables);
#endif

  copyFrom(index);
}
43 
/// Construct an empty, untrained GPU IVFPQ index with the given
/// dimensionality, number of inverted lists and PQ parameters.
/// train() must be called before vectors can be added.
GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources,
                             int dims,
                             int nlist,
                             int subQuantizers,
                             int bitsPerCode,
                             faiss::MetricType metric,
                             GpuIndexIVFPQConfig config) :
    GpuIndexIVF(resources,
                dims,
                metric,
                nlist,
                config),
    ivfpqConfig_(config),
    subQuantizers_(subQuantizers),
    bitsPerCode_(bitsPerCode),
    reserveMemoryVecs_(0),
    index_(nullptr) {
#ifndef FAISS_USE_FLOAT16
  // float16 lookup tables require a float16-enabled build
  FAISS_ASSERT(!config.useFloat16LookupTables);
#endif

  verifySettings_();

  // FIXME make IP work fully
  FAISS_ASSERT(this->metric_type == faiss::METRIC_L2);

  // We haven't trained ourselves, so don't construct the PQ index yet
  this->is_trained = false;
}
73 
GpuIndexIVFPQ::~GpuIndexIVFPQ() {
  // index_ may be nullptr (untrained index); delete handles that
  delete index_;
}
77 
/// Initialize ourselves from a (possibly trained) CPU IndexIVFPQ,
/// copying PQ parameters, centroids and any populated inverted lists
/// to the GPU. Replaces any previously-held GPU state.
void
GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) {
  DeviceScope scope(device_);

  // FIXME: support this
  FAISS_THROW_IF_NOT_MSG(index->metric_type == faiss::METRIC_L2,
                         "inner product unsupported");
  GpuIndexIVF::copyFrom(index);

  // Clear out our old data
  delete index_;
  index_ = nullptr;

  subQuantizers_ = index->pq.M;
  bitsPerCode_ = index->pq.nbits;

  // We only support this (1 byte per PQ code component, residual
  // encoding, no polysemous training)
  FAISS_ASSERT(index->pq.byte_per_idx == 1);
  FAISS_ASSERT(index->by_residual);
  FAISS_ASSERT(index->polysemous_ht == 0);

  verifySettings_();

  // The other index might not be trained
  if (!index->is_trained) {
    return;
  }

  // Otherwise, we can populate ourselves from the other index
  this->is_trained = true;

  // Copy our lists as well
  // The product quantizer must have data in it
  FAISS_ASSERT(index->pq.centroids.size() > 0);
  index_ = new IVFPQ(resources_,
                     quantizer_->getGpuData(),
                     subQuantizers_,
                     bitsPerCode_,
                     (float*) index->pq.centroids.data(),
                     ivfpqConfig_.indicesOptions,
                     ivfpqConfig_.useFloat16LookupTables,
                     memorySpace_);
  // Doesn't make sense to reserve memory here
  index_->setPrecomputedCodes(ivfpqConfig_.usePrecomputedTables);

  // Copy database vectors, if any
  const InvertedLists* ivf = index->invlists;
  size_t nlist = ivf ? ivf->nlist : 0;
  for (size_t i = 0; i < nlist; ++i) {
    size_t list_size = ivf->list_size(i);

    // GPU index can only support max int entries per list
    FAISS_THROW_IF_NOT_FMT(list_size <=
                           (size_t) std::numeric_limits<int>::max(),
                           "GPU inverted list can only support "
                           "%zu entries; %zu found",
                           (size_t) std::numeric_limits<int>::max(),
                           list_size);

    index_->addCodeVectorsFromCpu(
      i, ivf->get_codes(i), ivf->get_ids(i), list_size);
  }
}
141 
/// Copy our GPU state into a CPU IndexIVFPQ, including PQ centroids and
/// all inverted lists. Requires that we retain user indices
/// (indicesOptions != INDICES_IVF).
void
GpuIndexIVFPQ::copyTo(faiss::IndexIVFPQ* index) const {
  DeviceScope scope(device_);

  // We must have the indices in order to copy to ourselves
  FAISS_THROW_IF_NOT_MSG(ivfpqConfig_.indicesOptions != INDICES_IVF,
                         "Cannot copy to CPU as GPU index doesn't retain "
                         "indices (INDICES_IVF)");

  GpuIndexIVF::copyTo(index);

  //
  // IndexIVFPQ information
  //
  index->by_residual = true;
  index->use_precomputed_table = 0;
  index->code_size = subQuantizers_;
  index->pq = faiss::ProductQuantizer(this->d, subQuantizers_, bitsPerCode_);

  index->do_polysemous_training = false;
  index->polysemous_training = nullptr;

  index->scan_table_threshold = 0;
  index->max_codes = 0;
  index->polysemous_ht = 0;
  index->precomputed_table.clear();

  InvertedLists* ivf = new ArrayInvertedLists(
    nlist_, index->code_size);

  // old inverted lists are deallocated (own = true)
  index->replace_invlists(ivf, true);

  if (index_) {
    // Copy the inverted lists
    for (int i = 0; i < nlist_; ++i) {
      auto ids = getListIndices(i);
      auto codes = getListCodes(i);
      index->invlists->add_entries(i, ids.size(), ids.data(), codes.data());
    }

    // Copy PQ centroids
    auto devPQCentroids = index_->getPQCentroids();
    index->pq.centroids.resize(devPQCentroids.numElements());

    fromDevice<float, 3>(devPQCentroids,
                         index->pq.centroids.data(),
                         resources_->getDefaultStream(device_));

    if (ivfpqConfig_.usePrecomputedTables) {
      index->precompute_table();
    }
  }
}
195 
/// Reserve GPU memory in our inverted lists for this number of
/// vectors. Takes effect immediately if trained; otherwise the
/// reservation is applied when the IVFPQ structure is built in train().
void
GpuIndexIVFPQ::reserveMemory(size_t numVecs) {
  reserveMemoryVecs_ = numVecs;
  if (index_) {
    DeviceScope scope(device_);
    index_->reserveMemory(numVecs);
  }
}
204 
/// Enable or disable pre-computed distance tables. Re-verifies
/// settings, since disabling precomputed codes restricts the supported
/// dims-per-subquantizer values.
void
GpuIndexIVFPQ::setPrecomputedCodes(bool enable) {
  ivfpqConfig_.usePrecomputedTables = enable;
  if (index_) {
    DeviceScope scope(device_);
    index_->setPrecomputedCodes(enable);
  }

  verifySettings_();
}
215 
/// Are pre-computed codes enabled?
bool
GpuIndexIVFPQ::getPrecomputedCodes() const {
  return ivfpqConfig_.usePrecomputedTables;
}
220 
/// Return the number of PQ sub-quantizers we are using
int
GpuIndexIVFPQ::getNumSubQuantizers() const {
  return subQuantizers_;
}
225 
/// Return the number of bits per PQ code
int
GpuIndexIVFPQ::getBitsPerCode() const {
  return bitsPerCode_;
}
230 
/// Return the number of centroids per PQ sub-quantizer (2^bitsPerCode_)
int
GpuIndexIVFPQ::getCentroidsPerSubQuantizer() const {
  return utils::pow2(bitsPerCode_);
}
235 
/// Trim excess reserved GPU memory in the inverted lists; returns the
/// number of bytes reclaimed (0 if untrained).
size_t
GpuIndexIVFPQ::reclaimMemory() {
  if (index_) {
    DeviceScope scope(device_);
    return index_->reclaimMemory();
  }

  return 0;
}
245 
/// Remove all stored vectors from the index; training state (coarse
/// quantizer and PQ centroids) is retained.
void
GpuIndexIVFPQ::reset() {
  if (index_) {
    DeviceScope scope(device_);

    index_->reset();
    this->ntotal = 0;
  } else {
    // Untrained index holds no vectors by construction
    FAISS_ASSERT(this->ntotal == 0);
  }
}
257 
/// Train the product quantizer on residuals (vector minus its assigned
/// coarse centroid) and build the GPU IVFPQ structure from the learned
/// sub-centroids. Assumes the coarse quantizer is already trained.
void
GpuIndexIVFPQ::trainResidualQuantizer_(Index::idx_t n, const float* x) {
  // Code largely copied from faiss::IndexIVFPQ
  // FIXME: GPUize more of this
  // Cap the training set at 64 points per PQ centroid
  n = std::min(n, (Index::idx_t) (1 << bitsPerCode_) * 64);

  if (this->verbose) {
    printf("computing residuals\n");
  }

  std::vector<Index::idx_t> assign(n);
  quantizer_->assign(n, x, assign.data());

  std::vector<float> residuals(n * d);

  for (idx_t i = 0; i < n; i++) {
    quantizer_->compute_residual(x + i * d, &residuals[i * d], assign[i]);
  }

  if (this->verbose) {
    printf("training %d x %d product quantizer on %ld vectors in %dD\n",
           subQuantizers_, getCentroidsPerSubQuantizer(), n, this->d);
  }

  // Just use the CPU product quantizer to determine sub-centroids
  faiss::ProductQuantizer pq(this->d, subQuantizers_, bitsPerCode_);
  pq.verbose = this->verbose;
  pq.train(n, residuals.data());

  index_ = new IVFPQ(resources_,
                     quantizer_->getGpuData(),
                     subQuantizers_,
                     bitsPerCode_,
                     pq.centroids.data(),
                     ivfpqConfig_.indicesOptions,
                     ivfpqConfig_.useFloat16LookupTables,
                     memorySpace_);
  if (reserveMemoryVecs_) {
    // Honor a reserveMemory() request made before training
    index_->reserveMemory(reserveMemoryVecs_);
  }

  index_->setPrecomputedCodes(ivfpqConfig_.usePrecomputedTables);
}
301 
/// Train the coarse quantizer and then the residual product quantizer
/// on n vectors of dimension d. No-op (with consistency checks) if
/// already trained.
void
GpuIndexIVFPQ::train(Index::idx_t n, const float* x) {
  DeviceScope scope(device_);

  if (this->is_trained) {
    // Sanity-check that trained state is internally consistent
    FAISS_ASSERT(quantizer_->is_trained);
    FAISS_ASSERT(quantizer_->ntotal == nlist_);
    FAISS_ASSERT(index_);
    return;
  }

  FAISS_ASSERT(!index_);

  trainQuantizer_(n, x);
  trainResidualQuantizer_(n, x);

  this->is_trained = true;
}
320 
/// Called from GpuIndex for add/add_with_ids. Moves the vectors and ids
/// to the device if necessary, then classifies and appends them to the
/// inverted lists. Vectors that cannot be encoded (e.g. NaNs) are
/// skipped, so ntotal grows by the number actually added.
void
GpuIndexIVFPQ::addImpl_(Index::idx_t n,
                        const float* x,
                        const Index::idx_t* xids) {
  // Device is already set in GpuIndex::addInternal_
  FAISS_ASSERT(index_);
  FAISS_ASSERT(n > 0);

  auto stream = resources_->getDefaultStreamCurrentDevice();

  auto deviceVecs =
    toDevice<float, 2>(resources_,
                       device_,
                       const_cast<float*>(x),
                       stream,
                       {(int) n, index_->getDim()});

  auto deviceIndices =
    toDevice<Index::idx_t, 1>(resources_,
                              device_,
                              const_cast<Index::idx_t*>(xids),
                              stream,
                              {(int) n});

  // Not all vectors may be able to be added (some may contain NaNs
  // etc)
  ntotal += index_->classifyAndAddVectors(deviceVecs, deviceIndices);
}
349 
/// Called from GpuIndex for search. Moves queries to the device if
/// needed, runs the IVFPQ query over nprobe_ lists, and copies the
/// k nearest distances/labels back to the caller's buffers.
void
GpuIndexIVFPQ::searchImpl_(faiss::Index::idx_t n,
                           const float* x,
                           faiss::Index::idx_t k,
                           float* distances,
                           faiss::Index::idx_t* labels) const {
  // Device is already set in GpuIndex::search

  FAISS_ASSERT(index_);
  FAISS_ASSERT(n > 0);

  // Make sure arguments are on the device we desire; use temporary
  // memory allocations to move it if necessary
  auto devX =
    toDevice<float, 2>(resources_,
                       device_,
                       const_cast<float*>(x),
                       resources_->getDefaultStream(device_),
                       {(int) n, index_->getDim()});
  auto devDistances =
    toDevice<float, 2>(resources_,
                       device_,
                       distances,
                       resources_->getDefaultStream(device_),
                       {(int) n, (int) k});
  auto devLabels =
    toDevice<faiss::Index::idx_t, 2>(resources_,
                                     device_,
                                     labels,
                                     resources_->getDefaultStream(device_),
                                     {(int) n, (int) k});

  index_->query(devX,
                nprobe_,
                (int) k,
                devDistances,
                devLabels);

  // Copy back if necessary (no-op if the output was already resident)
  fromDevice<float, 2>(
    devDistances, distances, resources_->getDefaultStream(device_));
  fromDevice<faiss::Index::idx_t, 2>(
    devLabels, labels, resources_->getDefaultStream(device_));
}
394 
/// Return the number of vectors in a particular inverted list
int
GpuIndexIVFPQ::getListLength(int listId) const {
  FAISS_ASSERT(index_);
  return index_->getListLength(listId);
}
400 
/// Return the encoded vectors (PQ codes) of a particular inverted list,
/// copied back to the CPU
std::vector<unsigned char>
GpuIndexIVFPQ::getListCodes(int listId) const {
  FAISS_ASSERT(index_);
  DeviceScope scope(device_);

  return index_->getListCodes(listId);
}
408 
/// Return the user indices of a particular inverted list, copied back
/// to the CPU
std::vector<long>
GpuIndexIVFPQ::getListIndices(int listId) const {
  FAISS_ASSERT(index_);
  DeviceScope scope(device_);

  return index_->getListIndices(listId);
}
416 
/// Throws if the current (nlist, subQuantizers, bitsPerCode, metric,
/// precomputed-table) configuration is unsupported by the GPU
/// implementation or by the current device's shared memory limits.
void
GpuIndexIVFPQ::verifySettings_() const {
  // Our implementation has these restrictions:

  // Must have some number of lists
  FAISS_THROW_IF_NOT_MSG(nlist_ > 0, "nlist must be >0");

  // up to a single byte per code
  FAISS_THROW_IF_NOT_FMT(bitsPerCode_ <= 8,
                         "Bits per code must be <= 8 (passed %d)",
                         bitsPerCode_);

  // Sub-quantizers must evenly divide dimensions available
  FAISS_THROW_IF_NOT_FMT(this->d % subQuantizers_ == 0,
                         "Number of sub-quantizers (%d) must be an "
                         "even divisor of the number of dimensions (%d)",
                         subQuantizers_, this->d);

  // The number of bytes per encoded vector must be one we support
  FAISS_THROW_IF_NOT_FMT(IVFPQ::isSupportedPQCodeLength(subQuantizers_),
                         "Number of bytes per encoded vector / sub-quantizers (%d) "
                         "is not supported",
                         subQuantizers_);

  // We must have enough shared memory on the current device to store
  // our lookup distances
  int lookupTableSize = sizeof(float);
#ifdef FAISS_USE_FLOAT16
  if (ivfpqConfig_.useFloat16LookupTables) {
    lookupTableSize = sizeof(half);
  }
#endif

  // 64 bytes per code is only supported with usage of float16, at 2^8
  // codes per subquantizer
  size_t requiredSmemSize =
    lookupTableSize * subQuantizers_ * utils::pow2(bitsPerCode_);
  size_t smemPerBlock = getMaxSharedMemPerBlock(device_);

  FAISS_THROW_IF_NOT_FMT(requiredSmemSize
                         <= getMaxSharedMemPerBlock(device_),
                         "Device %d has %zu bytes of shared memory, while "
                         "%d bits per code and %d sub-quantizers requires %zu "
                         "bytes. Consider useFloat16LookupTables and/or "
                         "reduce parameters",
                         device_, smemPerBlock, bitsPerCode_, subQuantizers_,
                         requiredSmemSize);

  // If precomputed codes are disabled, we have an extra limitation in
  // terms of the number of dimensions per subquantizer
  FAISS_THROW_IF_NOT_FMT(ivfpqConfig_.usePrecomputedTables ||
                         IVFPQ::isSupportedNoPrecomputedSubDimSize(
                           this->d / subQuantizers_),
                         "Number of dimensions per sub-quantizer (%d) "
                         "is not currently supported without precomputed codes. "
                         "Only 1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32 dims "
                         "per sub-quantizer are currently supported with no "
                         "precomputed codes. "
                         "Precomputed codes supports any number of dimensions, but "
                         "will involve memory overheads.",
                         this->d / subQuantizers_);

  // TODO: fully implement METRIC_INNER_PRODUCT
  FAISS_THROW_IF_NOT_MSG(this->metric_type == faiss::METRIC_L2,
                         "METRIC_INNER_PRODUCT is currently unsupported");
}
482 
483 } } // namespace
std::vector< long > getListIndices(int listId) const
void searchImpl_(faiss::Index::idx_t n, const float *x, faiss::Index::idx_t k, float *distances, faiss::Index::idx_t *labels) const override
Called from GpuIndex for search.
void precompute_table()
build precomputed table
Definition: IndexIVFPQ.cpp:364
size_t nbits
number of bits per quantization index
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device.
simple (default) implementation as an array of inverted lists
PolysemousTraining * polysemous_training
if NULL, use default
Definition: IndexIVFPQ.h:41
size_t byte_per_idx
nb bytes per code component (1 or 2)
virtual const idx_t * get_ids(size_t list_no) const =0
GpuIndexIVFPQ(GpuResources *resources, const faiss::IndexIVFPQ *index, GpuIndexIVFPQConfig config=GpuIndexIVFPQConfig())
int getDim() const
Return the number of dimensions we are indexing.
Definition: IVFBase.cu:99
int getListLength(int listId) const
Definition: IVFBase.cu:199
FlatIndex * getGpuData()
For internal access.
Definition: GpuIndexFlat.h:119
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:35
virtual size_t list_size(size_t list_no) const =0
get the size of a list
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
Definition: IVFBase.cu:44
int getListLength(int listId) const
bool do_polysemous_training
reorder PQ centroids after training?
Definition: IndexIVFPQ.h:40
size_t scan_table_threshold
use table computation or on-the-fly?
Definition: IndexIVFPQ.h:44
std::vector< float > precomputed_table
Definition: IndexIVFPQ.h:60
int polysemous_ht
Hamming thresh for polysemous filtering.
Definition: IndexIVFPQ.h:45
int getBitsPerCode() const
Return the number of bits per PQ code.
virtual cudaStream_t getDefaultStream(int device)=0
int d
vector dimension
Definition: Index.h:66
void train(Index::idx_t n, const float *x) override
static bool isSupportedPQCodeLength(int size)
Returns true if we support PQ in this size.
Definition: IVFPQ.cu:71
int nprobe_
Number of inverted list probes per query.
Definition: GpuIndexIVF.h:90
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
int classifyAndAddVectors(Tensor< float, 2, true > &vecs, Tensor< long, 1, true > &indices)
Definition: IVFPQ.cu:119
void query(Tensor< float, 2, true > &queries, int nprobe, int k, Tensor< float, 2, true > &outDistances, Tensor< long, 2, true > &outIndices)
Definition: IVFPQ.cu:515
const int device_
The GPU device we are resident on.
Definition: GpuIndex.h:93
void copyFrom(const faiss::IndexIVFPQ *index)
Tensor< float, 3, true > getPQCentroids()
Definition: IVFPQ.cu:590
GpuResources * resources_
Manages streams, cuBLAS handles and scratch memory for devices.
Definition: GpuIndex.h:90
void copyTo(faiss::IndexIVF *index) const
Copy what we have to the CPU equivalent.
Definition: GpuIndexIVF.cu:148
long idx_t
all indices are this type
Definition: Index.h:64
void replace_invlists(InvertedLists *il, bool own=false)
replace the inverted lists, old one is deallocated if own_invlists
Definition: IndexIVF.cpp:486
int nlist_
Number of inverted lists that we manage.
Definition: GpuIndexIVF.h:87
void addImpl_(faiss::Index::idx_t n, const float *x, const faiss::Index::idx_t *ids) override
Called from GpuIndex for add/add_with_ids.
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
bool verbose
verbosity level
Definition: Index.h:68
void setPrecomputedCodes(bool enable)
Enable or disable pre-computed codes.
Definition: IVFPQ.cu:101
std::vector< unsigned char > getListCodes(int listId) const
Return the list codes of a particular list back to the CPU.
Definition: IVFPQ.cu:582
void copyTo(faiss::IndexIVFPQ *index) const
size_t nlist
number of possible key values
Definition: InvertedLists.h:35
const MemorySpace memorySpace_
The memory space of our primary storage on the GPU.
Definition: GpuIndex.h:96
bool by_residual
Encode residual or plain vector?
Definition: IndexIVFPQ.h:36
GpuIndexFlat * quantizer_
Quantizer for inverted lists.
Definition: GpuIndexIVF.h:93
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:38
InvertedLists * invlists
Access to the actual data.
Definition: IndexIVF.h:93
size_t M
number of subquantizers
int getNumSubQuantizers() const
Return the number of sub-quantizers we are using.
std::vector< long > getListIndices(int listId) const
Return the list indices of a particular list back to the CPU.
Definition: IVFBase.cu:206
int getCentroidsPerSubQuantizer() const
Return the number of centroids per PQ code (2^bits per code)
void setPrecomputedCodes(bool enable)
Enable or disable pre-computed codes.
virtual const uint8_t * get_codes(size_t list_no) const =0
void copyFrom(const faiss::IndexIVF *index)
Copy what we need from the CPU equivalent.
Definition: GpuIndexIVF.cu:80
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
void compute_residual(const float *x, float *residual, idx_t key) const
Definition: Index.cpp:87
size_t max_codes
max nb of codes to visit to do a query
Definition: IndexIVF.h:99
bool getPrecomputedCodes() const
Are pre-computed codes enabled?
IndicesOptions indicesOptions
Index storage options for the GPU.
Definition: GpuIndexIVF.h:30
size_t reclaimMemory()
Definition: IVFBase.cu:104
Implementing class for IVFPQ on the GPU.
Definition: IVFPQ.cuh:18
size_t code_size
code size per vector in bytes
Definition: IndexIVF.h:96
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:45
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:55
std::vector< unsigned char > getListCodes(int listId) const
std::vector< float > centroids
Centroid table, size M * ksub * dsub.
static bool isSupportedNoPrecomputedSubDimSize(int dims)
Definition: IVFPQ.cu:96