Faiss
/data/users/matthijs/github_faiss/faiss/IndexIVFPQ.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 /* Copyright 2004-present Facebook. All Rights Reserved.
11  Inverted list structure.
12 */
13 
14 #include "IndexIVFPQ.h"
15 
16 #include <cstdio>
17 #include <cassert>
18 
19 #include <sys/mman.h>
20 
21 #include <algorithm>
22 
23 #include "Heap.h"
24 #include "utils.h"
25 
26 #include "Clustering.h"
27 #include "IndexFlat.h"
28 
29 #include "hamming.h"
30 
31 #include "FaissAssert.h"
32 
33 #include "AuxIndexStructures.h"
34 
35 namespace faiss {
36 
37 
38 
39 
40 
41 /*****************************************
42  * IndexIVFPQ implementation
43  ******************************************/
44 
45 IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist,
46  size_t M, size_t nbits_per_idx):
47  IndexIVF (quantizer, d, nlist, METRIC_L2),
48  pq (d, M, nbits_per_idx)
49 {
50  FAISS_ASSERT (nbits_per_idx <= 8);
51  code_size = pq.code_size;
52  is_trained = false;
53  codes.resize (nlist);
54  by_residual = true;
55  use_precomputed_table = 0;
56  scan_table_threshold = 0;
57  max_codes = 0; // means unlimited
58 
59  polysemous_training = nullptr;
60  do_polysemous_training = false;
61  polysemous_ht = 0;
62 
63  set_typename();
64 }
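/* Illustrative usage sketch (not part of the original file): building an
 * IVFPQ index over d-dimensional vectors with a flat L2 coarse quantizer.
 * The names nb / xb (database size and vectors) are hypothetical; the
 * constructor arguments follow the signature above: 1024 inverted lists,
 * M = 8 sub-quantizers of 8 bits each.
 *
 *     faiss::IndexFlatL2 coarse (d);
 *     faiss::IndexIVFPQ index (&coarse, d, 1024, 8, 8);
 *     index.train (nb, xb);   // trains the coarse quantizer and the PQ
 *     index.add (nb, xb);     // encodes xb and appends to the inverted lists
 */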
65 
66 
67 
68 void IndexIVFPQ::set_typename ()
69 {
70  std::stringstream s;
71  s << "IvfPQ_" << pq.M << "x" << pq.nbits
72  << "[" << nlist << ":" << quantizer->index_typename << "]";
73  index_typename = s.str();
74 }
75 
76 
77 void IndexIVFPQ::train_residual (idx_t n, const float *x)
78 {
79  train_residual_o (n, x, nullptr);
80 }
81 
82 
83 void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
84 {
85  idx_t ntrain = pq.ksub * 64;
86  if(n > ntrain) n = ntrain;
87 
88  const float *trainset;
89  if (by_residual) {
90  if(verbose) printf("computing residuals\n");
91  idx_t * assign = new idx_t [n]; // assignment to coarse centroids
92  quantizer->assign (n, x, assign);
93  float *residuals = new float [n * d];
94  for (idx_t i = 0; i < n; i++)
95  quantizer->compute_residual (x + i * d, residuals+i*d, assign[i]);
96  delete [] assign;
97  trainset = residuals;
98  } else {
99  trainset = x;
100  }
101  if (verbose)
102  printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n",
103  pq.M, pq.ksub, n, d);
104  pq.verbose = verbose;
105  pq.train (n, trainset);
106 
106 
107  if (do_polysemous_training) {
108  PolysemousTraining default_pt;
109  PolysemousTraining *pt = polysemous_training;
110  if (!pt) pt = &default_pt;
111  pt->optimize_pq_for_hamming (pq, n, trainset);
112  }
113 
114  // prepare second-level residuals for refine PQ
115  if (residuals_2) {
116  uint8_t *train_codes = new uint8_t [pq.code_size * n];
117  pq.compute_codes (trainset, train_codes, n);
118 
119  for (idx_t i = 0; i < n; i++) {
120  const float *xx = trainset + i * d;
121  float * res = residuals_2 + i * d;
122  pq.decode (train_codes + i * pq.code_size, res);
123  for (int j = 0; j < d; j++)
124  res[j] = xx[j] - res[j];
125  }
126 
127  delete [] train_codes;
128  }
129 
130  if (by_residual) {
131  delete [] trainset;
132  precompute_table ();
133  }
134 
135 }
136 
137 
138 /* encode a vector as a PQ code, using the residual w.r.t. the coarse centroid when by_residual is set */
139 void IndexIVFPQ::encode (long key, const float * x, uint8_t * code) const
140 {
141  if (by_residual) {
142  float residual_vec[d];
143  quantizer->compute_residual (x, residual_vec, key);
144  pq.compute_code (residual_vec, code);
145  }
146  else pq.compute_code (x, code);
147 }
148 
149 
150 
151 
152 
153 void IndexIVFPQ::encode_multiple (size_t n, const long *keys,
154  const float * x, uint8_t * xcodes) const
155 {
156  if (by_residual) {
157  float *residuals = new float [n * d];
158  // TODO: parallelize?
159  for (size_t i = 0; i < n; i++)
160  quantizer->compute_residual (x + i * d, residuals + i * d, keys[i]);
161  pq.compute_codes (residuals, xcodes, n);
162  delete [] residuals;
163  } else {
164  pq.compute_codes (x, xcodes, n);
165  }
166 }
167 
168 
169 
170 void IndexIVFPQ::add_with_ids (idx_t n, const float * x, const long *xids)
171 {
172  add_core_o (n, x, xids, nullptr);
173 }
174 
175 
176 void IndexIVFPQ::add_core_o (idx_t n, const float * x, const long *xids,
177  float *residuals_2, const long *precomputed_idx)
178 {
179  FAISS_ASSERT (is_trained);
180  double t0 = getmillisecs ();
181  const long * idx;
182 
183  if (precomputed_idx) {
184  idx = precomputed_idx;
185  } else {
186  long * idx0 = new long [n];
187  quantizer->assign (n, x, idx0);
188  idx = idx0;
189  }
190 
191  double t1 = getmillisecs ();
192  uint8_t * xcodes = new uint8_t [n * code_size];
193 
194  const float *to_encode = nullptr;
195 
196  if (by_residual) {
197  float *residuals = new float [n * d];
198  // TODO: parallelize?
199  for (size_t i = 0; i < n; i++) {
200  if (idx[i] < 0)
201  memset (residuals + i * d, 0, sizeof(*residuals) * d);
202  else
203  quantizer->compute_residual (
204  x + i * d, residuals + i * d, idx[i]);
205  }
206  to_encode = residuals;
207  } else {
208  to_encode = x;
209  }
210  pq.compute_codes (to_encode, xcodes, n);
211 
212  double t2 = getmillisecs ();
213  // TODO: parallelize?
214  size_t n_ignore = 0;
215  for (size_t i = 0; i < n; i++) {
216  idx_t key = idx[i];
217  if (key < 0) {
218  n_ignore ++;
219  if (residuals_2)
220  memset (residuals_2 + i * d, 0, sizeof(*residuals_2) * d);
221  continue;
222  }
223  idx_t id = xids ? xids[i] : ntotal + i;
224  ids[key].push_back (id);
225  uint8_t *code = xcodes + i * code_size;
226  for (size_t j = 0; j < code_size; j++)
227  codes[key].push_back (code[j]);
228 
229  if (residuals_2) {
230  float *res2 = residuals_2 + i * d;
231  const float *xi = to_encode + i * d;
232  pq.decode (code, res2);
233  for (int j = 0; j < d; j++)
234  res2[j] = xi[j] - res2[j];
235  }
236 
237  if (maintain_direct_map)
238  direct_map.push_back (key << 32 | (ids[key].size() - 1));
239  }
240 
241  if (by_residual)
242  delete [] to_encode;
243 
244  delete [] xcodes;
245  if (!precomputed_idx)
246  delete [] idx;
247  double t3 = getmillisecs ();
248  if(verbose) {
249  char comment[100] = {0};
250  if (n_ignore > 0)
251  snprintf (comment, 100, "(%ld vectors ignored)", n_ignore);
252  printf(" add_core times: %.3f %.3f %.3f %s\n",
253  t1 - t0, t2 - t1, t3 - t2, comment);
254  }
255  ntotal += n;
256 }
257 
258 void IndexIVFPQ::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
259 {
260  FAISS_ASSERT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
261 
262  std::vector<float> centroid (d);
263 
264  for (int key = 0; key < nlist; key++) {
265  const std::vector<long> & idlist = ids[key];
266  const uint8_t * code_line = codes[key].data();
267 
268  for (long ofs = 0; ofs < idlist.size(); ofs++) {
269  long id = idlist[ofs];
270  if (!(id >= i0 && id < i0 + ni)) continue;
271  float *r = recons + d * (id - i0);
272  if (by_residual) {
273  quantizer->reconstruct (key, centroid.data());
274  pq.decode (code_line + ofs * pq.code_size, r);
275  for (int j = 0; j < d; j++) {
276  r[j] += centroid[j];
277  }
278  }
279  else {
280  pq.decode (code_line + ofs * pq.code_size, r);
281  }
282  }
283  }
284 }
285 
286 
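/* Note on the direct map: when maintain_direct_map is set, direct_map[id]
 * packs the inverted list number in the high 32 bits and the offset within
 * that list in the low 32 bits (see the key << 32 | ofs construction in
 * add_core_o above). reconstruct() below decodes it the same way. */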
287 void IndexIVFPQ::reconstruct (idx_t key, float * recons) const
288 {
289  FAISS_ASSERT (direct_map.size() == ntotal);
290  int list_no = direct_map[key] >> 32;
291  int ofs = direct_map[key] & 0xffffffff;
292 
293  quantizer->reconstruct (list_no, recons);
294  const uint8_t * code = &(codes[list_no][ofs * pq.code_size]);
295 
296  for (size_t m = 0; m < pq.M; m++) {
297  float * out = recons + m * pq.dsub;
298  const float * cent = pq.get_centroids (m, code[m]);
299  for (size_t i = 0; i < pq.dsub; i++) {
300  out[i] += cent[i];
301  }
302  }
303 }
304 
305 
306 
307 void IndexIVFPQ::merge_from_residuals (IndexIVF &other_in)
308 {
309  IndexIVFPQ &other = dynamic_cast<IndexIVFPQ &> (other_in);
310  for (int i = 0; i < nlist; i++) {
311  codes[i].insert (codes[i].end(),
312  other.codes[i].begin(), other.codes[i].end());
313  other.codes[i].clear();
314  }
315 }
316 
317 void IndexIVFPQ::copy_subset_to (IndexIVFPQ & other, int subset_type,
318  long a1, long a2) const
319 {
320  FAISS_ASSERT (nlist == other.nlist);
321  FAISS_ASSERT (!other.maintain_direct_map);
322  size_t code_size = pq.code_size;
323  for (long list_no = 0; list_no < nlist; list_no++) {
324  const std::vector<idx_t> & ids_in = ids[list_no];
325  std::vector<idx_t> & ids_out = other.ids[list_no];
326  const std::vector<uint8_t> & codes_in = codes[list_no];
327  std::vector<uint8_t> & codes_out = other.codes[list_no];
328 
329  for (long i = 0; i < ids_in.size(); i++) {
330  idx_t id = ids_in[i];
331  if (subset_type == 0 && a1 <= id && id < a2) {
332  ids_out.push_back (id);
333  codes_out.insert (codes_out.end(),
334  codes_in.begin() + i * code_size,
335  codes_in.begin() + (i + 1) * code_size);
336  other.ntotal++;
337  }
338  }
339  }
340 }
341 
342 
343 
344 
345 
346 /** Precomputed tables for residuals
347  *
348  * During IVFPQ search with by_residual, we compute
349  *
350  * d = || x - y_C - y_R ||^2
351  *
352  * where x is the query vector, y_C the coarse centroid, y_R the
353  * refined PQ centroid. The expression can be decomposed as:
354  *
355  *    d = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C|y_R) - 2 * (x|y_R)
356  *        ---------------   ---------------------------   -----------
357  *            term 1                  term 2                 term 3
358  *
359  * When using multiprobe, we use the following decomposition:
360  * - term 1 is the distance to the coarse centroid, that is computed
361  * during the 1st stage search.
362  * - term 2 can be precomputed, as it does not involve x. However,
363  * because of the PQ, it needs nlist * M * ksub storage. This is why
364  * use_precomputed_table is off by default
365  * - term 3 is the classical non-residual distance table.
366  *
367  * Since y_R is defined by a product quantizer, it is split across
368  * subvectors and stored separately for each subvector. If the coarse
369  * quantizer is a MultiIndexQuantizer then the table can be stored
370  * more compactly.
371  *
372  * At search time, the tables for term 2 and term 3 are added up. This
373  * is faster when the length of the lists is > ksub * M.
374  */
375 
376 void IndexIVFPQ::precompute_table ()
377 {
378 
379 
380  if (use_precomputed_table == 0) { // then choose the type of table
381  if (quantizer->metric_type == METRIC_INNER_PRODUCT) {
382  fprintf(stderr, "IndexIVFPQ::precompute_table: WARN precomputed "
383  "tables not supported for inner product quantizers\n");
384  return;
385  }
386  const MultiIndexQuantizer *miq =
387  dynamic_cast<const MultiIndexQuantizer *> (quantizer);
388  if (miq && pq.M % miq->pq.M == 0)
389  use_precomputed_table = 2;
390  else
391  use_precomputed_table = 1;
392  } // otherwise assume user has set appropriate flag on input
393 
394 
395  // squared norms of the PQ centroids
396  std::vector<float> r_norms (pq.M * pq.ksub, 0.0/0.0);
397  for (int m = 0; m < pq.M; m++)
398  for (int j = 0; j < pq.ksub; j++)
399  r_norms [m * pq.ksub + j] =
400  fvec_norm_L2sqr (pq.get_centroids (m, j), pq.dsub);
401 
402  if (use_precomputed_table == 1) {
403 
404  precomputed_table.resize (nlist * pq.M * pq.ksub);
405  std::vector<float> centroid (d);
406 
407  for (size_t i = 0; i < nlist; i++) {
408  quantizer->reconstruct (i, centroid.data());
409 
410  float *tab = &precomputed_table[i * pq.M * pq.ksub];
411  pq.compute_inner_prod_table (centroid.data(), tab);
412  fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
413  }
414  } else if (use_precomputed_table == 2) {
415  const MultiIndexQuantizer *miq =
416  dynamic_cast<const MultiIndexQuantizer *> (quantizer);
417  FAISS_ASSERT (miq);
418  const ProductQuantizer &cpq = miq->pq;
419  FAISS_ASSERT (pq.M % cpq.M == 0);
420 
421  precomputed_table.resize(cpq.ksub * pq.M * pq.ksub);
422 
423  // reorder PQ centroid table
424  std::vector<float> centroids (d * cpq.ksub, 0.0/0.0);
425 
426  for (int m = 0; m < cpq.M; m++) {
427  for (size_t i = 0; i < cpq.ksub; i++) {
428  memcpy (centroids.data() + i * d + m * cpq.dsub,
429  cpq.get_centroids (m, i),
430  sizeof (*centroids.data()) * cpq.dsub);
431  }
432  }
433 
434  pq.compute_inner_prod_tables (cpq.ksub, centroids.data (),
435  precomputed_table.data ());
436 
437  for (size_t i = 0; i < cpq.ksub; i++) {
438  float *tab = &precomputed_table[i * pq.M * pq.ksub];
439  fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
440  }
441 
442  }
443 }
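/* Illustrative sketch (not part of the original file): precompute_table() is
 * called at the end of train_residual_o() above. With use_precomputed_table
 * left at 0 it picks mode 1 (per-list tables, flat coarse quantizer) or
 * mode 2 (compact tables for a MultiIndexQuantizer). A caller can also force
 * a mode and rebuild the tables explicitly, e.g.
 *
 *     index.use_precomputed_table = 1;   // force per-list term-2 tables
 *     index.precompute_table ();         // (re)fills precomputed_table
 */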
444 
445 namespace {
446 
447 static uint64_t get_cycles () {
448  uint32_t high, low;
449  asm volatile("rdtsc \n\t"
450  : "=a" (low),
451  "=d" (high));
452  return ((uint64_t)high << 32) | (low);
453 }
454 
455 #define TIC t0 = get_cycles()
456 #define TOC get_cycles () - t0
457 
458 
459 
460 /** QueryTables manages the various ways of searching an
461  * IndexIVFPQ. The code contains a lot of branches, depending on:
462  * - metric_type: are we computing L2 or Inner product similarity?
463  * - by_residual: do we encode raw vectors or residuals?
464  * - use_precomputed_table: are x_R|x_C tables precomputed?
465  * - polysemous_ht: are we filtering with polysemous codes?
466  */
467 struct QueryTables {
468 
469  /*****************************************************
470  * General data from the IVFPQ
471  *****************************************************/
472 
473  const IndexIVFPQ & ivfpq;
474 
475  // copied from IndexIVFPQ for easier access
476  int d;
477  const ProductQuantizer & pq;
478  MetricType metric_type;
479  bool by_residual;
480  int use_precomputed_table;
481 
482  // pre-allocated data buffers
483  float * sim_table, * sim_table_2;
484  float * residual_vec, *decoded_vec;
485 
486  // single data buffer
487  std::vector<float> mem;
488 
489  // for table pointers
490  std::vector<const float *> sim_table_ptrs;
491 
492  explicit QueryTables (const IndexIVFPQ & ivfpq):
493  ivfpq(ivfpq),
494  d(ivfpq.d),
495  pq (ivfpq.pq),
496  metric_type (ivfpq.metric_type),
497  by_residual (ivfpq.by_residual),
498  use_precomputed_table (ivfpq.use_precomputed_table)
499  {
500  mem.resize (pq.ksub * pq.M * 2 + d *2);
501  sim_table = mem.data();
502  sim_table_2 = sim_table + pq.ksub * pq.M;
503  residual_vec = sim_table_2 + pq.ksub * pq.M;
504  decoded_vec = residual_vec + d;
505 
506  // for polysemous
507  if (ivfpq.polysemous_ht != 0) {
508  q_code.resize (pq.code_size);
509  }
510  init_list_cycles = 0;
511  sim_table_ptrs.resize (pq.M);
512  }
513 
514  /*****************************************************
515  * What we do when query is known
516  *****************************************************/
517 
518  // field specific to query
519  const float * qi;
520 
521  // query-specific initialization
522  void init_query (const float * qi) {
523  this->qi = qi;
524  if (metric_type == METRIC_INNER_PRODUCT)
525  init_query_IP ();
526  else
527  init_query_L2 ();
528  if (!by_residual && ivfpq.polysemous_ht != 0)
529  pq.compute_code (qi, q_code.data());
530  }
531 
532  void init_query_IP () {
533  // precompute some tables specific to the query qi
534  pq.compute_inner_prod_table (qi, sim_table);
535  // we compute negated inner products for use with the maxheap
536  for (int i = 0; i < pq.ksub * pq.M; i++) {
537  sim_table[i] = - sim_table[i];
538  }
539  }
540 
541  void init_query_L2 () {
542  if (!by_residual) {
543  pq.compute_distance_table (qi, sim_table);
544  } else if (use_precomputed_table) {
545  pq.compute_inner_prod_table (qi, sim_table_2);
546  }
547  }
548 
549  /*****************************************************
550  * When inverted list is known: prepare computations
551  *****************************************************/
552 
553  // fields specific to list
554  Index::idx_t key;
555  float coarse_dis;
556  std::vector<uint8_t> q_code;
557 
558  uint64_t init_list_cycles;
559 
560  /// once we know the query and the centroid, we can prepare the
561  /// sim_table that will be used for accumulation
562  /// and dis0, the initial value
563  float precompute_list_tables () {
564  float dis0 = 0;
565  uint64_t t0; TIC;
566  if (by_residual) {
567  if (metric_type == METRIC_INNER_PRODUCT)
568  dis0 = precompute_list_tables_IP ();
569  else
570  dis0 = precompute_list_tables_L2 ();
571  }
572  init_list_cycles += TOC;
573  return dis0;
574  }
575 
576  float precompute_list_table_pointers () {
577  float dis0 = 0;
578  uint64_t t0; TIC;
579  if (by_residual) {
580  if (metric_type == METRIC_INNER_PRODUCT)
581  FAISS_ASSERT (!"not implemented");
582  else
583  dis0 = precompute_list_table_pointers_L2 ();
584  }
585  init_list_cycles += TOC;
586  return dis0;
587  }
588 
589  /*****************************************************
590  * compute tables for inner prod
591  *****************************************************/
592 
593  float precompute_list_tables_IP ()
594  {
595  // prepare the sim_table that will be used for accumulation
596  // and dis0, the initial value
597  ivfpq.quantizer->reconstruct (key, decoded_vec);
598  // decoded_vec = centroid
599  float dis0 = -fvec_inner_product (qi, decoded_vec, d);
600 
601  if (ivfpq.polysemous_ht) {
602  for (int i = 0; i < d; i++) {
603  residual_vec [i] = qi[i] - decoded_vec[i];
604  }
605  pq.compute_code (residual_vec, q_code.data());
606  }
607  return dis0;
608  }
609 
610 
611  /*****************************************************
612  * compute tables for L2 distance
613  *****************************************************/
614 
615  float precompute_list_tables_L2 ()
616  {
617  float dis0 = 0;
618 
619  if (use_precomputed_table == 0) {
620  ivfpq.quantizer->compute_residual (qi, residual_vec, key);
621  pq.compute_distance_table (residual_vec, sim_table);
622  } else if (use_precomputed_table == 1) {
623  dis0 = coarse_dis;
624 
625  fvec_madd (pq.M * pq.ksub,
626  &ivfpq.precomputed_table [key * pq.ksub * pq.M],
627  -2.0, sim_table_2,
628  sim_table);
629  } else if (use_precomputed_table == 2) {
630  dis0 = coarse_dis;
631 
632  const MultiIndexQuantizer *miq =
633  dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
634  FAISS_ASSERT (miq);
635  const ProductQuantizer &cpq = miq->pq;
636  int Mf = pq.M / cpq.M;
637 
638  const float *qtab = sim_table_2; // query-specific table
639  float *ltab = sim_table; // (output) list-specific table
640 
641  long k = key;
642  for (int cm = 0; cm < cpq.M; cm++) {
643  // compute PQ index
644  int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
645  k >>= cpq.nbits;
646 
647  // get corresponding table
648  const float *pc = &ivfpq.precomputed_table
649  [(ki * pq.M + cm * Mf) * pq.ksub];
650 
651  if (ivfpq.polysemous_ht == 0) {
652 
653  // sum up with query-specific table
654  fvec_madd (Mf * pq.ksub,
655  pc,
656  -2.0, qtab,
657  ltab);
658  ltab += Mf * pq.ksub;
659  qtab += Mf * pq.ksub;
660  } else {
661  for (int m = cm * Mf; m < (cm + 1) * Mf; m++) {
662  q_code[m] = fvec_madd_and_argmin
663  (pq.ksub, pc, -2, qtab, ltab);
664  pc += pq.ksub;
665  ltab += pq.ksub;
666  qtab += pq.ksub;
667  }
668  }
669 
670  }
671  }
672 
673  return dis0;
674  }
675 
676  float precompute_list_table_pointers_L2 ()
677  {
678  float dis0 = 0;
679 
680  if (use_precomputed_table == 1) {
681  dis0 = coarse_dis;
682 
683  const float * s = &ivfpq.precomputed_table [key * pq.ksub * pq.M];
684  for (int m = 0; m < pq.M; m++) {
685  sim_table_ptrs [m] = s;
686  s += pq.ksub;
687  }
688  } else if (use_precomputed_table == 2) {
689  dis0 = coarse_dis;
690 
691  const MultiIndexQuantizer *miq =
692  dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
693  FAISS_ASSERT (miq);
694  const ProductQuantizer &cpq = miq->pq;
695  int Mf = pq.M / cpq.M;
696 
697  long k = key;
698  int m0 = 0;
699  for (int cm = 0; cm < cpq.M; cm++) {
700  int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
701  k >>= cpq.nbits;
702 
703  const float *pc = &ivfpq.precomputed_table
704  [(ki * pq.M + cm * Mf) * pq.ksub];
705 
706  for (int m = m0; m < m0 + Mf; m++) {
707  sim_table_ptrs [m] = pc;
708  pc += pq.ksub;
709  }
710  m0 += Mf;
711  }
712  } else FAISS_ASSERT (!"need precomputed tables");
713 
714  if (ivfpq.polysemous_ht) {
715  FAISS_ASSERT (!"not implemented");
716  // Not clear that it makes sense to implement this,
717  // because it costs M * ksub, which is what we wanted to
718  // avoid with the table pointers.
719  }
720 
721  return dis0;
722  }
723 
724 
725 };
726 
727 
728 /*****************************************************
729  * Scanning the codes.
730  * The scanning functions call their favorite precompute_*
731  * function to precompute the tables they need.
732  *****************************************************/
733 template <typename IDType>
734 struct InvertedListScanner: QueryTables {
735 
736  const uint8_t * __restrict list_codes;
737  const IDType * list_ids;
738  size_t list_size;
739 
740  explicit InvertedListScanner (const IndexIVFPQ & ivfpq):
741  QueryTables (ivfpq)
742  {
743  FAISS_ASSERT(pq.byte_per_idx == 1);
744  n_hamming_pass = 0;
745  }
746 
747  /// list-specific initialization
748  void init_list (Index::idx_t key, float coarse_dis,
749  size_t list_size_in, const IDType *list_ids_in,
750  const uint8_t *list_codes_in) {
751  this->key = key;
752  this->coarse_dis = coarse_dis;
753  list_size = list_size_in;
754  list_codes = list_codes_in;
755  list_ids = list_ids_in;
756  }
757 
758  /*****************************************************
759  * Scanning the codes: simple PQ scan.
760  *****************************************************/
761 
762  /// version of the scan where we use precomputed tables
763  void scan_list_with_table (
764  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
765  {
766  float dis0 = precompute_list_tables ();
767 
768  for (size_t j = 0; j < list_size; j++) {
769 
770  float dis = dis0;
771  const float *tab = sim_table;
772 
773  for (size_t m = 0; m < pq.M; m++) {
774  dis += tab[*list_codes++];
775  tab += pq.ksub;
776  }
777 
778  if (dis < heap_sim[0]) {
779  maxheap_pop (k, heap_sim, heap_ids);
780  long id = store_pairs ? (key << 32 | j) : list_ids[j];
781  maxheap_push (k, heap_sim, heap_ids, dis, id);
782  }
783  }
784  }
785 
786 
787  /// tables are not precomputed, but pointers are provided to the
788  /// relevant X_c|x_r tables
789  void scan_list_with_pointer (
790  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
791  {
792 
793  float dis0 = precompute_list_table_pointers ();
794 
795  for (size_t j = 0; j < list_size; j++) {
796 
797  float dis = dis0;
798  const float *tab = sim_table_2;
799 
800  for (size_t m = 0; m < pq.M; m++) {
801  int ci = *list_codes++;
802  dis += sim_table_ptrs [m][ci] - 2 * tab [ci];
803  tab += pq.ksub;
804  }
805 
806  if (dis < heap_sim[0]) {
807  maxheap_pop (k, heap_sim, heap_ids);
808  long id = store_pairs ? (key << 32 | j) : list_ids[j];
809  maxheap_push (k, heap_sim, heap_ids, dis, id);
810  }
811  }
812 
813  }
814 
815  /// nothing is precomputed: access residuals on-the-fly
816  void scan_on_the_fly_dist (
817  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
818  {
819 
820  if (by_residual && use_precomputed_table) {
821  scan_list_with_pointer (k, heap_sim, heap_ids, store_pairs);
822  return;
823  }
824 
825  const float *dvec;
826  float dis0 = 0;
827 
828  if (by_residual) {
829  if (metric_type == METRIC_INNER_PRODUCT) {
830  ivfpq.quantizer->reconstruct (key, residual_vec);
831  dis0 = fvec_inner_product (residual_vec, qi, d);
832  } else {
833  ivfpq.quantizer->compute_residual (qi, residual_vec, key);
834  }
835  dvec = residual_vec;
836  } else {
837  dvec = qi;
838  dis0 = 0;
839  }
840 
841  for (size_t j = 0; j < list_size; j++) {
842 
843  pq.decode (list_codes, decoded_vec);
844  list_codes += pq.code_size;
845 
846  float dis;
847  if (metric_type == METRIC_INNER_PRODUCT) {
848  dis = -dis0 - fvec_inner_product (decoded_vec, qi, d);
849  } else {
850  dis = fvec_L2sqr (decoded_vec, dvec, d);
851  }
852 
853  if (dis < heap_sim[0]) {
854  maxheap_pop (k, heap_sim, heap_ids);
855  long id = store_pairs ? (key << 32 | j) : list_ids[j];
856  maxheap_push (k, heap_sim, heap_ids, dis, id);
857  }
858  }
859  }
860 
861  /*****************************************************
862  * Scanning codes with polysemous filtering
863  *****************************************************/
864 
865  // number of codes that passed the Hamming distance test
866  size_t n_hamming_pass;
867 
868 
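/* The scan below first compares each database code to the query code q_code
 * with a Hamming distance (via the HammingComputer); only codes with a
 * distance strictly below polysemous_ht are evaluated with the full PQ
 * distance table. n_hamming_pass counts how many codes passed the filter. */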
869  template <class HammingComputer>
870  void scan_list_polysemous_hc (
871  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
872  {
873  float dis0 = precompute_list_tables ();
874  int ht = ivfpq.polysemous_ht;
875 
876  int code_size = pq.code_size;
877 
878  HammingComputer hc (q_code.data(), code_size);
879 
880  for (size_t j = 0; j < list_size; j++) {
881  const uint8_t *b_code = list_codes;
882  int hd = hc.hamming (b_code);
883  if (hd < ht) {
884  n_hamming_pass ++;
885 
886  float dis = dis0;
887  const float *tab = sim_table;
888 
889  for (size_t m = 0; m < pq.M; m++) {
890  dis += tab[*b_code++];
891  tab += pq.ksub;
892  }
893 
894  if (dis < heap_sim[0]) {
895  maxheap_pop (k, heap_sim, heap_ids);
896  long id = store_pairs ? (key << 32 | j) : list_ids[j];
897  maxheap_push (k, heap_sim, heap_ids, dis, id);
898  }
899  }
900  list_codes += code_size;
901  }
902  }
903 
904  void scan_list_polysemous (
905  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
906  {
907  switch (pq.code_size) {
908 #define HANDLE_CODE_SIZE(cs) \
909  case cs: \
910  scan_list_polysemous_hc <HammingComputer ## cs> \
911  (k, heap_sim, heap_ids, store_pairs); \
912  break
913  HANDLE_CODE_SIZE(4);
914  HANDLE_CODE_SIZE(8);
915  HANDLE_CODE_SIZE(16);
916  HANDLE_CODE_SIZE(20);
917  HANDLE_CODE_SIZE(32);
918  HANDLE_CODE_SIZE(64);
919 #undef HANDLE_CODE_SIZE
920  default:
921  if (pq.code_size % 8 == 0)
922  scan_list_polysemous_hc <HammingComputerM8>
923  (k, heap_sim, heap_ids, store_pairs);
924  else
925  scan_list_polysemous_hc <HammingComputerM4>
926  (k, heap_sim, heap_ids, store_pairs);
927  break;
928  }
929  }
930 
931 };
932 
933 
934 
935 
936 } // anonymous namespace
937 
938 
939 IndexIVFPQStats indexIVFPQ_stats;
940 
941 void IndexIVFPQStats::reset () {
942  memset (this, 0, sizeof (*this));
943 }
944 
945 
946 void IndexIVFPQ::search_knn_with_key (
947  size_t nx,
948  const float * qx,
949  const long * keys,
950  const float * coarse_dis,
951  float_maxheap_array_t * res,
952  bool store_pairs) const
953 {
954  const size_t k = res->k;
955 
956 #pragma omp parallel
957  {
958  InvertedListScanner<long> qt (*this);
959  size_t stats_nlist = 0;
960  size_t stats_ncode = 0;
961  uint64_t init_query_cycles = 0;
962  uint64_t scan_cycles = 0;
963  uint64_t heap_cycles = 0;
964 
965 #pragma omp for
966  for (size_t i = 0; i < nx; i++) {
967  const float *qi = qx + i * d;
968  const long * keysi = keys + i * nprobe;
969  const float *coarse_dis_i = coarse_dis + i * nprobe;
970  float * heap_sim = res->get_val (i);
971  long * heap_ids = res->get_ids (i);
972 
973  uint64_t t0;
974  TIC;
975  maxheap_heapify (k, heap_sim, heap_ids);
976  heap_cycles += TOC;
977 
978  TIC;
979  qt.init_query (qi);
980  init_query_cycles += TOC;
981 
982  size_t nscan = 0;
983 
984  for (size_t ik = 0; ik < nprobe; ik++) {
985  long key = keysi[ik]; /* select the list */
986  if (key < 0) {
987  // not enough centroids for multiprobe
988  continue;
989  }
990  if (key >= (long) nlist) {
991  fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
992  throw;
993  }
994  size_t list_size = ids[key].size();
995  stats_nlist ++;
996  nscan += list_size;
997 
998  if (list_size == 0) continue;
999 
1000  qt.init_list (key, coarse_dis_i[ik],
1001  list_size, ids[key].data(),
1002  codes[key].data());
1003 
1004  TIC;
1005  if (polysemous_ht > 0) {
1006  qt.scan_list_polysemous
1007  (k, heap_sim, heap_ids, store_pairs);
1008  } else if (list_size > scan_table_threshold) {
1009  qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
1010  } else {
1011  qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
1012  }
1013  scan_cycles += TOC;
1014 
1015  if (max_codes && nscan >= max_codes) break;
1016  }
1017  stats_ncode += nscan;
1018  TIC;
1019  maxheap_reorder (k, heap_sim, heap_ids);
1020 
1021  if (metric_type == METRIC_INNER_PRODUCT) {
1022  for (size_t j = 0; j < k; j++)
1023  heap_sim[j] = -heap_sim[j];
1024  }
1025  heap_cycles += TOC;
1026  }
1027 
1028 #pragma omp critical
1029  {
1030  indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
1031  indexIVFPQ_stats.nlist += stats_nlist;
1032  indexIVFPQ_stats.ncode += stats_ncode;
1033 
1034  indexIVFPQ_stats.init_query_cycles += init_query_cycles;
1035  indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
1036  indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
1037  indexIVFPQ_stats.heap_cycles += heap_cycles;
1038  }
1039 
1040  }
1041  indexIVFPQ_stats.nq += nx;
1042 }
1043 
1044 
1045 void IndexIVFPQ::search (idx_t n, const float *x, idx_t k,
1046  float *distances, idx_t *labels) const
1047 {
1048  long * idx = new long [n * nprobe];
1049  float * coarse_dis = new float [n * nprobe];
1050  uint64_t t0;
1051  TIC;
1052  quantizer->search (n, x, nprobe, coarse_dis, idx);
1053  indexIVFPQ_stats.assign_cycles += TOC;
1054 
1055  TIC;
1056  float_maxheap_array_t res = { size_t(n), size_t(k), labels, distances};
1057 
1058  search_knn_with_key (n, x, idx, coarse_dis, &res);
1059  delete [] idx;
1060  delete [] coarse_dis;
1061  indexIVFPQ_stats.search_cycles += TOC;
1062 }
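/* Illustrative usage sketch (not part of the original file): the two-stage
 * search above is what a caller triggers through the generic Index::search
 * interface. nq / queries are hypothetical names for the query batch.
 *
 *     index.nprobe = 16;                           // nb of lists to visit
 *     std::vector<float> D (nq * k);
 *     std::vector<faiss::Index::idx_t> I (nq * k);
 *     index.search (nq, queries, k, D.data(), I.data());
 */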
1063 
1064 
1065 void IndexIVFPQ::reset ()
1066 {
1067  IndexIVF::reset();
1068  for (size_t key = 0; key < nlist; key++) {
1069  codes[key].clear();
1070  }
1071 }
1072 
1073 long IndexIVFPQ::remove_ids (const IDSelector & sel)
1074 {
1075  FAISS_ASSERT (!maintain_direct_map ||
1076  !"direct map remove not implemented");
1077  long nremove = 0;
1078 #pragma omp parallel for reduction(+: nremove)
1079  for (long i = 0; i < nlist; i++) {
1080  std::vector<idx_t> & idsi = ids[i];
1081  uint8_t * codesi = codes[i].data();
1082 
1083  long l = idsi.size(), j = 0;
1084  while (j < l) {
1085  if (sel.is_member (idsi[j])) {
1086  l--;
1087  idsi [j] = idsi [l];
1088  memmove (codesi + j * code_size,
1089  codesi + l * code_size, code_size);
1090  } else {
1091  j++;
1092  }
1093  }
1094  if (l < idsi.size()) {
1095  nremove += idsi.size() - l;
1096  idsi.resize (l);
1097  codes[i].resize (l * code_size);
1098  }
1099  }
1100  ntotal -= nremove;
1101  return nremove;
1102 }
1103 
1104 
1105 IndexIVFPQ::IndexIVFPQ ()
1106 {
1107  // initialize some runtime values
1108  use_precomputed_table = 0;
1109  scan_table_threshold = 0;
1110  do_polysemous_training = false;
1111  polysemous_ht = 0;
1112  max_codes = 0;
1113  polysemous_training = nullptr;
1114 }
1115 
1116 
1117 struct CodeCmp {
1118  const uint8_t *tab;
1119  size_t code_size;
1120  bool operator () (int a, int b) const {
1121  return cmp (a, b) > 0;
1122  }
1123  int cmp (int a, int b) const {
1124  return memcmp (tab + a * code_size, tab + b * code_size,
1125  code_size);
1126  }
1127 };
1128 
1129 
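/* find_duplicates reports groups of vectors that have exactly the same PQ
 * code within an inverted list. On output, lims[0..ngroup] delimits the
 * groups CSR-style and dup_ids[lims[g]..lims[g+1]) holds the ids of group g;
 * the return value is the number of groups. */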
1130 size_t IndexIVFPQ::find_duplicates (idx_t *dup_ids, size_t *lims) const
1131 {
1132  size_t ngroup = 0;
1133  lims[0] = 0;
1134  for (size_t list_no = 0; list_no < nlist; list_no++) {
1135  size_t n = ids[list_no].size();
1136  std::vector<int> ord (n);
1137  for (int i = 0; i < n; i++) ord[i] = i;
1138  CodeCmp cs = { codes[list_no].data(), code_size };
1139  std::sort (ord.begin(), ord.end(), cs);
1140 
1141  const idx_t *list_ids = ids[list_no].data();
1142  int prev = -1; // all elements from prev to i-1 are equal
1143  for (int i = 0; i < n; i++) {
1144  if (prev >= 0 && cs.cmp (ord [prev], ord [i]) == 0) {
1145  // same as previous => remember
1146  if (prev + 1 == i) { // start new group
1147  ngroup++;
1148  lims[ngroup] = lims[ngroup - 1];
1149  dup_ids [lims [ngroup]++] = list_ids [ord [prev]];
1150  }
1151  dup_ids [lims [ngroup]++] = list_ids [ord [i]];
1152  } else { // not same as previous.
1153  prev = i;
1154  }
1155  }
1156  }
1157  return ngroup;
1158 }
1159 
1160 
1161 
1162 
1163 /*****************************************
1164  * IndexIVFPQR implementation
1165  ******************************************/
1166 
1167 IndexIVFPQR::IndexIVFPQR (
1168  Index * quantizer, size_t d, size_t nlist,
1169  size_t M, size_t nbits_per_idx,
1170  size_t M_refine, size_t nbits_per_idx_refine):
1171  IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx),
1172  refine_pq (d, M_refine, nbits_per_idx_refine),
1173  k_factor (4)
1174 {
1175  by_residual = true;
1176  set_typename();
1177 }
1178 
1179 IndexIVFPQR::IndexIVFPQR ():
1180  k_factor (1)
1181 {
1182  by_residual = true;
1183 }
1184 
1185 
1186 void IndexIVFPQR::set_typename()
1187 {
1188  std::stringstream s;
1189  s << "IvfPQR_" << pq.M << "x" << pq.nbits
1190  << "+" << refine_pq.M << "x" << refine_pq.nbits
1191  << "[" << nlist << ":" << quantizer->index_typename << "]";
1192  index_typename = s.str();
1193 
1194 }
1195 
1196 
1197 void IndexIVFPQR::reset()
1198 {
1199  IndexIVFPQ::reset();
1200  refine_codes.clear();
1201 }
1202 
1203 
1204 
1205 
1206 void IndexIVFPQR::train_residual (idx_t n, const float *x)
1207 {
1208 
1209  float * residual_2 = new float [n * d];
1210 
1211  train_residual_o (n, x, residual_2);
1212 
1213  if (verbose)
1214  printf ("training %zdx%zd 2nd level PQ quantizer on %ld %dD-vectors\n",
1215  refine_pq.M, refine_pq.ksub, n, d);
1216 
1217  refine_pq.cp.max_points_per_centroid = 1000;
1218  refine_pq.cp.verbose = verbose;
1219 
1220  refine_pq.train (n, residual_2);
1221  delete [] residual_2;
1222 
1223 }
1224 
1225 
1226 void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const long *xids) {
1227  add_core (n, x, xids, nullptr);
1228 }
1229 
1230 void IndexIVFPQR::add_core (idx_t n, const float *x, const long *xids,
1231  const long *precomputed_idx) {
1232 
1233  float * residual_2 = new float [n * d];
1234 
1235  idx_t n0 = ntotal;
1236 
1237  add_core_o (n, x, xids, residual_2, precomputed_idx);
1238 
1239  refine_codes.resize (ntotal * refine_pq.code_size);
1240 
1241  refine_pq.compute_codes (
1242  residual_2, &refine_codes[n0 * refine_pq.code_size], n);
1243 
1244  delete [] residual_2;
1245 
1246 }
1247 
1248 
1249 void IndexIVFPQR::search (
1250  idx_t n, const float *x, idx_t k,
1251  float *distances, idx_t *labels) const
1252 {
1253  FAISS_ASSERT (is_trained);
1254  long * idx = new long [n * nprobe];
1255  float * L1_dis = new float [n * nprobe];
1256  uint64_t t0;
1257  TIC;
1258  quantizer->search (n, x, nprobe, L1_dis, idx);
1259  indexIVFPQ_stats.assign_cycles += TOC;
1260 
1261  TIC;
1262  size_t k_coarse = long(k * k_factor);
1263  idx_t *coarse_labels = new idx_t [k_coarse * n];
1264  { // query with quantizer levels 1 and 2.
1265  float *coarse_distances = new float [k_coarse * n];
1266 
1267  faiss::float_maxheap_array_t res_coarse = {
1268  size_t(n), k_coarse, coarse_labels, coarse_distances};
1269  search_knn_with_key (n, x, idx, L1_dis, &res_coarse, true);
1270  delete [] coarse_distances;
1271  }
1272  delete [] L1_dis;
1273  indexIVFPQ_stats.search_cycles += TOC;
1274 
1275  TIC;
1276 
1277  // 3rd level refinement
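// The shortlist entries come from search_knn_with_key with store_pairs=true,
// so each label packs (list_no << 32 | offset); this gives direct access to
// the stored PQ code and id without a direct_map. The k * k_factor candidates
// are then re-ranked with the more precise refine_pq codes.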
1278  size_t n_refine = 0;
1279 #pragma omp parallel reduction(+ : n_refine)
1280  {
1281  // tmp buffers
1282  float *residual_1 = new float [2 * d];
1283  float *residual_2 = residual_1 + d;
1284 #pragma omp for
1285  for (idx_t i = 0; i < n; i++) {
1286  const float *xq = x + i * d;
1287  const long * shortlist = coarse_labels + k_coarse * i;
1288  float * heap_sim = distances + k * i;
1289  long * heap_ids = labels + k * i;
1290  maxheap_heapify (k, heap_sim, heap_ids);
1291 
1292  for (int j = 0; j < k_coarse; j++) {
1293  long sl = shortlist[j];
1294 
1295  if (sl == -1) continue;
1296 
1297  int list_no = sl >> 32;
1298  int ofs = sl & 0xffffffff;
1299 
1300  assert (list_no >= 0 && list_no < nlist);
1301  assert (ofs >= 0 && ofs < ids[list_no].size());
1302 
1303  // 1st level residual
1304  quantizer->compute_residual (xq, residual_1, list_no);
1305 
1306  // 2nd level residual
1307  const uint8_t * l2code = &codes[list_no][ofs * pq.code_size];
1308  pq.decode (l2code, residual_2);
1309  for (int l = 0; l < d; l++)
1310  residual_2[l] = residual_1[l] - residual_2[l];
1311 
1312  // 3rd level residual's approximation
1313  idx_t id = ids[list_no][ofs];
1314  assert (0 <= id && id < ntotal);
1315  refine_pq.decode (&refine_codes [id * refine_pq.code_size],
1316  residual_1);
1317 
1318  float dis = fvec_L2sqr (residual_1, residual_2, d);
1319 
1320  if (dis < heap_sim[0]) {
1321  maxheap_pop (k, heap_sim, heap_ids);
1322  maxheap_push (k, heap_sim, heap_ids, dis, id);
1323  }
1324  n_refine ++;
1325  }
1326  maxheap_reorder (k, heap_sim, heap_ids);
1327  }
1328  delete [] residual_1;
1329  }
1330  delete [] coarse_labels;
1331  delete [] idx;
1332  indexIVFPQ_stats.nrefine += n_refine;
1333  indexIVFPQ_stats.refine_cycles += TOC;
1334 }
1335 
1336 void IndexIVFPQR::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
1337 {
1338  float *r3 = new float [d];
1339 
1340  IndexIVFPQ::reconstruct_n (i0, ni, recons);
1341 
1342  for (idx_t i = i0; i < i0 + ni; i++) {
1343  float *r = recons + i * d;
1344  refine_pq.decode (&refine_codes [i * refine_pq.code_size], r3);
1345 
1346  for (int j = 0; j < d; j++)
1347  r[j] += r3[j];
1348 
1349  }
1350  delete [] r3;
1351 }
1352 
1353 void IndexIVFPQR::merge_from_residuals (IndexIVF &other_in)
1354 {
1355  IndexIVFPQR &other = dynamic_cast<IndexIVFPQR &> (other_in);
1356  IndexIVFPQ::merge_from_residuals (other_in);
1357  refine_codes.insert (refine_codes.end(),
1358  other.refine_codes.begin(), other.refine_codes.end());
1359  other.refine_codes.clear();
1360 }
1361 
1362 long IndexIVFPQR::remove_ids (const IDSelector & /* sel */)
1363 {
1364  FAISS_ASSERT(!"not implemented");
1365 }
1366 
1367 /*****************************************
1368  * IndexIVFPQCompact implementation
1369  ******************************************/
1370 
1371 IndexIVFPQCompact::IndexIVFPQCompact ()
1372 {
1373  alloc_type = Alloc_type_none;
1374  limits = nullptr;
1375  compact_ids = nullptr;
1376  compact_codes = nullptr;
1377 }
1378 
1379 
1380 IndexIVFPQCompact::IndexIVFPQCompact (const IndexIVFPQ &other)
1381 {
1382  FAISS_ASSERT (other.ntotal < (1UL << 31) ||
1383  !"IndexIVFPQCompact cannot store more than 2G images");
1384 
1385  // here it would be more convenient to just use the
1386  // copy-constructor, but it would copy the lists as well: too much
1387  // overhead...
1388 
1389  // copy fields from Index
1390  d = other.d;
1391  ntotal = other.ntotal;
1392  verbose = other.verbose;
1393  is_trained = other.is_trained;
1394  metric_type = other.metric_type;
1395 
1396  // copy fields from IndexIVF (except ids)
1397  nlist = other.nlist;
1398  nprobe = other.nprobe;
1399  quantizer = other.quantizer;
1400  quantizer_trains_alone = other.quantizer_trains_alone;
1401  own_fields = false;
1402  direct_map = other.direct_map;
1403 
1404  // copy fields from IndexIVFPQ (except codes)
1405  by_residual = other.by_residual;
1406  use_precomputed_table = other.use_precomputed_table;
1407  precomputed_table = other.precomputed_table;
1408  code_size = other.code_size;
1409  pq = other.pq;
1410  do_polysemous_training = other.do_polysemous_training;
1411  polysemous_training = nullptr;
1412 
1413  scan_table_threshold = other.scan_table_threshold;
1414  max_codes = other.max_codes;
1415  polysemous_ht = other.polysemous_ht;
1416 
1417  //allocate
1418  alloc_type = Alloc_type_new;
1419  limits = new uint32_t [nlist + 1];
1420  compact_ids = new uint32_t [ntotal];
1421  compact_codes = new uint8_t [ntotal * code_size];
1422 
1423 
1424  // copy content from other
1425  size_t ofs = 0;
1426  for (size_t i = 0; i < nlist; i++) {
1427  limits [i] = ofs;
1428  const std::vector<long> &other_ids = other.ids[i];
1429  for (size_t j = 0; j < other_ids.size(); j++) {
1430  long id = other_ids[j];
1431  FAISS_ASSERT (id < (1UL << 31) ||
1432  !"IndexIVFPQCompact cannot store ids > 2G");
1433  compact_ids[ofs + j] = id;
1434  }
1435  memcpy (compact_codes + ofs * code_size,
1436  other.codes[i].data(),
1437  other.codes[i].size());
1438  ofs += other_ids.size();
1439  }
1440  FAISS_ASSERT (ofs == ntotal);
1441  limits [nlist] = ofs;
1442 
1443 }
1444 
1445 void IndexIVFPQCompact::add (idx_t, const float *) {
1446  FAISS_ASSERT (!"cannot add to an IndexIVFPQCompact");
1447 }
1448 
1449 void IndexIVFPQCompact::reset () {
1450  FAISS_ASSERT (!"cannot reset an IndexIVFPQCompact");
1451 }
1452 
1453 void IndexIVFPQCompact::train (idx_t, const float *) {
1454  FAISS_ASSERT (!"cannot train an IndexIVFPQCompact");
1455 }
1456 
1457 
1458 
1459 
1460 IndexIVFPQCompact::~IndexIVFPQCompact ()
1461 {
1462  if (alloc_type == Alloc_type_new) {
1463  delete [] limits;
1464  delete [] compact_codes;
1465  delete [] compact_ids;
1466  } else if (alloc_type == Alloc_type_mmap) {
1467  munmap (mmap_buffer, mmap_length);
1468 
1469  }
1470 
1471 }
1472 
1473 void IndexIVFPQCompact::search_knn_with_key (
1474  size_t nx,
1475  const float * qx,
1476  const long * keys,
1477  const float * coarse_dis,
1478  float_maxheap_array_t * res,
1479  bool store_pairs) const
1480 {
1481  const size_t k = res->k;
1482 
1483 #pragma omp parallel
1484  {
1485  InvertedListScanner<uint32_t> qt (*this);
1486  size_t stats_nlist = 0;
1487  size_t stats_ncode = 0;
1488  uint64_t init_query_cycles = 0;
1489  uint64_t scan_cycles = 0;
1490  uint64_t heap_cycles = 0;
1491 
1492 #pragma omp for
1493  for (size_t i = 0; i < nx; i++) {
1494  const float *qi = qx + i * d;
1495  const long * keysi = keys + i * nprobe;
1496  const float *coarse_dis_i = coarse_dis + i * nprobe;
1497  float * heap_sim = res->get_val (i);
1498  long * heap_ids = res->get_ids (i);
1499 
1500  uint64_t t0;
1501  TIC;
1502  maxheap_heapify (k, heap_sim, heap_ids);
1503  heap_cycles += TOC;
1504 
1505  TIC;
1506  qt.init_query (qi);
1507  init_query_cycles += TOC;
1508 
1509  size_t nscan = 0;
1510 
1511  for (size_t ik = 0; ik < nprobe; ik++) {
1512  long key = keysi[ik]; /* select the list */
1513  if (key < 0) {
1514  // not enough centroids for multiprobe
1515  continue;
1516  }
1517  if (key >= (long) nlist) {
1518  fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
1519  throw;
1520  }
1521  size_t list_size = limits[key + 1] - limits[key];
1522  stats_nlist ++;
1523  nscan += list_size;
1524 
1525  if (list_size == 0) continue;
1526 
1527  qt.init_list (key, coarse_dis_i[ik],
1528  list_size, compact_ids + limits[key],
1529  compact_codes + limits[key] * code_size);
1530 
1531  TIC;
1532  if (polysemous_ht > 0) {
1533  qt.scan_list_polysemous
1534  (k, heap_sim, heap_ids, store_pairs);
1535  } else if (list_size > scan_table_threshold) {
1536  qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
1537  } else {
1538  qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
1539  }
1540  scan_cycles += TOC;
1541 
1542  if (max_codes && nscan >= max_codes) break;
1543  }
1544  stats_ncode += nscan;
1545  TIC;
1546  maxheap_reorder (k, heap_sim, heap_ids);
1547 
1548  if (metric_type == METRIC_INNER_PRODUCT) {
1549  for (size_t j = 0; j < k; j++) {
1550  heap_sim[j] = -heap_sim[j];
1551  }
1552  }
1553  heap_cycles += TOC;
1554  }
1555 
1556 #pragma omp critical
1557  {
1558  indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
1559  indexIVFPQ_stats.nlist += stats_nlist;
1560  indexIVFPQ_stats.ncode += stats_ncode;
1561 
1562  indexIVFPQ_stats.init_query_cycles += init_query_cycles;
1563  indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
1564  indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
1565  indexIVFPQ_stats.heap_cycles += heap_cycles;
1566  }
1567 
1568  }
1569  indexIVFPQ_stats.nq += nx;
1570 }
1571 
1572 
1573 
1574 } // namespace faiss