Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVFPQ.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 /* Copyright 2004-present Facebook. All Rights Reserved.
11  Inverted list structure.
12 */
13 
14 #include "IndexIVFPQ.h"
15 
16 #include <cstdio>
17 #include <cassert>
18 
19 #include <sys/mman.h>
20 
21 #include <algorithm>
22 
23 #include "Heap.h"
24 #include "utils.h"
25 
26 #include "Clustering.h"
27 #include "IndexFlat.h"
28 
29 #include "hamming.h"
30 
31 #include "FaissAssert.h"
32 
33 #include "AuxIndexStructures.h"
34 
35 namespace faiss {
36 
37 
38 
39 
40 
41 /*****************************************
42  * IndexIVFPQ implementation
43  ******************************************/
44 
45 IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist,
46  size_t M, size_t nbits_per_idx):
47  IndexIVF (quantizer, d, nlist, METRIC_L2),
48  pq (d, M, nbits_per_idx)
49 {
50  FAISS_ASSERT (nbits_per_idx <= 8);
51  code_size = pq.code_size;
52  is_trained = false;
53  codes.resize (nlist);
54  by_residual = true;
55  use_precomputed_table = 0;
56  scan_table_threshold = 0;
57  max_codes = 0; // means unlimited
58 
59  polysemous_training = nullptr;
60  do_polysemous_training = false;
61  polysemous_ht = 0;
62 
63  set_typename();
64 }
65 
66 
67 
68 void IndexIVFPQ::set_typename ()
69 {
70  std::stringstream s;
71  s << "IvfPQ_" << pq.M << "x" << pq.nbits
72  << "[" << nlist << ":" << quantizer->index_typename << "]";
73  index_typename = s.str();
74 }
75 
76 
77 void IndexIVFPQ::train_residual (idx_t n, const float *x)
78 {
79  train_residual_o (n, x, nullptr);
80 }
81 
82 
/** Train the product quantizer and optionally output second-level residuals.
 *
 * @param n            number of training vectors
 * @param x            training vectors, read as n rows of d floats
 * @param residuals_2  if non-null, receives for each training vector the
 *                     difference between the PQ training input and its PQ
 *                     reconstruction (d floats per vector)
 *
 * NOTE(review): original lines 87, 110 and 112 are missing from this
 * listing (dropped by the HTML extraction). The start of the statement that
 * reassigns x, the condition that opens the polysemous block, and the
 * declaration of pt are therefore not visible here.
 */
83 void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
84 {
85  const float * x_in = x;
86 
 // NOTE(review): the first line of this call is missing; n is passed by
 // address, so the callee may change it -- confirm against the repository.
88  d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
89  x, verbose, pq.cp.seed);
90 
91  const float *trainset;
92  if (by_residual) {
93  if(verbose) printf("computing residuals\n");
94  idx_t * assign = new idx_t [n]; // assignment to coarse centroids
95  quantizer->assign (n, x, assign);
96  float *residuals = new float [n * d];
97  for (idx_t i = 0; i < n; i++)
98  quantizer->compute_residual (x + i * d, residuals+i*d, assign[i]);
99  delete [] assign;
100  trainset = residuals;
101  } else {
102  trainset = x;
103  }
104  if (verbose)
105  printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n",
106  pq.M, pq.ksub, n, d);
107  pq.verbose = verbose;
108  pq.train (n, trainset);
109 
 // NOTE(review): the line opening this block and the declaration of pt are
 // missing; default_pt is used when pt is null.
111  PolysemousTraining default_pt;
113  if (!pt) pt = &default_pt;
114  pt->optimize_pq_for_hamming (pq, n, trainset);
115  }
116 
117  // prepare second-level residuals for refine PQ
118  if (residuals_2) {
119  uint8_t *train_codes = new uint8_t [pq.code_size * n];
120  pq.compute_codes (trainset, train_codes, n);
121 
122  for (idx_t i = 0; i < n; i++) {
123  const float *xx = trainset + i * d;
124  float * res = residuals_2 + i * d;
 // decode into res, then turn it into (input - reconstruction) in place
125  pq.decode (train_codes + i * pq.code_size, res);
126  for (int j = 0; j < d; j++)
127  res[j] = xx[j] - res[j];
128  }
129 
130  delete [] train_codes;
131  }
132 
133  if (by_residual) {
 // trainset is the residuals buffer allocated above in this mode
134  delete [] trainset;
135  precompute_table ();
136  }
137 
 // x only differs from x_in if it was reassigned above; that buffer is freed here
138  if (x_in != x) delete [] x;
139 }
140 
141 
142 /* produce a binary signature based on the residual vector */
143 void IndexIVFPQ::encode (long key, const float * x, uint8_t * code) const
144 {
145  if (by_residual) {
146  float residual_vec[d];
147  quantizer->compute_residual (x, residual_vec, key);
148  pq.compute_code (residual_vec, code);
149  }
150  else pq.compute_code (x, code);
151 }
152 
153 
154 
155 
156 
157 void IndexIVFPQ::encode_multiple (size_t n, const long *keys,
158  const float * x, uint8_t * xcodes) const
159 {
160  if (by_residual) {
161  float *residuals = new float [n * d];
162  // TODO: parallelize?
163  for (size_t i = 0; i < n; i++)
164  quantizer->compute_residual (x + i * d, residuals + i * d, keys[i]);
165  pq.compute_codes (residuals, xcodes, n);
166  delete [] residuals;
167  } else {
168  pq.compute_codes (x, xcodes, n);
169  }
170 }
171 
172 
173 
174 void IndexIVFPQ::add_with_ids (idx_t n, const float * x, const long *xids)
175 {
176  add_core_o (n, x, xids, nullptr);
177 }
178 
179 
180 void IndexIVFPQ::add_core_o (idx_t n, const float * x, const long *xids,
181  float *residuals_2, const long *precomputed_idx)
182 {
183  FAISS_ASSERT (is_trained);
184  double t0 = getmillisecs ();
185  const long * idx;
186 
187  if (precomputed_idx) {
188  idx = precomputed_idx;
189  } else {
190  long * idx0 = new long [n];
191  quantizer->assign (n, x, idx0);
192  idx = idx0;
193  }
194 
195  double t1 = getmillisecs ();
196  uint8_t * xcodes = new uint8_t [n * code_size];
197 
198  const float *to_encode = nullptr;
199 
200  if (by_residual) {
201  float *residuals = new float [n * d];
202  // TODO: parallelize?
203  for (size_t i = 0; i < n; i++) {
204  if (idx[i] < 0)
205  memset (residuals + i * d, 0, sizeof(*residuals) * d);
206  else
207  quantizer->compute_residual (
208  x + i * d, residuals + i * d, idx[i]);
209  }
210  to_encode = residuals;
211  } else {
212  to_encode = x;
213  }
214  pq.compute_codes (to_encode, xcodes, n);
215 
216  double t2 = getmillisecs ();
217  // TODO: parallelize?
218  size_t n_ignore = 0;
219  for (size_t i = 0; i < n; i++) {
220  idx_t key = idx[i];
221  if (key < 0) {
222  n_ignore ++;
223  if (residuals_2)
224  memset (residuals_2, 0, sizeof(*residuals_2) * d);
225  continue;
226  }
227  idx_t id = xids ? xids[i] : ntotal + i;
228  ids[key].push_back (id);
229  uint8_t *code = xcodes + i * code_size;
230  for (size_t j = 0; j < code_size; j++)
231  codes[key].push_back (code[j]);
232 
233  if (residuals_2) {
234  float *res2 = residuals_2 + i * d;
235  const float *xi = to_encode + i * d;
236  pq.decode (code, res2);
237  for (int j = 0; j < d; j++)
238  res2[j] = xi[j] - res2[j];
239  }
240 
242  direct_map.push_back (key << 32 | (ids[key].size() - 1));
243  }
244 
245  if (by_residual)
246  delete [] to_encode;
247 
248  delete [] xcodes;
249  if (!precomputed_idx)
250  delete [] idx;
251  double t3 = getmillisecs ();
252  if(verbose) {
253  char comment[100] = {0};
254  if (n_ignore > 0)
255  snprintf (comment, 100, "(%ld vectors ignored)", n_ignore);
256  printf(" add_core times: %.3f %.3f %.3f %s\n",
257  t1 - t0, t2 - t1, t3 - t2, comment);
258  }
259  ntotal += n;
260 }
261 
262 void IndexIVFPQ::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
263 {
264  FAISS_ASSERT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
265 
266  std::vector<float> centroid (d);
267 
268  for (int key = 0; key < nlist; key++) {
269  const std::vector<long> & idlist = ids[key];
270  const uint8_t * code_line = codes[key].data();
271 
272  for (long ofs = 0; ofs < idlist.size(); ofs++) {
273  long id = idlist[ofs];
274  if (!(id >= i0 && id < i0 + ni)) continue;
275  float *r = recons + d * (id - i0);
276  if (by_residual) {
277  quantizer->reconstruct (key, centroid.data());
278  pq.decode (code_line + ofs * pq.code_size, r);
279  for (int j = 0; j < d; j++) {
280  r[j] += centroid[j];
281  }
282  }
283  else {
284  pq.decode (code_line + ofs * pq.code_size, r);
285  }
286  }
287  }
288 }
289 
290 
291 void IndexIVFPQ::reconstruct (idx_t key, float * recons) const
292 {
293  FAISS_ASSERT (direct_map.size() == ntotal);
294  int list_no = direct_map[key] >> 32;
295  int ofs = direct_map[key] & 0xffffffff;
296 
297  quantizer->reconstruct (list_no, recons);
298  const uint8_t * code = &(codes[list_no][ofs * pq.code_size]);
299 
300  for (size_t m = 0; m < pq.M; m++) {
301  float * out = recons + m * pq.dsub;
302  const float * cent = pq.get_centroids (m, code[m]);
303  for (size_t i = 0; i < pq.dsub; i++) {
304  out[i] += cent[i];
305  }
306  }
307 }
308 
309 
310 
312 {
 // NOTE(review): the signature line (original line 311) is missing from this
 // listing; other_in is the function's parameter.
 // Moves the codes of every inverted list of other_in to the end of the
 // matching list of this index, then empties the source list.
 // The dynamic_cast to a reference throws std::bad_cast if other_in is not
 // an IndexIVFPQ.
313  IndexIVFPQ &other = dynamic_cast<IndexIVFPQ &> (other_in);
314  for (int i = 0; i < nlist; i++) {
315  codes[i].insert (codes[i].end(),
316  other.codes[i].begin(), other.codes[i].end());
317  other.codes[i].clear();
318  }
319 }
320 
321 void IndexIVFPQ::copy_subset_to (IndexIVFPQ & other, int subset_type,
322  long a1, long a2) const
323 {
324  FAISS_ASSERT (nlist == other.nlist);
325  FAISS_ASSERT (!other.maintain_direct_map);
326  size_t code_size = pq.code_size;
327  for (long list_no = 0; list_no < nlist; list_no++) {
328  const std::vector<idx_t> & ids_in = ids[list_no];
329  std::vector<idx_t> & ids_out = other.ids[list_no];
330  const std::vector<uint8_t> & codes_in = codes[list_no];
331  std::vector<uint8_t> & codes_out = other.codes[list_no];
332 
333  for (long i = 0; i < ids_in.size(); i++) {
334  idx_t id = ids_in[i];
335  if (subset_type == 0 && a1 <= id && id < a2) {
336  ids_out.push_back (id);
337  codes_out.insert (codes_out.end(),
338  codes_in.begin() + i * code_size,
339  codes_in.begin() + (i + 1) * code_size);
340  other.ntotal++;
341  }
342  }
343  }
344 }
345 
346 
347 
348 
349 
350 /** Precomputed tables for residuals
351  *
352  * During IVFPQ search with by_residual, we compute
353  *
354  * d = || x - y_C - y_R ||^2
355  *
356  * where x is the query vector, y_C the coarse centroid, y_R the
357  * refined PQ centroid. The expression can be decomposed as:
358  *
359  * d = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C|y_R) - 2 * (x|y_R)
360  * --------------- --------------------------- -------
361  * term 1 term 2 term 3
362  *
363  * When using multiprobe, we use the following decomposition:
364  * - term 1 is the distance to the coarse centroid, that is computed
365  * during the 1st stage search.
366  * - term 2 can be precomputed, as it does not involve x. However,
367  * because of the PQ, it needs nlist * M * ksub storage. This is why
368  * use_precomputed_table is off by default
369  * - term 3 is the classical non-residual distance table.
370  *
371  * Since y_R is defined by a product quantizer, it is split across
372  * subvectors and stored separately for each subvector. If the coarse
373  * quantizer is a MultiIndexQuantizer then the table can be stored
374  * more compactly.
375  *
376  * At search time, the tables for term 2 and term 3 are added up. This
377  * is faster when the length of the lists is > ksub * M.
378  */
379 
381 {
 // NOTE(review): the signature line (original line 380) and original lines
 // 393, 395 and 404 are missing from this listing; the statements they
 // held (the two branches that pick the table type, and the right-hand
 // side of the r_norms assignment) are not visible here.
 //
 // Fills precomputed_table according to use_precomputed_table:
 //   1: one table of pq.M * pq.ksub floats per inverted list;
 //   2: one table per centroid index of a MultiIndexQuantizer's PQ.
382 
383 
384  if (use_precomputed_table == 0) { // then choose the type of table
385  if (quantizer->metric_type == METRIC_INNER_PRODUCT) {
386  fprintf(stderr, "IndexIVFPQ::precompute_table: WARN precomputed "
387  "tables not supported for inner product quantizers\n");
388  return;
389  }
390  const MultiIndexQuantizer *miq =
391  dynamic_cast<const MultiIndexQuantizer *> (quantizer);
392  if (miq && pq.M % miq->pq.M == 0)
394  else
396  } // otherwise assume user has set appropriate flag on input
397 
398 
399  // squared norms of the PQ centroids
 // entries start as NaN (0.0/0.0) and are overwritten by the loop below
400  std::vector<float> r_norms (pq.M * pq.ksub, 0.0/0.0);
401  for (int m = 0; m < pq.M; m++)
402  for (int j = 0; j < pq.ksub; j++)
403  r_norms [m * pq.ksub + j] =
405 
406  if (use_precomputed_table == 1) {
407 
408  precomputed_table.resize (nlist * pq.M * pq.ksub);
409  std::vector<float> centroid (d);
410 
411  for (size_t i = 0; i < nlist; i++) {
412  quantizer->reconstruct (i, centroid.data());
413 
414  float *tab = &precomputed_table[i * pq.M * pq.ksub];
 // inner products of the coarse centroid with the PQ centroids ...
415  pq.compute_inner_prod_table (centroid.data(), tab);
 // ... combined in place with r_norms and a factor 2.0; presumably this
 // yields "term 2" of the decomposition described above -- verify
 // against fvec_madd
416  fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
417  }
418  } else if (use_precomputed_table == 2) {
419  const MultiIndexQuantizer *miq =
420  dynamic_cast<const MultiIndexQuantizer *> (quantizer);
421  FAISS_ASSERT (miq);
422  const ProductQuantizer &cpq = miq->pq;
423  FAISS_ASSERT (pq.M % cpq.M == 0);
424 
425  precomputed_table.resize(cpq.ksub * pq.M * pq.ksub);
426 
427  // reorder PQ centroid table
 // row i is the concatenation, over the coarse sub-quantizers m, of
 // centroid number i of sub-quantizer m
428  std::vector<float> centroids (d * cpq.ksub, 0.0/0.0);
429 
430  for (int m = 0; m < cpq.M; m++) {
431  for (size_t i = 0; i < cpq.ksub; i++) {
432  memcpy (centroids.data() + i * d + m * cpq.dsub,
433  cpq.get_centroids (m, i),
434  sizeof (*centroids.data()) * cpq.dsub);
435  }
436  }
437 
438  pq.compute_inner_prod_tables (cpq.ksub, centroids.data (),
439  precomputed_table.data ());
440 
441  for (size_t i = 0; i < cpq.ksub; i++) {
442  float *tab = &precomputed_table[i * pq.M * pq.ksub];
443  fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
444  }
445 
446  }
447 }
448 
449 namespace {
450 
/** Read the CPU time-stamp counter.
 *
 * On x86 / x86-64 this executes RDTSC and returns the 64-bit counter.
 * On any other architecture the instruction does not exist, so the function
 * returns 0 and all cycle statistics derived from it stay at 0, instead of
 * the file failing to compile.
 */
static uint64_t get_cycles () {
#if defined(__x86_64__) || defined(__i386__)
    uint32_t high, low;
    // RDTSC returns the low half in EAX and the high half in EDX
    asm volatile("rdtsc \n\t"
                 : "=a" (low),
                   "=d" (high));
    return ((uint64_t)high << 32) | (low);
#else
    return 0;
#endif
}
458 
// Cycle-count helpers; both expect a uint64_t named t0 in the enclosing scope.
// TIC stores the current counter, TOC evaluates to the cycles elapsed since.
// TOC is parenthesized so it stays a single operand in any expression.
#define TIC t0 = get_cycles()
#define TOC (get_cycles () - t0)
461 
462 
463 
464 /** QueryTables manages the various ways of searching an
465  * IndexIVFPQ. The code contains a lot of branches, depending on:
466  * - metric_type: are we computing L2 or Inner product similarity?
467  * - by_residual: do we encode raw vectors or residuals?
468  * - use_precomputed_table: are x_R|x_C tables precomputed?
469  * - polysemous_ht: are we filtering with polysemous codes?
470  */
471 struct QueryTables {
472 
473  /*****************************************************
474  * General data from the IVFPQ
475  *****************************************************/
476 
477  const IndexIVFPQ & ivfpq;
478 
479  // copied from IndexIVFPQ for easier access
480  int d;
481  const ProductQuantizer & pq;
482  MetricType metric_type;
483  bool by_residual;
484  int use_precomputed_table;
485 
486  // pre-allocated data buffers
487  float * sim_table, * sim_table_2;
488  float * residual_vec, *decoded_vec;
489 
490  // single data buffer
491  std::vector<float> mem;
492 
493  // for table pointers
494  std::vector<const float *> sim_table_ptrs;
495 
496  explicit QueryTables (const IndexIVFPQ & ivfpq):
497  ivfpq(ivfpq),
498  d(ivfpq.d),
499  pq (ivfpq.pq),
500  metric_type (ivfpq.metric_type),
501  by_residual (ivfpq.by_residual),
502  use_precomputed_table (ivfpq.use_precomputed_table)
503  {
504  mem.resize (pq.ksub * pq.M * 2 + d *2);
505  sim_table = mem.data();
506  sim_table_2 = sim_table + pq.ksub * pq.M;
507  residual_vec = sim_table_2 + pq.ksub * pq.M;
508  decoded_vec = residual_vec + d;
509 
510  // for polysemous
511  if (ivfpq.polysemous_ht != 0) {
512  q_code.resize (pq.code_size);
513  }
514  init_list_cycles = 0;
515  sim_table_ptrs.resize (pq.M);
516  }
517 
518  /*****************************************************
519  * What we do when query is known
520  *****************************************************/
521 
522  // field specific to query
523  const float * qi;
524 
525  // query-specific intialization
526  void init_query (const float * qi) {
527  this->qi = qi;
528  if (metric_type == METRIC_INNER_PRODUCT)
529  init_query_IP ();
530  else
531  init_query_L2 ();
532  if (!by_residual && ivfpq.polysemous_ht != 0)
533  pq.compute_code (qi, q_code.data());
534  }
535 
536  void init_query_IP () {
537  // precompute some tables specific to the query qi
538  pq.compute_inner_prod_table (qi, sim_table);
539  // we compute negated inner products for use with the maxheap
540  for (int i = 0; i < pq.ksub * pq.M; i++) {
541  sim_table[i] = - sim_table[i];
542  }
543  }
544 
545  void init_query_L2 () {
546  if (!by_residual) {
547  pq.compute_distance_table (qi, sim_table);
548  } else if (use_precomputed_table) {
549  pq.compute_inner_prod_table (qi, sim_table_2);
550  }
551  }
552 
553  /*****************************************************
554  * When inverted list is known: prepare computations
555  *****************************************************/
556 
557  // fields specific to list
558  Index::idx_t key;
559  float coarse_dis;
560  std::vector<uint8_t> q_code;
561 
562  uint64_t init_list_cycles;
563 
564  /// once we know the query and the centroid, we can prepare the
565  /// sim_table that will be used for accumulation
566  /// and dis0, the initial value
567  float precompute_list_tables () {
568  float dis0 = 0;
569  uint64_t t0; TIC;
570  if (by_residual) {
571  if (metric_type == METRIC_INNER_PRODUCT)
572  dis0 = precompute_list_tables_IP ();
573  else
574  dis0 = precompute_list_tables_L2 ();
575  }
576  init_list_cycles += TOC;
577  return dis0;
578  }
579 
580  float precompute_list_table_pointers () {
581  float dis0 = 0;
582  uint64_t t0; TIC;
583  if (by_residual) {
584  if (metric_type == METRIC_INNER_PRODUCT)
585  FAISS_ASSERT (!"not implemented");
586  else
587  dis0 = precompute_list_table_pointers_L2 ();
588  }
589  init_list_cycles += TOC;
590  return dis0;
591  }
592 
593  /*****************************************************
594  * compute tables for inner prod
595  *****************************************************/
596 
597  float precompute_list_tables_IP ()
598  {
599  // prepare the sim_table that will be used for accumulation
600  // and dis0, the initial value
601  ivfpq.quantizer->reconstruct (key, decoded_vec);
602  // decoded_vec = centroid
603  float dis0 = -fvec_inner_product (qi, decoded_vec, d);
604 
605  if (ivfpq.polysemous_ht) {
606  for (int i = 0; i < d; i++) {
607  residual_vec [i] = qi[i] - decoded_vec[i];
608  }
609  pq.compute_code (residual_vec, q_code.data());
610  }
611  return dis0;
612  }
613 
614 
615  /*****************************************************
616  * compute tables for L2 distance
617  *****************************************************/
618 
619  float precompute_list_tables_L2 ()
620  {
621  float dis0 = 0;
622 
623  if (use_precomputed_table == 0) {
624  ivfpq.quantizer->compute_residual (qi, residual_vec, key);
625  pq.compute_distance_table (residual_vec, sim_table);
626  } else if (use_precomputed_table == 1) {
627  dis0 = coarse_dis;
628 
629  fvec_madd (pq.M * pq.ksub,
630  &ivfpq.precomputed_table [key * pq.ksub * pq.M],
631  -2.0, sim_table_2,
632  sim_table);
633  } else if (use_precomputed_table == 2) {
634  dis0 = coarse_dis;
635 
636  const MultiIndexQuantizer *miq =
637  dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
638  FAISS_ASSERT (miq);
639  const ProductQuantizer &cpq = miq->pq;
640  int Mf = pq.M / cpq.M;
641 
642  const float *qtab = sim_table_2; // query-specific table
643  float *ltab = sim_table; // (output) list-specific table
644 
645  long k = key;
646  for (int cm = 0; cm < cpq.M; cm++) {
647  // compute PQ index
648  int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
649  k >>= cpq.nbits;
650 
651  // get corresponding table
652  const float *pc = &ivfpq.precomputed_table
653  [(ki * pq.M + cm * Mf) * pq.ksub];
654 
655  if (ivfpq.polysemous_ht == 0) {
656 
657  // sum up with query-specific table
658  fvec_madd (Mf * pq.ksub,
659  pc,
660  -2.0, qtab,
661  ltab);
662  ltab += Mf * pq.ksub;
663  qtab += Mf * pq.ksub;
664  } else {
665  for (int m = cm * Mf; m < (cm + 1) * Mf; m++) {
666  q_code[m] = fvec_madd_and_argmin
667  (pq.ksub, pc, -2, qtab, ltab);
668  pc += pq.ksub;
669  ltab += pq.ksub;
670  qtab += pq.ksub;
671  }
672  }
673 
674  }
675  }
676 
677  return dis0;
678  }
679 
680  float precompute_list_table_pointers_L2 ()
681  {
682  float dis0 = 0;
683 
684  if (use_precomputed_table == 1) {
685  dis0 = coarse_dis;
686 
687  const float * s = &ivfpq.precomputed_table [key * pq.ksub * pq.M];
688  for (int m = 0; m < pq.M; m++) {
689  sim_table_ptrs [m] = s;
690  s += pq.ksub;
691  }
692  } else if (use_precomputed_table == 2) {
693  dis0 = coarse_dis;
694 
695  const MultiIndexQuantizer *miq =
696  dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
697  FAISS_ASSERT (miq);
698  const ProductQuantizer &cpq = miq->pq;
699  int Mf = pq.M / cpq.M;
700 
701  long k = key;
702  int m0 = 0;
703  for (int cm = 0; cm < cpq.M; cm++) {
704  int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
705  k >>= cpq.nbits;
706 
707  const float *pc = &ivfpq.precomputed_table
708  [(ki * pq.M + cm * Mf) * pq.ksub];
709 
710  for (int m = m0; m < m0 + Mf; m++) {
711  sim_table_ptrs [m] = pc;
712  pc += pq.ksub;
713  }
714  m0 += Mf;
715  }
716  } else FAISS_ASSERT (!"need precomputed tables");
717 
718  if (ivfpq.polysemous_ht) {
719  FAISS_ASSERT (!"not implemented");
720  // Not clear that it makes sense to implemente this,
721  // because it costs M * ksub, which is what we wanted to
722  // avoid with the tables pointers.
723  }
724 
725  return dis0;
726  }
727 
728 
729 };
730 
731 
732 /*****************************************************
733  * Scanning the codes.
734  * The scanning functions call their favorite precompute_*
735  * function to precompute the tables they need.
736  *****************************************************/
737 template <typename IDType>
738 struct InvertedListScanner: QueryTables {
739 
740  const uint8_t * __restrict list_codes;
741  const IDType * list_ids;
742  size_t list_size;
743 
744  explicit InvertedListScanner (const IndexIVFPQ & ivfpq):
745  QueryTables (ivfpq)
746  {
747  FAISS_ASSERT(pq.byte_per_idx == 1);
748  n_hamming_pass = 0;
749  }
750 
751  /// list_specific intialization
752  void init_list (Index::idx_t key, float coarse_dis,
753  size_t list_size_in, const IDType *list_ids_in,
754  const uint8_t *list_codes_in) {
755  this->key = key;
756  this->coarse_dis = coarse_dis;
757  list_size = list_size_in;
758  list_codes = list_codes_in;
759  list_ids = list_ids_in;
760  }
761 
762  /*****************************************************
763  * Scaning the codes: simple PQ scan.
764  *****************************************************/
765 
766  /// version of the scan where we use precomputed tables
767  void scan_list_with_table (
768  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
769  {
770  float dis0 = precompute_list_tables ();
771 
772  for (size_t j = 0; j < list_size; j++) {
773 
774  float dis = dis0;
775  const float *tab = sim_table;
776 
777  for (size_t m = 0; m < pq.M; m++) {
778  dis += tab[*list_codes++];
779  tab += pq.ksub;
780  }
781 
782  if (dis < heap_sim[0]) {
783  maxheap_pop (k, heap_sim, heap_ids);
784  long id = store_pairs ? (key << 32 | j) : list_ids[j];
785  maxheap_push (k, heap_sim, heap_ids, dis, id);
786  }
787  }
788  }
789 
790 
791  /// tables are not precomputed, but pointers are provided to the
792  /// relevant X_c|x_r tables
793  void scan_list_with_pointer (
794  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
795  {
796 
797  float dis0 = precompute_list_table_pointers ();
798 
799  for (size_t j = 0; j < list_size; j++) {
800 
801  float dis = dis0;
802  const float *tab = sim_table_2;
803 
804  for (size_t m = 0; m < pq.M; m++) {
805  int ci = *list_codes++;
806  dis += sim_table_ptrs [m][ci] - 2 * tab [ci];
807  tab += pq.ksub;
808  }
809 
810  if (dis < heap_sim[0]) {
811  maxheap_pop (k, heap_sim, heap_ids);
812  long id = store_pairs ? (key << 32 | j) : list_ids[j];
813  maxheap_push (k, heap_sim, heap_ids, dis, id);
814  }
815  }
816 
817  }
818 
819  /// nothing is precomputed: access residuals on-the-fly
820  void scan_on_the_fly_dist (
821  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
822  {
823 
824  if (by_residual && use_precomputed_table) {
825  scan_list_with_pointer (k, heap_sim, heap_ids, store_pairs);
826  return;
827  }
828 
829  const float *dvec;
830  float dis0 = 0;
831 
832  if (by_residual) {
833  if (metric_type == METRIC_INNER_PRODUCT) {
834  ivfpq.quantizer->reconstruct (key, residual_vec);
835  dis0 = fvec_inner_product (residual_vec, qi, d);
836  } else {
837  ivfpq.quantizer->compute_residual (qi, residual_vec, key);
838  }
839  dvec = residual_vec;
840  } else {
841  dvec = qi;
842  dis0 = 0;
843  }
844 
845  for (size_t j = 0; j < list_size; j++) {
846 
847  pq.decode (list_codes, decoded_vec);
848  list_codes += pq.code_size;
849 
850  float dis;
851  if (metric_type == METRIC_INNER_PRODUCT) {
852  dis = -dis0 - fvec_inner_product (decoded_vec, qi, d);
853  } else {
854  dis = fvec_L2sqr (decoded_vec, dvec, d);
855  }
856 
857  if (dis < heap_sim[0]) {
858  maxheap_pop (k, heap_sim, heap_ids);
859  long id = store_pairs ? (key << 32 | j) : list_ids[j];
860  maxheap_push (k, heap_sim, heap_ids, dis, id);
861  }
862  }
863  }
864 
865  /*****************************************************
866  * Scanning codes with polysemous filtering
867  *****************************************************/
868 
869  // code for the query
870  size_t n_hamming_pass;
871 
872 
873  template <class HammingComputer>
874  void scan_list_polysemous_hc (
875  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
876  {
877  float dis0 = precompute_list_tables ();
878  int ht = ivfpq.polysemous_ht;
879 
880  int code_size = pq.code_size;
881 
882  HammingComputer hc (q_code.data(), code_size);
883 
884  for (size_t j = 0; j < list_size; j++) {
885  const uint8_t *b_code = list_codes;
886  int hd = hc.hamming (b_code);
887  if (hd < ht) {
888  n_hamming_pass ++;
889 
890  float dis = dis0;
891  const float *tab = sim_table;
892 
893  for (size_t m = 0; m < pq.M; m++) {
894  dis += tab[*b_code++];
895  tab += pq.ksub;
896  }
897 
898  if (dis < heap_sim[0]) {
899  maxheap_pop (k, heap_sim, heap_ids);
900  long id = store_pairs ? (key << 32 | j) : list_ids[j];
901  maxheap_push (k, heap_sim, heap_ids, dis, id);
902  }
903  }
904  list_codes += code_size;
905  }
906  }
907 
908  void scan_list_polysemous (
909  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
910  {
911  switch (pq.code_size) {
912 #define HANDLE_CODE_SIZE(cs) \
913  case cs: \
914  scan_list_polysemous_hc <HammingComputer ## cs> \
915  (k, heap_sim, heap_ids, store_pairs); \
916  break
917  HANDLE_CODE_SIZE(4);
918  HANDLE_CODE_SIZE(8);
919  HANDLE_CODE_SIZE(16);
920  HANDLE_CODE_SIZE(20);
921  HANDLE_CODE_SIZE(32);
922  HANDLE_CODE_SIZE(64);
923 #undef HANDLE_CODE_SIZE
924  default:
925  if (pq.code_size % 8 == 0)
926  scan_list_polysemous_hc <HammingComputerM8>
927  (k, heap_sim, heap_ids, store_pairs);
928  else
929  scan_list_polysemous_hc <HammingComputerM4>
930  (k, heap_sim, heap_ids, store_pairs);
931  break;
932  }
933  }
934 
935 };
936 
937 
938 
939 
940 } // anonymous namespace
941 
942 
943 IndexIVFPQStats indexIVFPQ_stats;
944 
945 void IndexIVFPQStats::reset () {
946  memset (this, 0, sizeof (*this));
947 }
948 
949 
951  size_t nx,
952  const float * qx,
953  const long * keys,
954  const float * coarse_dis,
955  float_maxheap_array_t * res,
956  bool store_pairs) const
957 {
 // NOTE(review): the first signature line (original line 950) is missing
 // from this listing.
 //
 // For each of the nx queries in qx, scans the inverted lists named in keys
 // (nprobe entries per query, with matching coarse distances in coarse_dis)
 // and leaves the k = res->k best results in res. With store_pairs the
 // scanners store (list number << 32 | offset) instead of the stored id.
958  const size_t k = res->k;
959 
960 #pragma omp parallel
961  {
 // one scanner and one set of counters per thread
962  InvertedListScanner<long> qt (*this);
963  size_t stats_nlist = 0;
964  size_t stats_ncode = 0;
965  uint64_t init_query_cycles = 0;
966  uint64_t scan_cycles = 0;
967  uint64_t heap_cycles = 0;
968 
969 #pragma omp for
970  for (size_t i = 0; i < nx; i++) {
971  const float *qi = qx + i * d;
972  const long * keysi = keys + i * nprobe;
973  const float *coarse_dis_i = coarse_dis + i * nprobe;
974  float * heap_sim = res->get_val (i);
975  long * heap_ids = res->get_ids (i);
976 
977  uint64_t t0;
978  TIC;
979  maxheap_heapify (k, heap_sim, heap_ids);
980  heap_cycles += TOC;
981 
982  TIC;
983  qt.init_query (qi);
984  init_query_cycles += TOC;
985 
986  size_t nscan = 0;
987 
988  for (size_t ik = 0; ik < nprobe; ik++) {
989  long key = keysi[ik]; /* select the list */
990  if (key < 0) {
991  // not enough centroids for multiprobe
992  continue;
993  }
994  if (key >= (long) nlist) {
995  fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
 // NOTE(review): a bare `throw;` with no exception being handled calls
 // std::terminate, so this aborts the process rather than raising an
 // error the caller could catch.
996  throw;
997  }
998  size_t list_size = ids[key].size();
999  stats_nlist ++;
1000  nscan += list_size;
1001 
1002  if (list_size == 0) continue;
1003 
1004  qt.init_list (key, coarse_dis_i[ik],
1005  list_size, ids[key].data(),
1006  codes[key].data());
1007 
1008  TIC;
 // choose the scanning strategy: polysemous filtering if enabled,
 // otherwise tables for lists longer than scan_table_threshold,
 // otherwise on-the-fly distances
1009  if (polysemous_ht > 0) {
1010  qt.scan_list_polysemous
1011  (k, heap_sim, heap_ids, store_pairs);
1012  } else if (list_size > scan_table_threshold) {
1013  qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
1014  } else {
1015  qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
1016  }
1017  scan_cycles += TOC;
1018 
 // max_codes == 0 means no limit on the number of scanned codes
1019  if (max_codes && nscan >= max_codes) break;
1020  }
1021  stats_ncode += nscan;
1022  TIC;
1023  maxheap_reorder (k, heap_sim, heap_ids);
1024 
 // the scanners accumulate negated inner products; restore the sign
1025  if (metric_type == METRIC_INNER_PRODUCT) {
1026  for (size_t j = 0; j < k; j++)
1027  heap_sim[j] = -heap_sim[j];
1028  }
1029  heap_cycles += TOC;
1030  }
1031 
 // merge the per-thread counters into the global statistics
1032 #pragma omp critical
1033  {
1034  indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
1035  indexIVFPQ_stats.nlist += stats_nlist;
1036  indexIVFPQ_stats.ncode += stats_ncode;
1037 
1038  indexIVFPQ_stats.init_query_cycles += init_query_cycles;
1039  indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
 // list initialization runs inside the scan calls timed above, so its
 // cycles are subtracted from the scan total
1040  indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
1041  indexIVFPQ_stats.heap_cycles += heap_cycles;
1042  }
1043 
1044  }
1045  indexIVFPQ_stats.nq += nx;
1046 }
1047 
1048 
1049 void IndexIVFPQ::search (idx_t n, const float *x, idx_t k,
1050  float *distances, idx_t *labels) const
1051 {
1052  long * idx = new long [n * nprobe];
1053  float * coarse_dis = new float [n * nprobe];
1054  uint64_t t0;
1055  TIC;
1056  quantizer->search (n, x, nprobe, coarse_dis, idx);
1057  indexIVFPQ_stats.assign_cycles += TOC;
1058 
1059  TIC;
1060  float_maxheap_array_t res = { size_t(n), size_t(k), labels, distances};
1061 
1062  search_knn_with_key (n, x, idx, coarse_dis, &res);
1063  delete [] idx;
1064  delete [] coarse_dis;
1065  indexIVFPQ_stats.search_cycles += TOC;
1066 }
1067 
1068 
1070 {
 // NOTE(review): the signature line (original line 1069) is missing from
 // this listing.
 // Runs the base-class reset, then empties the code array of every list.
1071  IndexIVF::reset();
1072  for (size_t key = 0; key < nlist; key++) {
1073  codes[key].clear();
1074  }
1075 }
1076 
1078 {
 // NOTE(review): the signature line (original line 1077) is missing from
 // this listing; sel is the function's parameter.
 // Removes every entry whose id satisfies sel.is_member, list by list, and
 // returns the number of removed entries. Not supported when a direct map
 // is maintained.
1079  FAISS_ASSERT (!maintain_direct_map ||
1080  !"direct map remove not implemented");
1081  long nremove = 0;
1082 #pragma omp parallel for reduction(+: nremove)
1083  for (long i = 0; i < nlist; i++) {
1084  std::vector<idx_t> & idsi = ids[i];
1085  uint8_t * codesi = codes[i].data();
1086 
 // swap-with-last removal: entry j is overwritten by the last live entry
 // and the live size l shrinks; j is not advanced so the moved entry is
 // tested as well. The order of the remaining entries is not preserved.
1087  long l = idsi.size(), j = 0;
1088  while (j < l) {
1089  if (sel.is_member (idsi[j])) {
1090  l--;
1091  idsi [j] = idsi [l];
1092  memmove (codesi + j * code_size,
1093  codesi + l * code_size, code_size);
1094  } else {
1095  j++;
1096  }
1097  }
1098  if (l < idsi.size()) {
1099  nremove += idsi.size() - l;
1100  idsi.resize (l);
1101  codes[i].resize (l * code_size);
1102  }
1103  }
1104  ntotal -= nremove;
1105  return nremove;
1106 }
1107 
1108 
 /// Default constructor: only sets runtime search/training parameters.
 /// NOTE(review): original lines 1112-1113 are missing from this listing,
 /// so two of the initializations are not visible here.
1109 IndexIVFPQ::IndexIVFPQ ()
1110 {
1111  // initialize some runtime values
1114  do_polysemous_training = false;
1115  polysemous_ht = 0;
1116  max_codes = 0;
1117  polysemous_training = nullptr;
1118 }
1119 
1120 
/// Orders integer positions by the bytes of the fixed-size codes they
/// designate inside tab. Used as a std::sort predicate: operator() is true
/// when code a compares greater than code b, i.e. it sorts in decreasing
/// byte order.
struct CodeCmp {
    const uint8_t *tab;   ///< base of the contiguous code array
    size_t code_size;     ///< size of one code in bytes

    /// memcmp-style three-way comparison of codes number a and b
    int cmp (int a, int b) const {
        const uint8_t *lhs = tab + a * code_size;
        const uint8_t *rhs = tab + b * code_size;
        return memcmp (lhs, rhs, code_size);
    }

    bool operator () (int a, int b) const {
        return 0 < cmp (a, b);
    }
};
1132 
1133 
1134 size_t IndexIVFPQ::find_duplicates (idx_t *dup_ids, size_t *lims) const
1135 {
1136  size_t ngroup = 0;
1137  lims[0] = 0;
1138  for (size_t list_no = 0; list_no < nlist; list_no++) {
1139  size_t n = ids[list_no].size();
1140  std::vector<int> ord (n);
1141  for (int i = 0; i < n; i++) ord[i] = i;
1142  CodeCmp cs = { codes[list_no].data(), code_size };
1143  std::sort (ord.begin(), ord.end(), cs);
1144 
1145  const idx_t *list_ids = ids[list_no].data();
1146  int prev = -1; // all elements from prev to i-1 are equal
1147  for (int i = 0; i < n; i++) {
1148  if (prev >= 0 && cs.cmp (ord [prev], ord [i]) == 0) {
1149  // same as previous => remember
1150  if (prev + 1 == i) { // start new group
1151  ngroup++;
1152  lims[ngroup] = lims[ngroup - 1];
1153  dup_ids [lims [ngroup]++] = list_ids [ord [prev]];
1154  }
1155  dup_ids [lims [ngroup]++] = list_ids [ord [i]];
1156  } else { // not same as previous.
1157  prev = i;
1158  }
1159  }
1160  }
1161  return ngroup;
1162 }
1163 
1164 
1165 
1166 
1167 /*****************************************
1168  * IndexIVFPQR implementation
1169  ******************************************/
1170 
1171 IndexIVFPQR::IndexIVFPQR (
1172  Index * quantizer, size_t d, size_t nlist,
1173  size_t M, size_t nbits_per_idx,
1174  size_t M_refine, size_t nbits_per_idx_refine):
1175  IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx),
1176  refine_pq (d, M_refine, nbits_per_idx_refine),
1177  k_factor (4)
1178 {
1179  by_residual = true;
1180  set_typename();
1181 }
1182 
1183 IndexIVFPQR::IndexIVFPQR ():
1184  k_factor (1)
1185 {
1186  by_residual = true;
1187 }
1188 
1189 
1190 void IndexIVFPQR::set_typename()
1191 {
1192  std::stringstream s;
1193  s << "IvfPQR_" << pq.M << "x" << pq.nbits
1194  << "+" << refine_pq.M << "x" << refine_pq.nbits
1195  << "[" << nlist << ":" << quantizer->index_typename << "]";
1196  index_typename = s.str();
1197 
1198 }
1199 
1200 
1202 {
1204  refine_codes.clear();
1205 }
1206 
1207 
1208 
1209 
1210 void IndexIVFPQR::train_residual (idx_t n, const float *x)
1211 {
1212 
1213  float * residual_2 = new float [n * d];
1214 
1215  train_residual_o (n, x, residual_2);
1216 
1217  if (verbose)
1218  printf ("training %zdx%zd 2nd level PQ quantizer on %ld %dD-vectors\n",
1219  refine_pq.M, refine_pq.ksub, n, d);
1220 
1222  refine_pq.cp.verbose = verbose;
1223 
1224  refine_pq.train (n, residual_2);
1225  delete [] residual_2;
1226 
1227 }
1228 
1229 
1230 void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const long *xids) {
1231  add_core (n, x, xids, nullptr);
1232 }
1233 
1234 void IndexIVFPQR::add_core (idx_t n, const float *x, const long *xids,
1235  const long *precomputed_idx) {
1236 
1237  float * residual_2 = new float [n * d];
1238 
1239  idx_t n0 = ntotal;
1240 
1241  add_core_o (n, x, xids, residual_2, precomputed_idx);
1242 
1244 
1246  residual_2, &refine_codes[n0 * refine_pq.code_size], n);
1247 
1248  delete [] residual_2;
1249 
1250 }
1251 
1252 
1254  idx_t n, const float *x, idx_t k,
1255  float *distances, idx_t *labels) const
1256 {
1257  FAISS_ASSERT (is_trained);
1258  long * idx = new long [n * nprobe];
1259  float * L1_dis = new float [n * nprobe];
1260  uint64_t t0;
1261  TIC;
1262  quantizer->search (n, x, nprobe, L1_dis, idx);
1263  indexIVFPQ_stats.assign_cycles += TOC;
1264 
1265  TIC;
1266  size_t k_coarse = long(k * k_factor);
1267  idx_t *coarse_labels = new idx_t [k_coarse * n];
1268  { // query with quantizer levels 1 and 2.
1269  float *coarse_distances = new float [k_coarse * n];
1270 
1271  faiss::float_maxheap_array_t res_coarse = {
1272  size_t(n), k_coarse, coarse_labels, coarse_distances};
1273  search_knn_with_key (n, x, idx, L1_dis, &res_coarse, true);
1274  delete [] coarse_distances;
1275  }
1276  delete [] L1_dis;
1277  indexIVFPQ_stats.search_cycles += TOC;
1278 
1279  TIC;
1280 
1281  // 3rd level refinement
1282  size_t n_refine = 0;
1283 #pragma omp parallel reduction(+ : n_refine)
1284  {
1285  // tmp buffers
1286  float *residual_1 = new float [2 * d];
1287  float *residual_2 = residual_1 + d;
1288 #pragma omp for
1289  for (idx_t i = 0; i < n; i++) {
1290  const float *xq = x + i * d;
1291  const long * shortlist = coarse_labels + k_coarse * i;
1292  float * heap_sim = distances + k * i;
1293  long * heap_ids = labels + k * i;
1294  maxheap_heapify (k, heap_sim, heap_ids);
1295 
1296  for (int j = 0; j < k_coarse; j++) {
1297  long sl = shortlist[j];
1298 
1299  if (sl == -1) continue;
1300 
1301  int list_no = sl >> 32;
1302  int ofs = sl & 0xffffffff;
1303 
1304  assert (list_no >= 0 && list_no < nlist);
1305  assert (ofs >= 0 && ofs < ids[list_no].size());
1306 
1307  // 1st level residual
1308  quantizer->compute_residual (xq, residual_1, list_no);
1309 
1310  // 2nd level residual
1311  const uint8_t * l2code = &codes[list_no][ofs * pq.code_size];
1312  pq.decode (l2code, residual_2);
1313  for (int l = 0; l < d; l++)
1314  residual_2[l] = residual_1[l] - residual_2[l];
1315 
1316  // 3rd level residual's approximation
1317  idx_t id = ids[list_no][ofs];
1318  assert (0 <= id && id < ntotal);
1320  residual_1);
1321 
1322  float dis = fvec_L2sqr (residual_1, residual_2, d);
1323 
1324  if (dis < heap_sim[0]) {
1325  maxheap_pop (k, heap_sim, heap_ids);
1326  maxheap_push (k, heap_sim, heap_ids, dis, id);
1327  }
1328  n_refine ++;
1329  }
1330  maxheap_reorder (k, heap_sim, heap_ids);
1331  }
1332  delete [] residual_1;
1333  }
1334  delete [] coarse_labels;
1335  delete [] idx;
1336  indexIVFPQ_stats.nrefine += n_refine;
1337  indexIVFPQ_stats.refine_cycles += TOC;
1338 }
1339 
1340 void IndexIVFPQR::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
1341 {
1342  float *r3 = new float [d];
1343 
1344  IndexIVFPQ::reconstruct_n (i0, ni, recons);
1345 
1346  for (idx_t i = i0; i < i0 + ni; i++) {
1347  float *r = recons + i * d;
1349 
1350  for (int j = 0; j < d; j++)
1351  r[j] += r3[j];
1352 
1353  }
1354  delete [] r3;
1355 }
1356 
1358 {
1359  IndexIVFPQR &other = dynamic_cast<IndexIVFPQR &> (other_in);
1361  refine_codes.insert (refine_codes.end(),
1362  other.refine_codes.begin(), other.refine_codes.end());
1363  other.refine_codes.clear();
1364 }
1365 
1367 {
1368  FAISS_ASSERT(!"not implemented");
1369 }
1370 
1371 /*****************************************
1372  * IndexIVFPQCompact implementation
1373  ******************************************/
1374 
1375 IndexIVFPQCompact::IndexIVFPQCompact ()
1376 {
1377  alloc_type = Alloc_type_none;
1378  limits = nullptr;
1379  compact_ids = nullptr;
1380  compact_codes = nullptr;
1381 }
1382 
1383 
1384 IndexIVFPQCompact::IndexIVFPQCompact (const IndexIVFPQ &other)
1385 {
1386  FAISS_ASSERT (other.ntotal < (1UL << 31) ||
1387  !"IndexIVFPQCompact cannot store more than 2G images");
1388 
1389  // here it would be more convenient to just use the
1390  // copy-constructor, but it would copy the lists as well: too much
1391  // overhead...
1392 
1393  // copy fields from Index
1394  d = other.d;
1395  ntotal = other.ntotal;
1396  verbose = other.verbose;
1397  is_trained = other.is_trained;
1398  metric_type = other.metric_type;
1399 
1400  // copy fields from IndexIVF (except ids)
1401  nlist = other.nlist;
1402  nprobe = other.nprobe;
1403  quantizer = other.quantizer;
1404  quantizer_trains_alone = other.quantizer_trains_alone;
1405  own_fields = false;
1406  direct_map = other.direct_map;
1407 
1408  // copy fields from IndexIVFPQ (except codes)
1409  by_residual = other.by_residual;
1410  use_precomputed_table = other.use_precomputed_table;
1411  precomputed_table = other.precomputed_table;
1412  code_size = other.code_size;
1413  pq = other.pq;
1414  do_polysemous_training = other.do_polysemous_training;
1415  polysemous_training = nullptr;
1416 
1417  scan_table_threshold = other.scan_table_threshold;
1418  max_codes = other.max_codes;
1419  polysemous_ht = other.polysemous_ht;
1420 
1421  //allocate
1422  alloc_type = Alloc_type_new;
1423  limits = new uint32_t [nlist + 1];
1424  compact_ids = new uint32_t [ntotal];
1425  compact_codes = new uint8_t [ntotal * code_size];
1426 
1427 
1428  // copy content from other
1429  size_t ofs = 0;
1430  for (size_t i = 0; i < nlist; i++) {
1431  limits [i] = ofs;
1432  const std::vector<long> &other_ids = other.ids[i];
1433  for (size_t j = 0; j < other_ids.size(); j++) {
1434  long id = other_ids[j];
1435  FAISS_ASSERT (id < (1UL << 31) ||
1436  !"IndexIVFPQCompact cannot store ids > 2G");
1437  compact_ids[ofs + j] = id;
1438  }
1439  memcpy (compact_codes + ofs * code_size,
1440  other.codes[i].data(),
1441  other.codes[i].size());
1442  ofs += other_ids.size();
1443  }
1444  FAISS_ASSERT (ofs == ntotal);
1445  limits [nlist] = ofs;
1446 
1447 }
1448 
1449 void IndexIVFPQCompact::add (idx_t, const float *) {
1450  FAISS_ASSERT (!"cannot add to an IndexIVFPQCompact");
1451 }
1452 
1454  FAISS_ASSERT (!"cannot reset an IndexIVFPQCompact");
1455 }
1456 
1457 void IndexIVFPQCompact::train (idx_t, const float *) {
1458  FAISS_ASSERT (!"cannot train an IndexIVFPQCompact");
1459 }
1460 
1461 
1462 
1463 
1464 IndexIVFPQCompact::~IndexIVFPQCompact ()
1465 {
1466  if (alloc_type == Alloc_type_new) {
1467  delete [] limits;
1468  delete [] compact_codes;
1469  delete [] compact_ids;
1470  } else if (alloc_type == Alloc_type_mmap) {
1471  munmap (mmap_buffer, mmap_length);
1472 
1473  }
1474 
1475 }
1476 
1478  size_t nx,
1479  const float * qx,
1480  const long * keys,
1481  const float * coarse_dis,
1482  float_maxheap_array_t * res,
1483  bool store_pairs) const
1484 {
1485  const size_t k = res->k;
1486 
1487 #pragma omp parallel
1488  {
1489  InvertedListScanner<uint32_t> qt (*this);
1490  size_t stats_nlist = 0;
1491  size_t stats_ncode = 0;
1492  uint64_t init_query_cycles = 0;
1493  uint64_t scan_cycles = 0;
1494  uint64_t heap_cycles = 0;
1495 
1496 #pragma omp for
1497  for (size_t i = 0; i < nx; i++) {
1498  const float *qi = qx + i * d;
1499  const long * keysi = keys + i * nprobe;
1500  const float *coarse_dis_i = coarse_dis + i * nprobe;
1501  float * heap_sim = res->get_val (i);
1502  long * heap_ids = res->get_ids (i);
1503 
1504  uint64_t t0;
1505  TIC;
1506  maxheap_heapify (k, heap_sim, heap_ids);
1507  heap_cycles += TOC;
1508 
1509  TIC;
1510  qt.init_query (qi);
1511  init_query_cycles += TOC;
1512 
1513  size_t nscan = 0;
1514 
1515  for (size_t ik = 0; ik < nprobe; ik++) {
1516  long key = keysi[ik]; /* select the list */
1517  if (key < 0) {
1518  // not enough centroids for multiprobe
1519  continue;
1520  }
1521  if (key >= (long) nlist) {
1522  fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
1523  throw;
1524  }
1525  size_t list_size = limits[key + 1] - limits[key];
1526  stats_nlist ++;
1527  nscan += list_size;
1528 
1529  if (list_size == 0) continue;
1530 
1531  qt.init_list (key, coarse_dis_i[ik],
1532  list_size, compact_ids + limits[key],
1533  compact_codes + limits[key] * code_size);
1534 
1535  TIC;
1536  if (polysemous_ht > 0) {
1537  qt.scan_list_polysemous
1538  (k, heap_sim, heap_ids, store_pairs);
1539  } else if (list_size > scan_table_threshold) {
1540  qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
1541  } else {
1542  qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
1543  }
1544  scan_cycles += TOC;
1545 
1546  if (max_codes && nscan >= max_codes) break;
1547  }
1548  stats_ncode += nscan;
1549  TIC;
1550  maxheap_reorder (k, heap_sim, heap_ids);
1551 
1552  if (metric_type == METRIC_INNER_PRODUCT) {
1553  for (size_t j = 0; j < k; j++) {
1554  heap_sim[i] = -heap_sim[i];
1555  }
1556  }
1557  heap_cycles += TOC;
1558  }
1559 
1560 #pragma omp critical
1561  {
1562  indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
1563  indexIVFPQ_stats.nlist += stats_nlist;
1564  indexIVFPQ_stats.ncode += stats_ncode;
1565 
1566  indexIVFPQ_stats.init_query_cycles += init_query_cycles;
1567  indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
1568  indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
1569  indexIVFPQ_stats.heap_cycles += heap_cycles;
1570  }
1571 
1572  }
1573  indexIVFPQ_stats.nq += nx;
1574 }
1575 
1576 
1577 
1578 } // namespace faiss
uint32_t * compact_ids
size ntotal
Definition: IndexIVFPQ.h:249
uint8_t * compact_codes
size ntotal * code_size
Definition: IndexIVFPQ.h:250
void precompute_table()
build precomputed table
Definition: IndexIVFPQ.cpp:380
void copy_subset_to(IndexIVFPQ &other, int subset_type, long a1, long a2) const
Definition: IndexIVFPQ.cpp:321
size_t nbits
number of bits per quantization index
virtual void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVFPQ.cpp:291
void decode(const uint8_t *code, float *x) const
decode a vector from a given code (or n vectors if third argument)
ProductQuantizer refine_pq
3rd level quantizer
Definition: IndexIVFPQ.h:190
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
Definition: utils.cpp:430
PolysemousTraining * polysemous_training
if NULL, use default
Definition: IndexIVFPQ.h:37
T * get_val(size_t key)
Return the list of values for a heap.
Definition: Heap.h:361
virtual void add(idx_t, const float *) override
the three following functions will fail at runtime
virtual void search_knn_with_key(size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const override
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
const float * fvecs_maybe_subsample(size_t d, size_t *n, size_t nmax, const float *x, bool verbose, long seed)
Definition: utils.cpp:1793
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:48
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:24
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:51
virtual void set_typename() override
Definition: IndexIVF.cpp:207
virtual void merge_from_residuals(IndexIVF &other) override
used to implement merging
Definition: IndexIVFPQ.cpp:311
void train_residual_o(idx_t n, const float *x, float *residuals_2)
same as train_residual, also output 2nd level residuals
Definition: IndexIVFPQ.cpp:83
bool do_polysemous_training
reorder PQ centroids after training?
Definition: IndexIVFPQ.h:36
size_t scan_table_threshold
use table computation or on-the-fly?
Definition: IndexIVFPQ.h:40
size_t k
allocated size per heap
Definition: Heap.h:356
virtual void train_residual(idx_t n, const float *x) override
trains the two product quantizers
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx=nullptr)
same as add_with_ids, but optionally use the precomputed list ids
uint32_t * limits
size nlist + 1
Definition: IndexIVFPQ.h:248
size_t dsub
dimensionality of each subvector
int seed
seed for the random number generator
Definition: Clustering.h:36
std::vector< float > precomputed_table
Definition: IndexIVFPQ.h:48
void fvec_madd(size_t n, const float *a, float bf, const float *b, float *c)
Definition: utils.cpp:1707
int polysemous_ht
Hamming thresh for polysemous filtering.
Definition: IndexIVFPQ.h:42
virtual void search_knn_with_key(size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const
Definition: IndexIVFPQ.cpp:950
virtual void reset() override
removes all elements from the database.
virtual void add_with_ids(idx_t n, const float *x, const long *xids=nullptr) override
Definition: IndexIVFPQ.cpp:174
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:56
void compute_codes(const float *x, uint8_t *codes, size_t n) const
same as compute_code for several vectors
int d
vector dimension
Definition: Index.h:66
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:50
size_t max_codes
max nb of codes to visit to do a query
Definition: IndexIVFPQ.h:41
std::vector< uint8_t > refine_codes
corresponding codes
Definition: IndexIVFPQ.h:191
size_t code_size
byte per indexed vector
virtual long remove_ids(const IDSelector &sel) override
virtual void train_residual(idx_t n, const float *x) override
trains the product quantizer
Definition: IndexIVFPQ.cpp:77
virtual void train(idx_t, const float *) override
Trains the quantizer and calls train_residual to train sub-quantizers.
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:52
size_t ksub
number of centroids for each subquantizer
long idx_t
all indices are this type
Definition: Index.h:64
void compute_code(const float *x, uint8_t *code) const
Quantize one vector with the product quantizer.
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
virtual void reset() override
removes all elements from the database.
bool verbose
verbosity level
Definition: Index.h:68
virtual void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:87
double getmillisecs()
ms elapsed since some arbitrary epoch
Definition: utils.cpp:71
optimizes the order of indices in a ProductQuantizer
float fvec_norm_L2sqr(const float *x, size_t d)
Definition: utils.cpp:511
ClusteringParameters cp
parameters used during clustering
virtual void merge_from_residuals(IndexIVF &other) override
used to implement merging
bool by_residual
Encode residual or plain vector?
Definition: IndexIVFPQ.h:31
TI * get_ids(size_t key)
Corresponding identifiers.
Definition: Heap.h:364
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:34
size_t M
number of subquantizers
size_t nlist
number of possible key values
Definition: IndexIVF.h:47
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexIVFPQ.cpp:262
void add_core_o(idx_t n, const float *x, const long *xids, float *residuals_2, const long *precomputed_idx=nullptr)
Definition: IndexIVFPQ.cpp:180
int fvec_madd_and_argmin(size_t n, const float *a, float bf, const float *b, float *c)
Definition: utils.cpp:1781
size_t code_size
code size per vector in bytes
Definition: IndexIVFPQ.h:33
virtual long remove_ids(const IDSelector &sel) override
virtual void reset() override
removes all elements from the database.
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
float * get_centroids(size_t m, size_t i)
return the centroids associated with subvector m
void encode_multiple(size_t n, const long *keys, const float *x, uint8_t *codes) const
same as encode, for multiple points at once
Definition: IndexIVFPQ.cpp:157
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:59
void optimize_pq_for_hamming(ProductQuantizer &pq, size_t n, const float *x) const
int max_points_per_centroid
to limit size of dataset
Definition: Clustering.h:34
bool verbose
verbose during training?
virtual void add_with_ids(idx_t n, const float *x, const long *xids) override
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
size_t find_duplicates(idx_t *ids, size_t *lims) const
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:44
float k_factor
factor between k requested in search and the k requested from the IVFPQ
Definition: IndexIVFPQ.h:194
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:32