Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVFPQ.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 /* Copyright 2004-present Facebook. All Rights Reserved.
11  Inverted list structure.
12 */
13 
14 #include "IndexIVFPQ.h"
15 
16 #include <cmath>
17 #include <cstdio>
18 #include <cassert>
19 
20 #include <sys/mman.h>
21 
22 #include <algorithm>
23 
24 #include "Heap.h"
25 #include "utils.h"
26 
27 #include "Clustering.h"
28 #include "IndexFlat.h"
29 
30 #include "hamming.h"
31 
32 #include "FaissAssert.h"
33 
34 #include "AuxIndexStructures.h"
35 
36 namespace faiss {
37 
38 
39 
40 
41 
42 /*****************************************
43  * IndexIVFPQ implementation
44  ******************************************/
45 
46 IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist,
47  size_t M, size_t nbits_per_idx):
48  IndexIVF (quantizer, d, nlist, METRIC_L2),
49  pq (d, M, nbits_per_idx)
50 {
51  FAISS_ASSERT (nbits_per_idx <= 8);
52  code_size = pq.code_size;
53  is_trained = false;
54  codes.resize (nlist);
55  by_residual = true;
56  use_precomputed_table = 0;
57  scan_table_threshold = 0;
58  max_codes = 0; // means unlimited
59 
60  polysemous_training = nullptr;
61  do_polysemous_training = false;
62  polysemous_ht = 0;
63 
64  set_typename();
65 }
66 
67 
68 
69 void IndexIVFPQ::set_typename ()
70 {
71  std::stringstream s;
72  s << "IvfPQ_" << pq.M << "x" << pq.nbits
73  << "[" << nlist << ":" << quantizer->index_typename << "]";
74  index_typename = s.str();
75 }
76 
77 
78 void IndexIVFPQ::train_residual (idx_t n, const float *x)
79 {
80  train_residual_o (n, x, nullptr);
81 }
82 
83 
// Train the product quantizer `pq` on n vectors x and, when residuals_2 is
// non-null, fill it with the reconstruction error (input minus decoded code)
// of each training vector.
// NOTE(review): listing lines 88, 111 and 113 are absent from this view: the
// call that owns the arguments on lines 89-90, the opening of the block closed
// on line 116, and the declaration of `pt` cannot be seen here.
84 void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
85 {
  // remember the caller's pointer so that a replaced x can be freed at the end
86  const float * x_in = x;
87 
89  d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
90  x, verbose, pq.cp.seed);
91 
  // trainset is either x itself or a freshly allocated residual buffer
92  const float *trainset;
93  if (by_residual) {
94  if(verbose) printf("computing residuals\n");
95  idx_t * assign = new idx_t [n]; // assignment to coarse centroids
96  quantizer->assign (n, x, assign);
97  float *residuals = new float [n * d];
98  for (idx_t i = 0; i < n; i++)
99  quantizer->compute_residual (x + i * d, residuals+i*d, assign[i]);
100  delete [] assign;
101  trainset = residuals;
102  } else {
103  trainset = x;
104  }
105  if (verbose)
106  printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n",
107  pq.M, pq.ksub, n, d);
108  pq.verbose = verbose;
109  pq.train (n, trainset);
110 
  // falls back to a default-constructed PolysemousTraining when pt is null
112  PolysemousTraining default_pt;
114  if (!pt) pt = &default_pt;
115  pt->optimize_pq_for_hamming (pq, n, trainset);
116  }
117 
118  // prepare second-level residuals for refine PQ
119  if (residuals_2) {
120  uint8_t *train_codes = new uint8_t [pq.code_size * n];
121  pq.compute_codes (trainset, train_codes, n);
122 
  // residuals_2[i] = trainset[i] - decode(encode(trainset[i]))
123  for (idx_t i = 0; i < n; i++) {
124  const float *xx = trainset + i * d;
125  float * res = residuals_2 + i * d;
126  pq.decode (train_codes + i * pq.code_size, res);
127  for (int j = 0; j < d; j++)
128  res[j] = xx[j] - res[j];
129  }
130 
131  delete [] train_codes;
132  }
133 
  // in residual mode trainset is the buffer allocated above: release it,
  // then build the precomputed tables from the freshly trained PQ
134  if (by_residual) {
135  delete [] trainset;
136  precompute_table ();
137  }
138 
  // x differs from x_in only if it was reassigned earlier in this function
  // NOTE(review): presumably by the call on the missing line 88 - confirm
139  if (x_in != x) delete [] x;
140 }
141 
142 
143 /* produce a binary signature based on the residual vector */
144 void IndexIVFPQ::encode (long key, const float * x, uint8_t * code) const
145 {
146  if (by_residual) {
147  float residual_vec[d];
148  quantizer->compute_residual (x, residual_vec, key);
149  pq.compute_code (residual_vec, code);
150  }
151  else pq.compute_code (x, code);
152 }
153 
154 
155 
156 
157 
158 void IndexIVFPQ::encode_multiple (size_t n, long *keys,
159  const float * x, uint8_t * xcodes,
160  bool compute_keys) const
161 {
162  if (compute_keys)
163  quantizer->assign (n, x, keys);
164 
165  if (by_residual) {
166  float *residuals = new float [n * d];
167  // TODO: parallelize?
168  for (size_t i = 0; i < n; i++)
169  quantizer->compute_residual (x + i * d, residuals + i * d, keys[i]);
170  pq.compute_codes (residuals, xcodes, n);
171  delete [] residuals;
172  } else {
173  pq.compute_codes (x, xcodes, n);
174  }
175 }
176 
177 void IndexIVFPQ::decode_multiple (size_t n, const long *keys,
178  const uint8_t * xcodes, float * x) const
179 {
180  pq.decode (xcodes, x, n);
181  if (by_residual) {
182  std::vector<float> centroid (d);
183  for (size_t i = 0; i < n; i++) {
184  quantizer->reconstruct (keys[i], centroid.data());
185  float *xi = x + i * d;
186  for (size_t j = 0; j < d; j++) {
187  xi [j] += centroid [j];
188  }
189  }
190  }
191 }
192 
193 
194 void IndexIVFPQ::add_with_ids (idx_t n, const float * x, const long *xids)
195 {
196  add_core_o (n, x, xids, nullptr);
197 }
198 
199 
// Encode n vectors and append ids and codes to their inverted lists.
//  - xids: ids to store, or null to use ntotal + i
//  - residuals_2: if non-null, receives per vector the difference between the
//    encoded input (residual or raw vector) and its PQ reconstruction
//  - precomputed_idx: if non-null, list assignment of each vector; otherwise
//    the quantizer computes it
// Vectors whose assignment is negative are not stored.
// NOTE(review): listing line 261 is absent from this view, so whatever guards
// the direct_map update on line 262 cannot be seen here.
200 void IndexIVFPQ::add_core_o (idx_t n, const float * x, const long *xids,
201  float *residuals_2, const long *precomputed_idx)
202 {
203  FAISS_ASSERT (is_trained);
204  double t0 = getmillisecs ();
205  const long * idx;
206 
207  if (precomputed_idx) {
208  idx = precomputed_idx;
209  } else {
210  long * idx0 = new long [n];
211  quantizer->assign (n, x, idx0);
212  idx = idx0;
213  }
214 
215  double t1 = getmillisecs ();
216  uint8_t * xcodes = new uint8_t [n * code_size];
217 
218  const float *to_encode = nullptr;
219 
220  if (by_residual) {
221  float *residuals = new float [n * d];
222  // TODO: parallelize?
223  for (size_t i = 0; i < n; i++) {
  // unassigned vectors get an all-zero residual so the buffer is fully defined
224  if (idx[i] < 0)
225  memset (residuals + i * d, 0, sizeof(*residuals) * d);
226  else
227  quantizer->compute_residual (
228  x + i * d, residuals + i * d, idx[i]);
229  }
230  to_encode = residuals;
231  } else {
232  to_encode = x;
233  }
234  pq.compute_codes (to_encode, xcodes, n);
235 
236  double t2 = getmillisecs ();
237  // TODO: parallelize?
238  size_t n_ignore = 0;
239  for (size_t i = 0; i < n; i++) {
240  idx_t key = idx[i];
241  if (key < 0) {
242  n_ignore ++;
  // NOTE(review): this clears the first d floats of residuals_2, not row i;
  // every other access below uses residuals_2 + i * d - looks unintended
243  if (residuals_2)
244  memset (residuals_2, 0, sizeof(*residuals_2) * d);
245  continue;
246  }
247  idx_t id = xids ? xids[i] : ntotal + i;
248  ids[key].push_back (id);
  // codes are appended byte by byte to the list's flat code array
249  uint8_t *code = xcodes + i * code_size;
250  for (size_t j = 0; j < code_size; j++)
251  codes[key].push_back (code[j]);
252 
253  if (residuals_2) {
254  float *res2 = residuals_2 + i * d;
255  const float *xi = to_encode + i * d;
256  pq.decode (code, res2);
257  for (int j = 0; j < d; j++)
258  res2[j] = xi[j] - res2[j];
259  }
260 
  // direct map entry: list number in the high 32 bits, offset in the low bits
262  direct_map.push_back (key << 32 | (ids[key].size() - 1));
263  }
264 
  // to_encode owns memory only in residual mode (it aliases x otherwise)
265  if (by_residual)
266  delete [] to_encode;
267 
268  delete [] xcodes;
269  if (!precomputed_idx)
270  delete [] idx;
271  double t3 = getmillisecs ();
272  if(verbose) {
273  char comment[100] = {0};
274  if (n_ignore > 0)
275  snprintf (comment, 100, "(%ld vectors ignored)", n_ignore);
276  printf(" add_core times: %.3f %.3f %.3f %s\n",
277  t1 - t0, t2 - t1, t3 - t2, comment);
278  }
  // ntotal grows by n, ignored vectors included
279  ntotal += n;
280 }
281 
282 void IndexIVFPQ::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
283 {
284  FAISS_ASSERT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
285 
286  std::vector<float> centroid (d);
287 
288  for (int key = 0; key < nlist; key++) {
289  const std::vector<long> & idlist = ids[key];
290  const uint8_t * code_line = codes[key].data();
291 
292  for (long ofs = 0; ofs < idlist.size(); ofs++) {
293  long id = idlist[ofs];
294  if (!(id >= i0 && id < i0 + ni)) continue;
295  float *r = recons + d * (id - i0);
296  if (by_residual) {
297  quantizer->reconstruct (key, centroid.data());
298  pq.decode (code_line + ofs * pq.code_size, r);
299  for (int j = 0; j < d; j++) {
300  r[j] += centroid[j];
301  }
302  }
303  else {
304  pq.decode (code_line + ofs * pq.code_size, r);
305  }
306  }
307  }
308 }
309 
310 
311 void IndexIVFPQ::reconstruct (idx_t key, float * recons) const
312 {
313  FAISS_ASSERT (direct_map.size() == ntotal);
314  int list_no = direct_map[key] >> 32;
315  int ofs = direct_map[key] & 0xffffffff;
316 
317  quantizer->reconstruct (list_no, recons);
318  const uint8_t * code = &(codes[list_no][ofs * pq.code_size]);
319 
320  for (size_t m = 0; m < pq.M; m++) {
321  float * out = recons + m * pq.dsub;
322  const float * cent = pq.get_centroids (m, code[m]);
323  for (size_t i = 0; i < pq.dsub; i++) {
324  out[i] += cent[i];
325  }
326  }
327 }
328 
329 
330 
332 {
  // NOTE(review): the signature of this function (listing line 331) is absent
  // from this view; other_in is its parameter.
  // Appends the codes of every list of `other` to the matching list of this
  // index and empties the source list. dynamic_cast to a reference throws
  // std::bad_cast if other_in is not an IndexIVFPQ.
333  IndexIVFPQ &other = dynamic_cast<IndexIVFPQ &> (other_in);
334  for (int i = 0; i < nlist; i++) {
335  codes[i].insert (codes[i].end(),
336  other.codes[i].begin(), other.codes[i].end());
337  other.codes[i].clear();
338  }
339 }
340 
341 void IndexIVFPQ::copy_subset_to (IndexIVFPQ & other, int subset_type,
342  long a1, long a2) const
343 {
344  FAISS_ASSERT (nlist == other.nlist);
345  FAISS_ASSERT (!other.maintain_direct_map);
346  size_t code_size = pq.code_size;
347  for (long list_no = 0; list_no < nlist; list_no++) {
348  const std::vector<idx_t> & ids_in = ids[list_no];
349  std::vector<idx_t> & ids_out = other.ids[list_no];
350  const std::vector<uint8_t> & codes_in = codes[list_no];
351  std::vector<uint8_t> & codes_out = other.codes[list_no];
352 
353  for (long i = 0; i < ids_in.size(); i++) {
354  idx_t id = ids_in[i];
355  if (subset_type == 0 && a1 <= id && id < a2) {
356  ids_out.push_back (id);
357  codes_out.insert (codes_out.end(),
358  codes_in.begin() + i * code_size,
359  codes_in.begin() + (i + 1) * code_size);
360  other.ntotal++;
361  }
362  }
363  }
364 }
365 
366 
367 
368 
369 
370 /** Precomputed tables for residuals
371  *
372  * During IVFPQ search with by_residual, we compute
373  *
374  * d = || x - y_C - y_R ||^2
375  *
376  * where x is the query vector, y_C the coarse centroid, y_R the
377  * refined PQ centroid. The expression can be decomposed as:
378  *
379  * d = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C|y_R) - 2 * (x|y_R)
380  *     ---------------   ---------------------------       -------
381  *         term 1                   term 2                 term 3
382  *
383  * When using multiprobe, we use the following decomposition:
384  * - term 1 is the distance to the coarse centroid, that is computed
385  * during the 1st stage search.
386  * - term 2 can be precomputed, as it does not involve x. However,
387  * because of the PQ, it needs nlist * M * ksub storage. This is why
388  * use_precomputed_table is off by default
389  * - term 3 is the classical non-residual distance table.
390  *
391  * Since y_R is defined by a product quantizer, it is split across
392  * subvectors and stored separately for each subvector. If the coarse
393  * quantizer is a MultiIndexQuantizer then the table can be stored
394  * more compactly.
395  *
396  * At search time, the tables for term 2 and term 3 are added up. This
397  * is faster when the length of the lists is > ksub * M.
398  */
399 
401 {
  // NOTE(review): the signature (listing line 400) and listing lines 413, 415
  // and 424 are absent from this view: the statements selected by the if/else
  // on lines 412/414 and the right-hand side of the r_norms assignment on
  // line 423 cannot be seen here.
  // Fills precomputed_table according to use_precomputed_table (1 or 2).
402 
403 
404  if (use_precomputed_table == 0) { // then choose the type of table
405  if (quantizer->metric_type == METRIC_INNER_PRODUCT) {
406  fprintf(stderr, "IndexIVFPQ::precompute_table: WARN precomputed "
407  "tables not supported for inner product quantizers\n");
408  return;
409  }
410  const MultiIndexQuantizer *miq =
411  dynamic_cast<const MultiIndexQuantizer *> (quantizer);
412  if (miq && pq.M % miq->pq.M == 0)
414  else
416  } // otherwise assume user has set appropriate flag on input
417 
418 
419  // squared norms of the PQ centroids
  // (pre-filled with NAN; each of the M * ksub entries is assigned below)
420  std::vector<float> r_norms (pq.M * pq.ksub, NAN);
421  for (int m = 0; m < pq.M; m++)
422  for (int j = 0; j < pq.ksub; j++)
423  r_norms [m * pq.ksub + j] =
425 
426  if (use_precomputed_table == 1) {
427 
  // one M * ksub table per inverted list
428  precomputed_table.resize (nlist * pq.M * pq.ksub);
429  std::vector<float> centroid (d);
430 
431  for (size_t i = 0; i < nlist; i++) {
432  quantizer->reconstruct (i, centroid.data());
433 
  // tab holds the inner products with centroid i, then is combined in place
  // with r_norms by fvec_madd using the factor 2.0
434  float *tab = &precomputed_table[i * pq.M * pq.ksub];
435  pq.compute_inner_prod_table (centroid.data(), tab);
436  fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
437  }
438  } else if (use_precomputed_table == 2) {
439  const MultiIndexQuantizer *miq =
440  dynamic_cast<const MultiIndexQuantizer *> (quantizer);
441  FAISS_ASSERT (miq);
442  const ProductQuantizer &cpq = miq->pq;
443  FAISS_ASSERT (pq.M % cpq.M == 0);
444 
  // one M * ksub table per coarse-PQ centroid index instead of per list
445  precomputed_table.resize(cpq.ksub * pq.M * pq.ksub);
446 
447  // reorder PQ centroid table
  // row i of `centroids` concatenates the i-th centroid of every coarse
  // sub-quantizer m at offset m * cpq.dsub
448  std::vector<float> centroids (d * cpq.ksub, NAN);
449 
450  for (int m = 0; m < cpq.M; m++) {
451  for (size_t i = 0; i < cpq.ksub; i++) {
452  memcpy (centroids.data() + i * d + m * cpq.dsub,
453  cpq.get_centroids (m, i),
454  sizeof (*centroids.data()) * cpq.dsub);
455  }
456  }
457 
458  pq.compute_inner_prod_tables (cpq.ksub, centroids.data (),
459  precomputed_table.data ());
460 
461  for (size_t i = 0; i < cpq.ksub; i++) {
462  float *tab = &precomputed_table[i * pq.M * pq.ksub];
463  fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
464  }
465 
466  }
467 }
468 
469 namespace {
470 
// Read the 64-bit time-stamp counter: rdtsc leaves the low half in eax and
// the high half in edx, which are bound to `low` and `high` and recombined.
// x86-only (uses the rdtsc instruction).
471 static uint64_t get_cycles () {
472  uint32_t high, low;
473  asm volatile("rdtsc \n\t"
474  : "=a" (low),
475  "=d" (high));
476  return ((uint64_t)high << 32) | (low);
477 }
478 
// Timing helpers: both expect a uint64_t variable named t0 in scope.
// TIC stores the current counter in t0; TOC evaluates to the cycles elapsed
// since the last TIC. TOC is not parenthesized, so it is only safe where the
// surrounding expression does not re-associate it (e.g. `x += TOC`).
479 #define TIC t0 = get_cycles()
480 #define TOC get_cycles () - t0
481 
482 
483 
484 /** QueryTables manages the various ways of searching an
485  * IndexIVFPQ. The code contains a lot of branches, depending on:
486  * - metric_type: are we computing L2 or Inner product similarity?
487  * - by_residual: do we encode raw vectors or residuals?
488  * - use_precomputed_table: are x_R|x_C tables precomputed?
489  * - polysemous_ht: are we filtering with polysemous codes?
  *
  * Usage order visible in this file: construct once, call init_query per
  * query, set key / coarse_dis per inverted list, then call one of the
  * precompute_list_* functions.
490  */
491 struct QueryTables {
492 
493  /*****************************************************
494  * General data from the IVFPQ
495  *****************************************************/
496 
497  const IndexIVFPQ & ivfpq;
498 
499  // copied from IndexIVFPQ for easier access
500  int d;
501  const ProductQuantizer & pq;
502  MetricType metric_type;
503  bool by_residual;
504  int use_precomputed_table;
505 
506  // pre-allocated data buffers
  // (all four point into `mem`, see the constructor)
507  float * sim_table, * sim_table_2;
508  float * residual_vec, *decoded_vec;
509 
510  // single data buffer
511  std::vector<float> mem;
512 
513  // for table pointers
  // (one entry per PQ sub-quantizer, filled by precompute_list_table_pointers_L2)
514  std::vector<const float *> sim_table_ptrs;
515 
  // Copies the search options from ivfpq and carves the working buffers out
  // of `mem`: two tables of ksub * M floats followed by two vectors of d floats.
516  explicit QueryTables (const IndexIVFPQ & ivfpq):
517  ivfpq(ivfpq),
518  d(ivfpq.d),
519  pq (ivfpq.pq),
520  metric_type (ivfpq.metric_type),
521  by_residual (ivfpq.by_residual),
522  use_precomputed_table (ivfpq.use_precomputed_table)
523  {
524  mem.resize (pq.ksub * pq.M * 2 + d *2);
525  sim_table = mem.data();
526  sim_table_2 = sim_table + pq.ksub * pq.M;
527  residual_vec = sim_table_2 + pq.ksub * pq.M;
528  decoded_vec = residual_vec + d;
529 
530  // for polysemous
531  if (ivfpq.polysemous_ht != 0) {
532  q_code.resize (pq.code_size);
533  }
534  init_list_cycles = 0;
535  sim_table_ptrs.resize (pq.M);
536  }
537 
538  /*****************************************************
539  * What we do when query is known
540  *****************************************************/
541 
542  // field specific to query
543  const float * qi;
544 
545  // query-specific initialization
  // Stores the query pointer (not a copy), builds the query-only tables for
  // the active metric and, for non-residual polysemous search, the query code.
546  void init_query (const float * qi) {
547  this->qi = qi;
548  if (metric_type == METRIC_INNER_PRODUCT)
549  init_query_IP ();
550  else
551  init_query_L2 ();
552  if (!by_residual && ivfpq.polysemous_ht != 0)
553  pq.compute_code (qi, q_code.data());
554  }
555 
556  void init_query_IP () {
557  // precompute some tables specific to the query qi
558  pq.compute_inner_prod_table (qi, sim_table);
559  // we compute negated inner products for use with the maxheap
560  for (int i = 0; i < pq.ksub * pq.M; i++) {
561  sim_table[i] = - sim_table[i];
562  }
563  }
564 
  // L2: without residuals the distance table depends on the query only; with
  // residuals and precomputed tables only the query inner products are
  // prepared here (into sim_table_2); otherwise nothing is done at this stage.
565  void init_query_L2 () {
566  if (!by_residual) {
567  pq.compute_distance_table (qi, sim_table);
568  } else if (use_precomputed_table) {
569  pq.compute_inner_prod_table (qi, sim_table_2);
570  }
571  }
572 
573  /*****************************************************
574  * When inverted list is known: prepare computations
575  *****************************************************/
576 
577  // fields specific to list
578  Index::idx_t key;
579  float coarse_dis;
580  std::vector<uint8_t> q_code;
581 
  // cycles spent in the precompute_list_* functions, accumulated via TIC/TOC
582  uint64_t init_list_cycles;
583 
584  /// once we know the query and the centroid, we can prepare the
585  /// sim_table that will be used for accumulation
586  /// and dis0, the initial value
  /// (returns 0 and computes nothing when by_residual is false)
587  float precompute_list_tables () {
588  float dis0 = 0;
589  uint64_t t0; TIC;
590  if (by_residual) {
591  if (metric_type == METRIC_INNER_PRODUCT)
592  dis0 = precompute_list_tables_IP ();
593  else
594  dis0 = precompute_list_tables_L2 ();
595  }
596  init_list_cycles += TOC;
597  return dis0;
598  }
599 
  /// same as precompute_list_tables, but fills sim_table_ptrs instead of
  /// sim_table; only the L2 case is implemented
600  float precompute_list_table_pointers () {
601  float dis0 = 0;
602  uint64_t t0; TIC;
603  if (by_residual) {
604  if (metric_type == METRIC_INNER_PRODUCT)
605  FAISS_ASSERT (!"not implemented");
606  else
607  dis0 = precompute_list_table_pointers_L2 ();
608  }
609  init_list_cycles += TOC;
610  return dis0;
611  }
612 
613  /*****************************************************
614  * compute tables for inner prod
615  *****************************************************/
616 
617  float precompute_list_tables_IP ()
618  {
619  // prepare the sim_table that will be used for accumulation
620  // and dis0, the initial value
621  ivfpq.quantizer->reconstruct (key, decoded_vec);
622  // decoded_vec = centroid
623  float dis0 = -fvec_inner_product (qi, decoded_vec, d);
624 
  // polysemous filtering needs the code of the query residual
625  if (ivfpq.polysemous_ht) {
626  for (int i = 0; i < d; i++) {
627  residual_vec [i] = qi[i] - decoded_vec[i];
628  }
629  pq.compute_code (residual_vec, q_code.data());
630  }
631  return dis0;
632  }
633 
634 
635  /*****************************************************
636  * compute tables for L2 distance
637  *****************************************************/
638 
  // Fills sim_table for the current list. Three modes:
  //  0: distance table of the query residual, dis0 stays 0
  //  1: per-list precomputed table combined with the query table (factor -2),
  //     dis0 = coarse_dis
  //  2: as 1, but the table is assembled from the per-sub-index tables
  //     selected by the bit fields of key
639  float precompute_list_tables_L2 ()
640  {
641  float dis0 = 0;
642 
643  if (use_precomputed_table == 0) {
644  ivfpq.quantizer->compute_residual (qi, residual_vec, key);
645  pq.compute_distance_table (residual_vec, sim_table);
646  } else if (use_precomputed_table == 1) {
647  dis0 = coarse_dis;
648 
649  fvec_madd (pq.M * pq.ksub,
650  &ivfpq.precomputed_table [key * pq.ksub * pq.M],
651  -2.0, sim_table_2,
652  sim_table);
653  } else if (use_precomputed_table == 2) {
654  dis0 = coarse_dis;
655 
656  const MultiIndexQuantizer *miq =
657  dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
658  FAISS_ASSERT (miq);
659  const ProductQuantizer &cpq = miq->pq;
  // number of fine sub-quantizers covered by one coarse sub-quantizer
660  int Mf = pq.M / cpq.M;
661 
662  const float *qtab = sim_table_2; // query-specific table
663  float *ltab = sim_table; // (output) list-specific table
664 
665  long k = key;
666  for (int cm = 0; cm < cpq.M; cm++) {
667  // compute PQ index
  // (the lowest cpq.nbits bits of k; k is then shifted for the next cm)
668  int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
669  k >>= cpq.nbits;
670 
671  // get corresponding table
672  const float *pc = &ivfpq.precomputed_table
673  [(ki * pq.M + cm * Mf) * pq.ksub];
674 
675  if (ivfpq.polysemous_ht == 0) {
676 
677  // sum up with query-specific table
678  fvec_madd (Mf * pq.ksub,
679  pc,
680  -2.0, qtab,
681  ltab);
682  ltab += Mf * pq.ksub;
683  qtab += Mf * pq.ksub;
684  } else {
  // polysemous: one sub-quantizer at a time, so that the value returned by
  // fvec_madd_and_argmin can be stored as the m-th byte of the query code
685  for (int m = cm * Mf; m < (cm + 1) * Mf; m++) {
686  q_code[m] = fvec_madd_and_argmin
687  (pq.ksub, pc, -2, qtab, ltab);
688  pc += pq.ksub;
689  ltab += pq.ksub;
690  qtab += pq.ksub;
691  }
692  }
693 
694  }
695  }
696 
697  return dis0;
698  }
699 
  // Points sim_table_ptrs[m] at the precomputed sub-table of sub-quantizer m
  // for the current list; requires use_precomputed_table 1 or 2 and asserts
  // otherwise. Returns coarse_dis.
700  float precompute_list_table_pointers_L2 ()
701  {
702  float dis0 = 0;
703 
704  if (use_precomputed_table == 1) {
705  dis0 = coarse_dis;
706 
707  const float * s = &ivfpq.precomputed_table [key * pq.ksub * pq.M];
708  for (int m = 0; m < pq.M; m++) {
709  sim_table_ptrs [m] = s;
710  s += pq.ksub;
711  }
712  } else if (use_precomputed_table == 2) {
713  dis0 = coarse_dis;
714 
715  const MultiIndexQuantizer *miq =
716  dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
717  FAISS_ASSERT (miq);
718  const ProductQuantizer &cpq = miq->pq;
719  int Mf = pq.M / cpq.M;
720 
721  long k = key;
722  int m0 = 0;
723  for (int cm = 0; cm < cpq.M; cm++) {
724  int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
725  k >>= cpq.nbits;
726 
727  const float *pc = &ivfpq.precomputed_table
728  [(ki * pq.M + cm * Mf) * pq.ksub];
729 
730  for (int m = m0; m < m0 + Mf; m++) {
731  sim_table_ptrs [m] = pc;
732  pc += pq.ksub;
733  }
734  m0 += Mf;
735  }
736  } else FAISS_ASSERT (!"need precomputed tables");
737 
738  if (ivfpq.polysemous_ht) {
739  FAISS_ASSERT (!"not implemented");
740  // Not clear that it makes sense to implement this,
741  // because it costs M * ksub, which is what we wanted to
742  // avoid with the tables pointers.
743  }
744 
745  return dis0;
746  }
747 
748 
749 };
750 
751 
752 /*****************************************************
753  * Scaning the codes.
754  * The scanning functions call their favorite precompute_*
755  * function to precompute the tables they need.
756  *****************************************************/
757 template <typename IDType>
758 struct InvertedListScanner: QueryTables {
759 
760  const uint8_t * __restrict list_codes;
761  const IDType * list_ids;
762  size_t list_size;
763 
764  explicit InvertedListScanner (const IndexIVFPQ & ivfpq):
765  QueryTables (ivfpq)
766  {
767  FAISS_ASSERT(pq.byte_per_idx == 1);
768  n_hamming_pass = 0;
769  }
770 
771  /// list_specific intialization
772  void init_list (Index::idx_t key, float coarse_dis,
773  size_t list_size_in, const IDType *list_ids_in,
774  const uint8_t *list_codes_in) {
775  this->key = key;
776  this->coarse_dis = coarse_dis;
777  list_size = list_size_in;
778  list_codes = list_codes_in;
779  list_ids = list_ids_in;
780  }
781 
782  /*****************************************************
783  * Scaning the codes: simple PQ scan.
784  *****************************************************/
785 
786  /// version of the scan where we use precomputed tables
787  void scan_list_with_table (
788  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
789  {
790  float dis0 = precompute_list_tables ();
791 
792  for (size_t j = 0; j < list_size; j++) {
793 
794  float dis = dis0;
795  const float *tab = sim_table;
796 
797  for (size_t m = 0; m < pq.M; m++) {
798  dis += tab[*list_codes++];
799  tab += pq.ksub;
800  }
801 
802  if (dis < heap_sim[0]) {
803  maxheap_pop (k, heap_sim, heap_ids);
804  long id = store_pairs ? (key << 32 | j) : list_ids[j];
805  maxheap_push (k, heap_sim, heap_ids, dis, id);
806  }
807  }
808  }
809 
810 
811  /// tables are not precomputed, but pointers are provided to the
812  /// relevant X_c|x_r tables
813  void scan_list_with_pointer (
814  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
815  {
816 
817  float dis0 = precompute_list_table_pointers ();
818 
819  for (size_t j = 0; j < list_size; j++) {
820 
821  float dis = dis0;
822  const float *tab = sim_table_2;
823 
824  for (size_t m = 0; m < pq.M; m++) {
825  int ci = *list_codes++;
826  dis += sim_table_ptrs [m][ci] - 2 * tab [ci];
827  tab += pq.ksub;
828  }
829 
830  if (dis < heap_sim[0]) {
831  maxheap_pop (k, heap_sim, heap_ids);
832  long id = store_pairs ? (key << 32 | j) : list_ids[j];
833  maxheap_push (k, heap_sim, heap_ids, dis, id);
834  }
835  }
836 
837  }
838 
839  /// nothing is precomputed: access residuals on-the-fly
840  void scan_on_the_fly_dist (
841  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
842  {
843 
844  if (by_residual && use_precomputed_table) {
845  scan_list_with_pointer (k, heap_sim, heap_ids, store_pairs);
846  return;
847  }
848 
849  const float *dvec;
850  float dis0 = 0;
851 
852  if (by_residual) {
853  if (metric_type == METRIC_INNER_PRODUCT) {
854  ivfpq.quantizer->reconstruct (key, residual_vec);
855  dis0 = fvec_inner_product (residual_vec, qi, d);
856  } else {
857  ivfpq.quantizer->compute_residual (qi, residual_vec, key);
858  }
859  dvec = residual_vec;
860  } else {
861  dvec = qi;
862  dis0 = 0;
863  }
864 
865  for (size_t j = 0; j < list_size; j++) {
866 
867  pq.decode (list_codes, decoded_vec);
868  list_codes += pq.code_size;
869 
870  float dis;
871  if (metric_type == METRIC_INNER_PRODUCT) {
872  dis = -dis0 - fvec_inner_product (decoded_vec, qi, d);
873  } else {
874  dis = fvec_L2sqr (decoded_vec, dvec, d);
875  }
876 
877  if (dis < heap_sim[0]) {
878  maxheap_pop (k, heap_sim, heap_ids);
879  long id = store_pairs ? (key << 32 | j) : list_ids[j];
880  maxheap_push (k, heap_sim, heap_ids, dis, id);
881  }
882  }
883  }
884 
885  /*****************************************************
886  * Scanning codes with polysemous filtering
887  *****************************************************/
888 
889  // code for the query
890  size_t n_hamming_pass;
891 
892 
893  template <class HammingComputer>
894  void scan_list_polysemous_hc (
895  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
896  {
897  float dis0 = precompute_list_tables ();
898  int ht = ivfpq.polysemous_ht;
899 
900  int code_size = pq.code_size;
901 
902  HammingComputer hc (q_code.data(), code_size);
903 
904  for (size_t j = 0; j < list_size; j++) {
905  const uint8_t *b_code = list_codes;
906  int hd = hc.hamming (b_code);
907  if (hd < ht) {
908  n_hamming_pass ++;
909 
910  float dis = dis0;
911  const float *tab = sim_table;
912 
913  for (size_t m = 0; m < pq.M; m++) {
914  dis += tab[*b_code++];
915  tab += pq.ksub;
916  }
917 
918  if (dis < heap_sim[0]) {
919  maxheap_pop (k, heap_sim, heap_ids);
920  long id = store_pairs ? (key << 32 | j) : list_ids[j];
921  maxheap_push (k, heap_sim, heap_ids, dis, id);
922  }
923  }
924  list_codes += code_size;
925  }
926  }
927 
928  void scan_list_polysemous (
929  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
930  {
931  switch (pq.code_size) {
932 #define HANDLE_CODE_SIZE(cs) \
933  case cs: \
934  scan_list_polysemous_hc <HammingComputer ## cs> \
935  (k, heap_sim, heap_ids, store_pairs); \
936  break
937  HANDLE_CODE_SIZE(4);
938  HANDLE_CODE_SIZE(8);
939  HANDLE_CODE_SIZE(16);
940  HANDLE_CODE_SIZE(20);
941  HANDLE_CODE_SIZE(32);
942  HANDLE_CODE_SIZE(64);
943 #undef HANDLE_CODE_SIZE
944  default:
945  if (pq.code_size % 8 == 0)
946  scan_list_polysemous_hc <HammingComputerM8>
947  (k, heap_sim, heap_ids, store_pairs);
948  else
949  scan_list_polysemous_hc <HammingComputerM4>
950  (k, heap_sim, heap_ids, store_pairs);
951  break;
952  }
953  }
954 
955 };
956 
957 
958 
959 
960 } // anonymous namespace
961 
962 
963 IndexIVFPQStats indexIVFPQ_stats;
964 
965 void IndexIVFPQStats::reset () {
966  memset (this, 0, sizeof (*this));
967 }
968 
969 
971  size_t nx,
972  const float * qx,
973  const long * keys,
974  const float * coarse_dis,
975  float_maxheap_array_t * res,
976  bool store_pairs) const
977 {
  // NOTE(review): the first line of the signature (listing line 970) is absent
  // from this view.
  // Scans, for each of the nx queries, the nprobe inverted lists given in keys
  // (with their coarse distances) and leaves the k best results in res.
978  const size_t k = res->k;
979 
  // each thread owns its scanner and counters; they are merged into the
  // global statistics in the critical section below
980 #pragma omp parallel
981  {
982  InvertedListScanner<long> qt (*this);
983  size_t stats_nlist = 0;
984  size_t stats_ncode = 0;
985  uint64_t init_query_cycles = 0;
986  uint64_t scan_cycles = 0;
987  uint64_t heap_cycles = 0;
988 
989 #pragma omp for
990  for (size_t i = 0; i < nx; i++) {
991  const float *qi = qx + i * d;
992  const long * keysi = keys + i * nprobe;
993  const float *coarse_dis_i = coarse_dis + i * nprobe;
994  float * heap_sim = res->get_val (i);
995  long * heap_ids = res->get_ids (i);
996 
997  uint64_t t0;
998  TIC;
999  maxheap_heapify (k, heap_sim, heap_ids);
1000  heap_cycles += TOC;
1001 
1002  TIC;
1003  qt.init_query (qi);
1004  init_query_cycles += TOC;
1005 
1006  size_t nscan = 0;
1007 
1008  for (size_t ik = 0; ik < nprobe; ik++) {
1009  long key = keysi[ik]; /* select the list */
1010  if (key < 0) {
1011  // not enough centroids for multiprobe
1012  continue;
1013  }
  // NOTE(review): `throw;` with no exception being handled calls
  // std::terminate, so this path aborts the process rather than reporting
  // an error to the caller.
1014  if (key >= (long) nlist) {
1015  fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
1016  throw;
1017  }
1018  size_t list_size = ids[key].size();
1019  stats_nlist ++;
1020  nscan += list_size;
1021 
1022  if (list_size == 0) continue;
1023 
1024  qt.init_list (key, coarse_dis_i[ik],
1025  list_size, ids[key].data(),
1026  codes[key].data());
1027 
  // scan strategy: polysemous filtering if enabled, else table-based scan
  // for lists longer than scan_table_threshold, else on-the-fly decoding
1028  TIC;
1029  if (polysemous_ht > 0) {
1030  qt.scan_list_polysemous
1031  (k, heap_sim, heap_ids, store_pairs);
1032  } else if (list_size > scan_table_threshold) {
1033  qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
1034  } else {
1035  qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
1036  }
1037  scan_cycles += TOC;
1038 
  // max_codes == 0 disables the limit; the check runs after the list has
  // been scanned, so the limit can be exceeded by one list
1039  if (max_codes && nscan >= max_codes) break;
1040  }
1041  stats_ncode += nscan;
1042  TIC;
1043  maxheap_reorder (k, heap_sim, heap_ids);
1044 
  // inner products were accumulated negated for the max-heap: restore sign
1045  if (metric_type == METRIC_INNER_PRODUCT) {
1046  for (size_t j = 0; j < k; j++)
1047  heap_sim[j] = -heap_sim[j];
1048  }
1049  heap_cycles += TOC;
1050  }
1051 
1052 #pragma omp critical
1053  {
1054  indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
1055  indexIVFPQ_stats.nlist += stats_nlist;
1056  indexIVFPQ_stats.ncode += stats_ncode;
1057 
1058  indexIVFPQ_stats.init_query_cycles += init_query_cycles;
1059  indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
  // list initialization happens inside the scan functions, so its cycles
  // are subtracted from the scan time
1060  indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
1061  indexIVFPQ_stats.heap_cycles += heap_cycles;
1062  }
1063 
1064  }
1065  indexIVFPQ_stats.nq += nx;
1066 }
1067 
1068 
1069 void IndexIVFPQ::search (idx_t n, const float *x, idx_t k,
1070  float *distances, idx_t *labels) const
1071 {
1072  long * idx = new long [n * nprobe];
1073  float * coarse_dis = new float [n * nprobe];
1074  uint64_t t0;
1075  TIC;
1076  quantizer->search (n, x, nprobe, coarse_dis, idx);
1077  indexIVFPQ_stats.assign_cycles += TOC;
1078 
1079  TIC;
1080  float_maxheap_array_t res = { size_t(n), size_t(k), labels, distances};
1081 
1082  search_knn_with_key (n, x, idx, coarse_dis, &res);
1083  delete [] idx;
1084  delete [] coarse_dis;
1085  indexIVFPQ_stats.search_cycles += TOC;
1086 }
1087 
1088 
1090 {
  // NOTE(review): the signature of this function (listing line 1089) is absent
  // from this view.
  // Resets the IndexIVF base, then empties the code array of every list.
1091  IndexIVF::reset();
1092  for (size_t key = 0; key < nlist; key++) {
1093  codes[key].clear();
1094  }
1095 }
1096 
1098 {
  // NOTE(review): the signature of this function (listing line 1097) is absent
  // from this view; `sel` is its selector argument.
  // Removes from every list the entries whose id is selected by sel, updates
  // ntotal and returns the number of removed entries. Not supported when a
  // direct map is maintained.
1099  FAISS_ASSERT (!maintain_direct_map ||
1100  !"direct map remove not implemented");
1101  long nremove = 0;
  // lists are independent, so they are processed in parallel; the removal
  // counts are summed by the reduction
1102 #pragma omp parallel for reduction(+: nremove)
1103  for (long i = 0; i < nlist; i++) {
1104  std::vector<idx_t> & idsi = ids[i];
1105  uint8_t * codesi = codes[i].data();
1106 
  // swap-with-last removal: a selected entry j is overwritten by the last
  // live entry l and j is examined again; entry order is not preserved
1107  long l = idsi.size(), j = 0;
1108  while (j < l) {
1109  if (sel.is_member (idsi[j])) {
1110  l--;
1111  idsi [j] = idsi [l];
1112  memmove (codesi + j * code_size,
1113  codesi + l * code_size, code_size);
1114  } else {
1115  j++;
1116  }
1117  }
  // shrink both arrays to the l surviving entries
1118  if (l < idsi.size()) {
1119  nremove += idsi.size() - l;
1120  idsi.resize (l);
1121  codes[i].resize (l * code_size);
1122  }
1123  }
1124  ntotal -= nremove;
1125  return nremove;
1126 }
1127 
1128 
// Default constructor: only sets the runtime options listed below.
// NOTE(review): listing lines 1132-1133 are absent from this view, so further
// assignments may exist that cannot be seen here.
1129 IndexIVFPQ::IndexIVFPQ ()
1130 {
1131  // initialize some runtime values
1134  do_polysemous_training = false;
1135  polysemous_ht = 0;
1136  max_codes = 0;
1137  polysemous_training = nullptr;
1138 }
1139 
1140 
/** Comparator over indices into a flat table of fixed-size codes.
 *
 * Entry i of the table is the code_size bytes starting at
 * tab + i * code_size.
 */
struct CodeCmp {
    const uint8_t *tab;   ///< flat code table
    size_t code_size;     ///< bytes per code

    /// memcmp-style three-way comparison of the codes of entries a and b
    int cmp (int a, int b) const {
        const uint8_t *code_a = tab + a * code_size;
        const uint8_t *code_b = tab + b * code_size;
        return memcmp (code_a, code_b, code_size);
    }

    /// ordering predicate: true when code a compares greater than code b,
    /// i.e. sorting with it yields decreasing byte-wise order
    bool operator () (int a, int b) const {
        return 0 < cmp (a, b);
    }
};
1152 
1153 
1154 size_t IndexIVFPQ::find_duplicates (idx_t *dup_ids, size_t *lims) const
1155 {
1156  size_t ngroup = 0;
1157  lims[0] = 0;
1158  for (size_t list_no = 0; list_no < nlist; list_no++) {
1159  size_t n = ids[list_no].size();
1160  std::vector<int> ord (n);
1161  for (int i = 0; i < n; i++) ord[i] = i;
1162  CodeCmp cs = { codes[list_no].data(), code_size };
1163  std::sort (ord.begin(), ord.end(), cs);
1164 
1165  const idx_t *list_ids = ids[list_no].data();
1166  int prev = -1; // all elements from prev to i-1 are equal
1167  for (int i = 0; i < n; i++) {
1168  if (prev >= 0 && cs.cmp (ord [prev], ord [i]) == 0) {
1169  // same as previous => remember
1170  if (prev + 1 == i) { // start new group
1171  ngroup++;
1172  lims[ngroup] = lims[ngroup - 1];
1173  dup_ids [lims [ngroup]++] = list_ids [ord [prev]];
1174  }
1175  dup_ids [lims [ngroup]++] = list_ids [ord [i]];
1176  } else { // not same as previous.
1177  prev = i;
1178  }
1179  }
1180  }
1181  return ngroup;
1182 }
1183 
1184 
1185 
1186 
1187 /*****************************************
1188  * IndexIVFPQR implementation
1189  ******************************************/
1190 
1191 IndexIVFPQR::IndexIVFPQR (
1192  Index * quantizer, size_t d, size_t nlist,
1193  size_t M, size_t nbits_per_idx,
1194  size_t M_refine, size_t nbits_per_idx_refine):
1195  IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx),
1196  refine_pq (d, M_refine, nbits_per_idx_refine),
1197  k_factor (4)
1198 {
1199  by_residual = true;
1200  set_typename();
1201 }
1202 
1203 IndexIVFPQR::IndexIVFPQR ():
1204  k_factor (1)
1205 {
1206  by_residual = true;
1207 }
1208 
1209 
1210 void IndexIVFPQR::set_typename()
1211 {
1212  std::stringstream s;
1213  s << "IvfPQR_" << pq.M << "x" << pq.nbits
1214  << "+" << refine_pq.M << "x" << refine_pq.nbits
1215  << "[" << nlist << ":" << quantizer->index_typename << "]";
1216  index_typename = s.str();
1217 
1218 }
1219 
1220 
// Fragment of a reset() method: the visible part discards all refinement codes.
// NOTE(review): listing lines 1221 (the signature, presumably
// `void IndexIVFPQR::reset()`) and 1223 (first statement of the body) are
// missing from this listing -- restore them from the original file.
1222 {
1224  refine_codes.clear();
1225 }
1226 
1227 
1228 
1229 
// Train both product quantizers: train_residual_o() does the first training
// step and fills residual_2 (n * d floats), on which refine_pq is then trained.
//   n  number of training vectors
//   x  training vectors (n * d floats are assumed -- TODO confirm with caller)
1230 void IndexIVFPQR::train_residual (idx_t n, const float *x)
1231 {
1232 
// scratch buffer receiving the residuals written by train_residual_o;
// released with delete[] at the end (not released if a callee throws)
1233  float * residual_2 = new float [n * d];
1234 
1235  train_residual_o (n, x, residual_2);
1236 
1237  if (verbose)
1238  printf ("training %zdx%zd 2nd level PQ quantizer on %ld %dD-vectors\n",
1239  refine_pq.M, refine_pq.ksub, n, d);
1240 
// NOTE(review): listing line 1241 is missing here -- restore it from the
// original file.
// propagate this index's verbosity to the clustering used by refine_pq
1242  refine_pq.cp.verbose = verbose;
1243 
1244  refine_pq.train (n, residual_2);
1245  delete [] residual_2;
1246 
1247 }
1248 
1249 
1250 void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const long *xids) {
1251  add_core (n, x, xids, nullptr);
1252 }
1253 
// Add n vectors: add_core_o() performs the IVFPQ-level insertion and outputs
// the remaining residuals into residual_2, which are then used to fill
// refine_codes starting at the slot of the first newly added vector.
//   precomputed_idx  optional list assignment, forwarded to add_core_o
1254 void IndexIVFPQR::add_core (idx_t n, const float *x, const long *xids,
1255  const long *precomputed_idx) {
1256 
// scratch buffer of n * d floats, released with delete[] at the end
1257  float * residual_2 = new float [n * d];
1258 
// ntotal before the insertion = index of the first new refinement code
1259  idx_t n0 = ntotal;
1260 
1261  add_core_o (n, x, xids, residual_2, precomputed_idx);
1262 
// NOTE(review): listing lines 1263 and 1265 are missing here; the line
// below is only the argument tail of a call whose beginning is not visible
// (it writes n codes at refine_codes[n0 * refine_pq.code_size]) -- restore
// both lines from the original file.
1264 
1266  residual_2, &refine_codes[n0 * refine_pq.code_size], n);
1267 
1268  delete [] residual_2;
1269 
1270 }
1271 
1272 
// k-nearest-neighbour search with refinement, in three timed phases:
//   1. the coarse quantizer picks nprobe lists per query;
//   2. search_knn_with_key() collects k_coarse = k * k_factor candidates per
//      query, returned as (list number, offset) pairs;
//   3. each candidate's distance is recomputed from residuals and the best k
//      are kept in distances / labels (k entries per query).
// NOTE(review): the first line of the signature is absent from this listing
// (numbering jumps from 1272 to 1274); presumably
// `void IndexIVFPQR::search (` -- restore it.
1274  idx_t n, const float *x, idx_t k,
1275  float *distances, idx_t *labels) const
1276 {
1277  FAISS_ASSERT (is_trained);
// per-query probe results of the coarse quantizer (nprobe entries each)
1278  long * idx = new long [n * nprobe];
1279  float * L1_dis = new float [n * nprobe];
1280  uint64_t t0;
1281  TIC;
1282  quantizer->search (n, x, nprobe, L1_dis, idx);
1283  indexIVFPQ_stats.assign_cycles += TOC;
1284 
1285  TIC;
// size of the candidate shortlist per query (product truncated to an integer)
1286  size_t k_coarse = long(k * k_factor);
1287  idx_t *coarse_labels = new idx_t [k_coarse * n];
1288  { // query with quantizer levels 1 and 2.
// the coarse distances are only needed as heap storage and dropped right after
1289  float *coarse_distances = new float [k_coarse * n];
1290 
1291  faiss::float_maxheap_array_t res_coarse = {
1292  size_t(n), k_coarse, coarse_labels, coarse_distances};
// last argument true: labels come back as packed (list, offset) pairs,
// which is how they are decoded below
1293  search_knn_with_key (n, x, idx, L1_dis, &res_coarse, true);
1294  delete [] coarse_distances;
1295  }
1296  delete [] L1_dis;
1297  indexIVFPQ_stats.search_cycles += TOC;
1298 
1299  TIC;
1300 
1301  // 3rd level refinement
1302  size_t n_refine = 0;
1303 #pragma omp parallel reduction(+ : n_refine)
1304  {
1305  // tmp buffers
// one allocation of 2 * d floats per thread, split into two d-sized halves
1306  float *residual_1 = new float [2 * d];
1307  float *residual_2 = residual_1 + d;
1308 #pragma omp for
1309  for (idx_t i = 0; i < n; i++) {
1310  const float *xq = x + i * d;
1311  const long * shortlist = coarse_labels + k_coarse * i;
1312  float * heap_sim = distances + k * i;
1313  long * heap_ids = labels + k * i;
1314  maxheap_heapify (k, heap_sim, heap_ids);
1315 
1316  for (int j = 0; j < k_coarse; j++) {
1317  long sl = shortlist[j];
1318 
// -1 marks an unused shortlist slot
1319  if (sl == -1) continue;
1320 
// high 32 bits: inverted list number, low 32 bits: offset inside the list
1321  int list_no = sl >> 32;
1322  int ofs = sl & 0xffffffff;
1323 
1324  assert (list_no >= 0 && list_no < nlist);
1325  assert (ofs >= 0 && ofs < ids[list_no].size());
1326 
1327  // 1st level residual
1328  quantizer->compute_residual (xq, residual_1, list_no);
1329 
1330  // 2nd level residual
1331  const uint8_t * l2code = &codes[list_no][ofs * pq.code_size];
1332  pq.decode (l2code, residual_2);
1333  for (int l = 0; l < d; l++)
1334  residual_2[l] = residual_1[l] - residual_2[l];
1335 
1336  // 3rd level residual's approximation
1337  idx_t id = ids[list_no][ofs];
1338  assert (0 <= id && id < ntotal);
// NOTE(review): listing line 1339 is missing here; the line below is only
// the last argument of a call whose beginning is not visible (it outputs
// into residual_1) -- restore it from the original file.
1340  residual_1);
1341 
1342  float dis = fvec_L2sqr (residual_1, residual_2, d);
1343 
// keep the candidate only if it beats the current worst of the k results
1344  if (dis < heap_sim[0]) {
1345  maxheap_pop (k, heap_sim, heap_ids);
1346  maxheap_push (k, heap_sim, heap_ids, dis, id);
1347  }
1348  n_refine ++;
1349  }
1350  maxheap_reorder (k, heap_sim, heap_ids);
1351  }
1352  delete [] residual_1;
1353  }
1354  delete [] coarse_labels;
1355  delete [] idx;
1356  indexIVFPQ_stats.nrefine += n_refine;
1357  indexIVFPQ_stats.refine_cycles += TOC;
1358 }
1359 
// Reconstruct vectors i0 .. i0 + ni - 1 into recons: the IndexIVFPQ
// reconstruction is computed first, then a per-vector correction r3 is added
// component by component.
1360 void IndexIVFPQR::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
1361 {
// scratch vector of d floats holding the correction of the current vector
1362  float *r3 = new float [d];
1363 
1364  IndexIVFPQ::reconstruct_n (i0, ni, recons);
1365 
1366  for (idx_t i = i0; i < i0 + ni; i++) {
// NOTE(review): r is addressed with the absolute index i, not i - i0; if
// recons holds only ni rows this points past the buffer whenever i0 > 0 --
// confirm against the layout written by IndexIVFPQ::reconstruct_n.
1367  float *r = recons + i * d;
// NOTE(review): listing line 1368 is missing here (presumably the statement
// that fills r3 for vector i) -- restore it from the original file.
1369 
1370  for (int j = 0; j < d; j++)
1371  r[j] += r3[j];
1372 
1373  }
1374  delete [] r3;
1375 }
1376 
// Body of a merge helper: appends all refinement codes of `other` to this
// index's refine_codes and then empties the other index's refine_codes.
// The dynamic_cast to a reference throws std::bad_cast if other_in is not an
// IndexIVFPQR.
// NOTE(review): listing lines 1377 (the signature, presumably
// `void IndexIVFPQR::merge_from_residuals (IndexIVF &other_in)`) and 1380
// are missing from this listing -- restore them from the original file.
1378 {
1379  IndexIVFPQR &other = dynamic_cast<IndexIVFPQR &> (other_in);
1381  refine_codes.insert (refine_codes.end(),
1382  other.refine_codes.begin(), other.refine_codes.end());
1383  other.refine_codes.clear();
1384 }
1385 
// Stub of an unsupported operation: the assertion always fails.
// NOTE(review): the signature line is absent from this listing (numbering
// jumps from 1385 to 1387); presumably
// `long IndexIVFPQR::remove_ids (const IDSelector & sel)` -- restore it.
// If so, no value is returned should the assertion ever be compiled out.
1387 {
1388  FAISS_ASSERT(!"not implemented");
1389 }
1390 
1391 /*****************************************
1392  * IndexIVFPQCompact implementation
1393  ******************************************/
1394 
1395 IndexIVFPQCompact::IndexIVFPQCompact ()
1396 {
1397  alloc_type = Alloc_type_none;
1398  limits = nullptr;
1399  compact_ids = nullptr;
1400  compact_codes = nullptr;
1401 }
1402 
1403 
1404 IndexIVFPQCompact::IndexIVFPQCompact (const IndexIVFPQ &other)
1405 {
1406  FAISS_ASSERT (other.ntotal < (1UL << 31) ||
1407  !"IndexIVFPQCompact cannot store more than 2G images");
1408 
1409  // here it would be more convenient to just use the
1410  // copy-constructor, but it would copy the lists as well: too much
1411  // overhead...
1412 
1413  // copy fields from Index
1414  d = other.d;
1415  ntotal = other.ntotal;
1416  verbose = other.verbose;
1417  is_trained = other.is_trained;
1418  metric_type = other.metric_type;
1419 
1420  // copy fields from IndexIVF (except ids)
1421  nlist = other.nlist;
1422  nprobe = other.nprobe;
1423  quantizer = other.quantizer;
1424  quantizer_trains_alone = other.quantizer_trains_alone;
1425  own_fields = false;
1426  direct_map = other.direct_map;
1427 
1428  // copy fields from IndexIVFPQ (except codes)
1429  by_residual = other.by_residual;
1430  use_precomputed_table = other.use_precomputed_table;
1431  precomputed_table = other.precomputed_table;
1432  code_size = other.code_size;
1433  pq = other.pq;
1434  do_polysemous_training = other.do_polysemous_training;
1435  polysemous_training = nullptr;
1436 
1437  scan_table_threshold = other.scan_table_threshold;
1438  max_codes = other.max_codes;
1439  polysemous_ht = other.polysemous_ht;
1440 
1441  //allocate
1442  alloc_type = Alloc_type_new;
1443  limits = new uint32_t [nlist + 1];
1444  compact_ids = new uint32_t [ntotal];
1445  compact_codes = new uint8_t [ntotal * code_size];
1446 
1447 
1448  // copy content from other
1449  size_t ofs = 0;
1450  for (size_t i = 0; i < nlist; i++) {
1451  limits [i] = ofs;
1452  const std::vector<long> &other_ids = other.ids[i];
1453  for (size_t j = 0; j < other_ids.size(); j++) {
1454  long id = other_ids[j];
1455  FAISS_ASSERT (id < (1UL << 31) ||
1456  !"IndexIVFPQCompact cannot store ids > 2G");
1457  compact_ids[ofs + j] = id;
1458  }
1459  memcpy (compact_codes + ofs * code_size,
1460  other.codes[i].data(),
1461  other.codes[i].size());
1462  ofs += other_ids.size();
1463  }
1464  FAISS_ASSERT (ofs == ntotal);
1465  limits [nlist] = ofs;
1466 
1467 }
1468 
1469 void IndexIVFPQCompact::add (idx_t, const float *) {
1470  FAISS_ASSERT (!"cannot add to an IndexIVFPQCompact");
1471 }
1472 
// Unsupported on a compact index: the assertion below always fails.
// NOTE(review): the signature line is absent from this listing (numbering
// jumps from 1472 to 1474); presumably `void IndexIVFPQCompact::reset () {`
// -- restore it.
1474  FAISS_ASSERT (!"cannot reset an IndexIVFPQCompact");
1475 }
1476 
1477 void IndexIVFPQCompact::train (idx_t, const float *) {
1478  FAISS_ASSERT (!"cannot train an IndexIVFPQCompact");
1479 }
1480 
1481 
1482 
1483 
1484 IndexIVFPQCompact::~IndexIVFPQCompact ()
1485 {
1486  if (alloc_type == Alloc_type_new) {
1487  delete [] limits;
1488  delete [] compact_codes;
1489  delete [] compact_ids;
1490  } else if (alloc_type == Alloc_type_mmap) {
1491  munmap (mmap_buffer, mmap_length);
1492 
1493  }
1494 
1495 }
1496 
1498  size_t nx,
1499  const float * qx,
1500  const long * keys,
1501  const float * coarse_dis,
1502  float_maxheap_array_t * res,
1503  bool store_pairs) const
1504 {
1505  const size_t k = res->k;
1506 
1507 #pragma omp parallel
1508  {
1509  InvertedListScanner<uint32_t> qt (*this);
1510  size_t stats_nlist = 0;
1511  size_t stats_ncode = 0;
1512  uint64_t init_query_cycles = 0;
1513  uint64_t scan_cycles = 0;
1514  uint64_t heap_cycles = 0;
1515 
1516 #pragma omp for
1517  for (size_t i = 0; i < nx; i++) {
1518  const float *qi = qx + i * d;
1519  const long * keysi = keys + i * nprobe;
1520  const float *coarse_dis_i = coarse_dis + i * nprobe;
1521  float * heap_sim = res->get_val (i);
1522  long * heap_ids = res->get_ids (i);
1523 
1524  uint64_t t0;
1525  TIC;
1526  maxheap_heapify (k, heap_sim, heap_ids);
1527  heap_cycles += TOC;
1528 
1529  TIC;
1530  qt.init_query (qi);
1531  init_query_cycles += TOC;
1532 
1533  size_t nscan = 0;
1534 
1535  for (size_t ik = 0; ik < nprobe; ik++) {
1536  long key = keysi[ik]; /* select the list */
1537  if (key < 0) {
1538  // not enough centroids for multiprobe
1539  continue;
1540  }
1541  if (key >= (long) nlist) {
1542  fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
1543  throw;
1544  }
1545  size_t list_size = limits[key + 1] - limits[key];
1546  stats_nlist ++;
1547  nscan += list_size;
1548 
1549  if (list_size == 0) continue;
1550 
1551  qt.init_list (key, coarse_dis_i[ik],
1552  list_size, compact_ids + limits[key],
1553  compact_codes + limits[key] * code_size);
1554 
1555  TIC;
1556  if (polysemous_ht > 0) {
1557  qt.scan_list_polysemous
1558  (k, heap_sim, heap_ids, store_pairs);
1559  } else if (list_size > scan_table_threshold) {
1560  qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
1561  } else {
1562  qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
1563  }
1564  scan_cycles += TOC;
1565 
1566  if (max_codes && nscan >= max_codes) break;
1567  }
1568  stats_ncode += nscan;
1569  TIC;
1570  maxheap_reorder (k, heap_sim, heap_ids);
1571 
1572  if (metric_type == METRIC_INNER_PRODUCT) {
1573  for (size_t j = 0; j < k; j++) {
1574  heap_sim[i] = -heap_sim[i];
1575  }
1576  }
1577  heap_cycles += TOC;
1578  }
1579 
1580 #pragma omp critical
1581  {
1582  indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
1583  indexIVFPQ_stats.nlist += stats_nlist;
1584  indexIVFPQ_stats.ncode += stats_ncode;
1585 
1586  indexIVFPQ_stats.init_query_cycles += init_query_cycles;
1587  indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
1588  indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
1589  indexIVFPQ_stats.heap_cycles += heap_cycles;
1590  }
1591 
1592  }
1593  indexIVFPQ_stats.nq += nx;
1594 }
1595 
1596 
1597 
1598 } // namespace faiss
uint32_t * compact_ids
size ntotal
Definition: IndexIVFPQ.h:262
uint8_t * compact_codes
size ntotal * code_size
Definition: IndexIVFPQ.h:263
void precompute_table()
build precomputed table
Definition: IndexIVFPQ.cpp:400
void copy_subset_to(IndexIVFPQ &other, int subset_type, long a1, long a2) const
Definition: IndexIVFPQ.cpp:341
size_t nbits
number of bits per quantization index
virtual void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVFPQ.cpp:311
void decode(const uint8_t *code, float *x) const
decode a vector from a given code (or n vectors if third argument)
ProductQuantizer refine_pq
3rd level quantizer
Definition: IndexIVFPQ.h:203
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
Definition: utils.cpp:432
PolysemousTraining * polysemous_training
if NULL, use default
Definition: IndexIVFPQ.h:37
T * get_val(size_t key)
Return the list of values for a heap.
Definition: Heap.h:361
virtual void add(idx_t, const float *) override
the three following functions will fail at runtime
virtual void search_knn_with_key(size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const override
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
const float * fvecs_maybe_subsample(size_t d, size_t *n, size_t nmax, const float *x, bool verbose, long seed)
Definition: utils.cpp:1793
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:48
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:24
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:51
virtual void set_typename() override
Definition: IndexIVF.cpp:207
virtual void merge_from_residuals(IndexIVF &other) override
used to implement merging
Definition: IndexIVFPQ.cpp:331
void decode_multiple(size_t n, const long *keys, const uint8_t *xcodes, float *x) const
inverse of encode_multiple
Definition: IndexIVFPQ.cpp:177
void train_residual_o(idx_t n, const float *x, float *residuals_2)
same as train_residual, also output 2nd level residuals
Definition: IndexIVFPQ.cpp:84
bool do_polysemous_training
reorder PQ centroids after training?
Definition: IndexIVFPQ.h:36
size_t scan_table_threshold
use table computation or on-the-fly?
Definition: IndexIVFPQ.h:40
size_t k
allocated size per heap
Definition: Heap.h:356
virtual void train_residual(idx_t n, const float *x) override
trains the two product quantizers
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx=nullptr)
same as add_with_ids, but optionally use the precomputed list ids
uint32_t * limits
size nlist + 1
Definition: IndexIVFPQ.h:261
size_t dsub
dimensionality of each subvector
int seed
seed for the random number generator
Definition: Clustering.h:36
std::vector< float > precomputed_table
Definition: IndexIVFPQ.h:48
void fvec_madd(size_t n, const float *a, float bf, const float *b, float *c)
Definition: utils.cpp:1707
int polysemous_ht
Hamming thresh for polysemous filtering.
Definition: IndexIVFPQ.h:42
virtual void search_knn_with_key(size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const
Definition: IndexIVFPQ.cpp:970
virtual void reset() override
removes all elements from the database.
virtual void add_with_ids(idx_t n, const float *x, const long *xids=nullptr) override
Definition: IndexIVFPQ.cpp:194
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:56
void compute_codes(const float *x, uint8_t *codes, size_t n) const
same as compute_code for several vectors
int d
vector dimension
Definition: Index.h:66
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:50
size_t max_codes
max nb of codes to visit to do a query
Definition: IndexIVFPQ.h:41
std::vector< uint8_t > refine_codes
corresponding codes
Definition: IndexIVFPQ.h:204
size_t code_size
byte per indexed vector
virtual long remove_ids(const IDSelector &sel) override
virtual void train_residual(idx_t n, const float *x) override
trains the product quantizer
Definition: IndexIVFPQ.cpp:78
void encode_multiple(size_t n, long *keys, const float *x, uint8_t *codes, bool compute_keys=false) const
Definition: IndexIVFPQ.cpp:158
virtual void train(idx_t, const float *) override
Trains the quantizer and calls train_residual to train sub-quantizers.
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:52
size_t ksub
number of centroids for each subquantizer
long idx_t
all indices are this type
Definition: Index.h:64
void compute_code(const float *x, uint8_t *code) const
Quantize one vector with the product quantizer.
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
virtual void reset() override
removes all elements from the database.
bool verbose
verbosity level
Definition: Index.h:68
virtual void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:87
double getmillisecs()
ms elapsed since some arbitrary epoch
Definition: utils.cpp:71
optimizes the order of indices in a ProductQuantizer
float fvec_norm_L2sqr(const float *x, size_t d)
Definition: utils.cpp:513
ClusteringParameters cp
parameters used during clustering
virtual void merge_from_residuals(IndexIVF &other) override
used to implement merging
bool by_residual
Encode residual or plain vector?
Definition: IndexIVFPQ.h:31
TI * get_ids(size_t key)
Corresponding identifiers.
Definition: Heap.h:364
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:34
size_t M
number of subquantizers
size_t nlist
number of possible key values
Definition: IndexIVF.h:47
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexIVFPQ.cpp:282
void add_core_o(idx_t n, const float *x, const long *xids, float *residuals_2, const long *precomputed_idx=nullptr)
Definition: IndexIVFPQ.cpp:200
int fvec_madd_and_argmin(size_t n, const float *a, float bf, const float *b, float *c)
Definition: utils.cpp:1781
size_t code_size
code size per vector in bytes
Definition: IndexIVFPQ.h:33
virtual long remove_ids(const IDSelector &sel) override
virtual void reset() override
removes all elements from the database.
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
float * get_centroids(size_t m, size_t i)
return the centroids associated with subvector m
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:59
void optimize_pq_for_hamming(ProductQuantizer &pq, size_t n, const float *x) const
int max_points_per_centroid
to limit size of dataset
Definition: Clustering.h:34
bool verbose
verbose during training?
virtual void add_with_ids(idx_t n, const float *x, const long *xids) override
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
size_t find_duplicates(idx_t *ids, size_t *lims) const
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:44
float k_factor
factor between k requested in search and the k requested from the IVFPQ
Definition: IndexIVFPQ.h:207
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:32