Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVFPQ.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 /* Copyright 2004-present Facebook. All Rights Reserved.
10  Inverted list structure.
11 */
12 
13 #include "IndexIVFPQ.h"
14 
15 #include <cmath>
16 #include <cstdio>
17 #include <cassert>
18 
19 #include <sys/mman.h>
20 
21 #include <algorithm>
22 
23 #include "Heap.h"
24 #include "utils.h"
25 
26 #include "Clustering.h"
27 #include "IndexFlat.h"
28 
29 #include "hamming.h"
30 
31 #include "FaissAssert.h"
32 
33 #include "AuxIndexStructures.h"
34 
35 namespace faiss {
36 
37 
38 
39 
40 
41 /*****************************************
42  * IndexIVFPQ implementation
43  ******************************************/
44 
45 IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist,
46  size_t M, size_t nbits_per_idx):
47  IndexIVF (quantizer, d, nlist, METRIC_L2),
48  pq (d, M, nbits_per_idx)
49 {
50  FAISS_THROW_IF_NOT (nbits_per_idx <= 8);
51  code_size = pq.code_size;
52  is_trained = false;
53  codes.resize (nlist);
54  by_residual = true;
55  use_precomputed_table = 0;
56  scan_table_threshold = 0;
57  max_codes = 0; // means unlimited
58 
59  polysemous_training = nullptr;
60  do_polysemous_training = false;
61  polysemous_ht = 0;
62 
63 }
64 
65 
66 void IndexIVFPQ::train_residual (idx_t n, const float *x)
67 {
68  train_residual_o (n, x, nullptr);
69 }
70 
71 
72 void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
73 {
74  const float * x_in = x;
75 
77  d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
78  x, verbose, pq.cp.seed);
79 
80  ScopeDeleter<float> del_x (x_in == x ? nullptr : x);
81 
82  const float *trainset;
83  ScopeDeleter<float> del_residuals;
84  if (by_residual) {
85  if(verbose) printf("computing residuals\n");
86  idx_t * assign = new idx_t [n]; // assignement to coarse centroids
87  ScopeDeleter<idx_t> del (assign);
88  quantizer->assign (n, x, assign);
89  float *residuals = new float [n * d];
90  del_residuals.set (residuals);
91  for (idx_t i = 0; i < n; i++)
92  quantizer->compute_residual (x + i * d, residuals+i*d, assign[i]);
93 
94  trainset = residuals;
95  } else {
96  trainset = x;
97  }
98  if (verbose)
99  printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n",
100  pq.M, pq.ksub, n, d);
101  pq.verbose = verbose;
102  pq.train (n, trainset);
103 
105  PolysemousTraining default_pt;
107  if (!pt) pt = &default_pt;
108  pt->optimize_pq_for_hamming (pq, n, trainset);
109  }
110 
111  // prepare second-level residuals for refine PQ
112  if (residuals_2) {
113  uint8_t *train_codes = new uint8_t [pq.code_size * n];
114  ScopeDeleter<uint8_t> del (train_codes);
115  pq.compute_codes (trainset, train_codes, n);
116 
117  for (idx_t i = 0; i < n; i++) {
118  const float *xx = trainset + i * d;
119  float * res = residuals_2 + i * d;
120  pq.decode (train_codes + i * pq.code_size, res);
121  for (int j = 0; j < d; j++)
122  res[j] = xx[j] - res[j];
123  }
124 
125  }
126 
127  if (by_residual) {
128  precompute_table ();
129  }
130 
131 }
132 
133 
134 /* produce a binary signature based on the residual vector */
135 void IndexIVFPQ::encode (long key, const float * x, uint8_t * code) const
136 {
137  if (by_residual) {
138  float residual_vec[d];
139  quantizer->compute_residual (x, residual_vec, key);
140  pq.compute_code (residual_vec, code);
141  }
142  else pq.compute_code (x, code);
143 }
144 
145 
146 
147 
148 
149 void IndexIVFPQ::encode_multiple (size_t n, long *keys,
150  const float * x, uint8_t * xcodes,
151  bool compute_keys) const
152 {
153  if (compute_keys)
154  quantizer->assign (n, x, keys);
155 
156  if (by_residual) {
157  float *residuals = new float [n * d];
158  ScopeDeleter<float> del (residuals);
159  // TODO: parallelize?
160  for (size_t i = 0; i < n; i++)
161  quantizer->compute_residual (x + i * d, residuals + i * d, keys[i]);
162  pq.compute_codes (residuals, xcodes, n);
163  } else {
164  pq.compute_codes (x, xcodes, n);
165  }
166 }
167 
168 void IndexIVFPQ::decode_multiple (size_t n, const long *keys,
169  const uint8_t * xcodes, float * x) const
170 {
171  pq.decode (xcodes, x, n);
172  if (by_residual) {
173  std::vector<float> centroid (d);
174  for (size_t i = 0; i < n; i++) {
175  quantizer->reconstruct (keys[i], centroid.data());
176  float *xi = x + i * d;
177  for (size_t j = 0; j < d; j++) {
178  xi [j] += centroid [j];
179  }
180  }
181  }
182 }
183 
184 
185 void IndexIVFPQ::add_with_ids (idx_t n, const float * x, const long *xids)
186 {
187  add_core_o (n, x, xids, nullptr);
188 }
189 
190 
191 void IndexIVFPQ::add_core_o (idx_t n, const float * x, const long *xids,
192  float *residuals_2, const long *precomputed_idx)
193 {
194  FAISS_THROW_IF_NOT (is_trained);
195  double t0 = getmillisecs ();
196  const long * idx;
197  ScopeDeleter<long> del_idx;
198 
199  if (precomputed_idx) {
200  idx = precomputed_idx;
201  } else {
202  long * idx0 = new long [n];
203  del_idx.set (idx0);
204  quantizer->assign (n, x, idx0);
205  idx = idx0;
206  }
207 
208  double t1 = getmillisecs ();
209  uint8_t * xcodes = new uint8_t [n * code_size];
210  ScopeDeleter<uint8_t> del_xcodes (xcodes);
211 
212  const float *to_encode = nullptr;
213  ScopeDeleter<float> del_to_encode;
214 
215  if (by_residual) {
216  float *residuals = new float [n * d];
217  // TODO: parallelize?
218  for (size_t i = 0; i < n; i++) {
219  if (idx[i] < 0)
220  memset (residuals + i * d, 0, sizeof(*residuals) * d);
221  else
222  quantizer->compute_residual (
223  x + i * d, residuals + i * d, idx[i]);
224  }
225  to_encode = residuals;
226  del_to_encode.set (to_encode);
227  } else {
228  to_encode = x;
229  }
230  pq.compute_codes (to_encode, xcodes, n);
231 
232  double t2 = getmillisecs ();
233  // TODO: parallelize?
234  size_t n_ignore = 0;
235  for (size_t i = 0; i < n; i++) {
236  idx_t key = idx[i];
237  if (key < 0) {
238  n_ignore ++;
239  if (residuals_2)
240  memset (residuals_2, 0, sizeof(*residuals_2) * d);
241  continue;
242  }
243  idx_t id = xids ? xids[i] : ntotal + i;
244  ids[key].push_back (id);
245  uint8_t *code = xcodes + i * code_size;
246  for (size_t j = 0; j < code_size; j++)
247  codes[key].push_back (code[j]);
248 
249  if (residuals_2) {
250  float *res2 = residuals_2 + i * d;
251  const float *xi = to_encode + i * d;
252  pq.decode (code, res2);
253  for (int j = 0; j < d; j++)
254  res2[j] = xi[j] - res2[j];
255  }
256 
258  direct_map.push_back (key << 32 | (ids[key].size() - 1));
259  }
260 
261 
262  double t3 = getmillisecs ();
263  if(verbose) {
264  char comment[100] = {0};
265  if (n_ignore > 0)
266  snprintf (comment, 100, "(%ld vectors ignored)", n_ignore);
267  printf(" add_core times: %.3f %.3f %.3f %s\n",
268  t1 - t0, t2 - t1, t3 - t2, comment);
269  }
270  ntotal += n;
271 }
272 
273 void IndexIVFPQ::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
274 {
275  FAISS_THROW_IF_NOT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
276 
277  std::vector<float> centroid (d);
278 
279  for (int key = 0; key < nlist; key++) {
280  const std::vector<long> & idlist = ids[key];
281  const uint8_t * code_line = codes[key].data();
282 
283  for (long ofs = 0; ofs < idlist.size(); ofs++) {
284  long id = idlist[ofs];
285  if (!(id >= i0 && id < i0 + ni)) continue;
286  float *r = recons + d * (id - i0);
287  if (by_residual) {
288  quantizer->reconstruct (key, centroid.data());
289  pq.decode (code_line + ofs * pq.code_size, r);
290  for (int j = 0; j < d; j++) {
291  r[j] += centroid[j];
292  }
293  }
294  else {
295  pq.decode (code_line + ofs * pq.code_size, r);
296  }
297  }
298  }
299 }
300 
301 
302 void IndexIVFPQ::reconstruct (idx_t key, float * recons) const
303 {
304  FAISS_THROW_IF_NOT (direct_map.size() == ntotal);
305  int list_no = direct_map[key] >> 32;
306  int ofs = direct_map[key] & 0xffffffff;
307 
308  quantizer->reconstruct (list_no, recons);
309  const uint8_t * code = &(codes[list_no][ofs * pq.code_size]);
310 
311  for (size_t m = 0; m < pq.M; m++) {
312  float * out = recons + m * pq.dsub;
313  const float * cent = pq.get_centroids (m, code[m]);
314  for (size_t i = 0; i < pq.dsub; i++) {
315  out[i] += cent[i];
316  }
317  }
318 }
319 
320 
321 
// NOTE(review): the signature line of this function (listing line 322) is
// missing from this extract. The body takes a parameter `other_in` that is
// downcast to IndexIVFPQ& -- presumably this is
// IndexIVFPQ::merge_from_residuals (IndexIVF &other_in); confirm against the
// header before relying on it.
//
// Moves the PQ codes of every inverted list of `other` to the end of the
// corresponding list of this index, leaving other's code lists empty.
// dynamic_cast on a reference throws std::bad_cast if `other_in` is not an
// IndexIVFPQ.
323 {
324  IndexIVFPQ &other = dynamic_cast<IndexIVFPQ &> (other_in);
325  for (int i = 0; i < nlist; i++) {
326  codes[i].insert (codes[i].end(),
327  other.codes[i].begin(), other.codes[i].end());
328  other.codes[i].clear();
329  }
330 }
331 
332 void IndexIVFPQ::copy_subset_to (IndexIVFPQ & other, int subset_type,
333  long a1, long a2) const
334 {
335  FAISS_THROW_IF_NOT (nlist == other.nlist);
336  FAISS_THROW_IF_NOT (!other.maintain_direct_map);
337  size_t code_size = pq.code_size;
338  for (long list_no = 0; list_no < nlist; list_no++) {
339  const std::vector<idx_t> & ids_in = ids[list_no];
340  std::vector<idx_t> & ids_out = other.ids[list_no];
341  const std::vector<uint8_t> & codes_in = codes[list_no];
342  std::vector<uint8_t> & codes_out = other.codes[list_no];
343 
344  for (long i = 0; i < ids_in.size(); i++) {
345  idx_t id = ids_in[i];
346  if (subset_type == 0 && a1 <= id && id < a2) {
347  ids_out.push_back (id);
348  codes_out.insert (codes_out.end(),
349  codes_in.begin() + i * code_size,
350  codes_in.begin() + (i + 1) * code_size);
351  other.ntotal++;
352  }
353  }
354  }
355 }
356 
357 
358 
359 
360 
361 /** Precomputed tables for residuals
362  *
363  * During IVFPQ search with by_residual, we compute
364  *
365  * d = || x - y_C - y_R ||^2
366  *
367  * where x is the query vector, y_C the coarse centroid, y_R the
368  * refined PQ centroid. The expression can be decomposed as:
369  *
370  * d = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C|y_R) - 2 * (x|y_R)
371  *     ---------------   ---------------------------   -----------
372  *          term 1                  term 2                term 3
373  *
374  * When using multiprobe, we use the following decomposition:
375  * - term 1 is the distance to the coarse centroid, that is computed
376  * during the 1st stage search.
377  * - term 2 can be precomputed, as it does not involve x. However,
378  * because of the PQ, it needs nlist * M * ksub storage. This is why
379  * use_precomputed_table is off by default
380  * - term 3 is the classical non-residual distance table.
381  *
382  * Since y_R is defined by a product quantizer, it is split across
383  * subvectors and stored separately for each subvector. If the coarse
384  * quantizer is a MultiIndexQuantizer then the table can be stored
385  * more compactly.
386  *
387  * At search time, the tables for term 2 and term 3 are added up. This
388  * is faster when the length of the lists is > ksub * M.
389  */
390 
392 {
393 
394 
395  if (use_precomputed_table == 0) { // then choose the type of table
396  if (quantizer->metric_type == METRIC_INNER_PRODUCT) {
397  fprintf(stderr, "IndexIVFPQ::precompute_table: WARN precomputed "
398  "tables not supported for inner product quantizers\n");
399  return;
400  }
401  const MultiIndexQuantizer *miq =
402  dynamic_cast<const MultiIndexQuantizer *> (quantizer);
403  if (miq && pq.M % miq->pq.M == 0)
405  else
407  } // otherwise assume user has set appropriate flag on input
408 
409 
410  // squared norms of the PQ centroids
411  std::vector<float> r_norms (pq.M * pq.ksub, NAN);
412  for (int m = 0; m < pq.M; m++)
413  for (int j = 0; j < pq.ksub; j++)
414  r_norms [m * pq.ksub + j] =
416 
417  if (use_precomputed_table == 1) {
418 
419  precomputed_table.resize (nlist * pq.M * pq.ksub);
420  std::vector<float> centroid (d);
421 
422  for (size_t i = 0; i < nlist; i++) {
423  quantizer->reconstruct (i, centroid.data());
424 
425  float *tab = &precomputed_table[i * pq.M * pq.ksub];
426  pq.compute_inner_prod_table (centroid.data(), tab);
427  fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
428  }
429  } else if (use_precomputed_table == 2) {
430  const MultiIndexQuantizer *miq =
431  dynamic_cast<const MultiIndexQuantizer *> (quantizer);
432  FAISS_THROW_IF_NOT (miq);
433  const ProductQuantizer &cpq = miq->pq;
434  FAISS_THROW_IF_NOT (pq.M % cpq.M == 0);
435 
436  precomputed_table.resize(cpq.ksub * pq.M * pq.ksub);
437 
438  // reorder PQ centroid table
439  std::vector<float> centroids (d * cpq.ksub, NAN);
440 
441  for (int m = 0; m < cpq.M; m++) {
442  for (size_t i = 0; i < cpq.ksub; i++) {
443  memcpy (centroids.data() + i * d + m * cpq.dsub,
444  cpq.get_centroids (m, i),
445  sizeof (*centroids.data()) * cpq.dsub);
446  }
447  }
448 
449  pq.compute_inner_prod_tables (cpq.ksub, centroids.data (),
450  precomputed_table.data ());
451 
452  for (size_t i = 0; i < cpq.ksub; i++) {
453  float *tab = &precomputed_table[i * pq.M * pq.ksub];
454  fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
455  }
456 
457  }
458 }
459 
460 namespace {
461 
/// Read the processor time-stamp counter (x86 `rdtsc` instruction) and
/// return it as a single 64-bit value.
static uint64_t get_cycles () {
    uint32_t lo_word, hi_word;
    asm volatile("rdtsc \n\t"
                 : "=a" (lo_word),
                   "=d" (hi_word));
    const uint64_t upper = static_cast<uint64_t> (hi_word) << 32;
    return upper | lo_word;
}

// TIC stores the current counter in a variable named t0 that the caller
// must have declared; TOC expands to the cycles elapsed since the last TIC.
#define TIC t0 = get_cycles()
#define TOC get_cycles () - t0
472 
473 
474 
475 /** QueryTables manages the various ways of searching an
476  * IndexIVFPQ. The code contains a lot of branches, depending on:
477  * - metric_type: are we computing L2 or Inner product similarity?
478  * - by_residual: do we encode raw vectors or residuals?
479  * - use_precomputed_table: are x_R|x_C tables precomputed?
480  * - polysemous_ht: are we filtering with polysemous codes?
481  */
// Per-thread working state for searching an IndexIVFPQ: holds the lookup
// tables and scratch vectors, and provides the three initialization stages
// (constructor, init_query, precompute_list_*) used by the scanners.
482 struct QueryTables {
483 
484  /*****************************************************
485  * General data from the IVFPQ
486  *****************************************************/
487 
488  const IndexIVFPQ & ivfpq;
489 
490  // copied from IndexIVFPQ for easier access
491  int d;
492  const ProductQuantizer & pq;
493  MetricType metric_type;
494  bool by_residual;
495  int use_precomputed_table;
496 
497  // pre-allocated data buffers
  // (all four point into `mem`, see the constructor)
498  float * sim_table, * sim_table_2;
499  float * residual_vec, *decoded_vec;
500 
501  // single data buffer
502  std::vector<float> mem;
503 
504  // for table pointers
  // (one pointer per PQ sub-quantizer, filled by
  // precompute_list_table_pointers_L2)
505  std::vector<const float *> sim_table_ptrs;
506 
  // Copies the search parameters from the index and carves `mem` into
  // sim_table (ksub * M floats), sim_table_2 (ksub * M floats),
  // residual_vec (d floats) and decoded_vec (d floats), in that order.
507  explicit QueryTables (const IndexIVFPQ & ivfpq):
508  ivfpq(ivfpq),
509  d(ivfpq.d),
510  pq (ivfpq.pq),
511  metric_type (ivfpq.metric_type),
512  by_residual (ivfpq.by_residual),
513  use_precomputed_table (ivfpq.use_precomputed_table)
514  {
515  mem.resize (pq.ksub * pq.M * 2 + d *2);
516  sim_table = mem.data();
517  sim_table_2 = sim_table + pq.ksub * pq.M;
518  residual_vec = sim_table_2 + pq.ksub * pq.M;
519  decoded_vec = residual_vec + d;
520 
521  // for polysemous
522  if (ivfpq.polysemous_ht != 0) {
523  q_code.resize (pq.code_size);
524  }
525  init_list_cycles = 0;
526  sim_table_ptrs.resize (pq.M);
527  }
528 
529  /*****************************************************
530  * What we do when query is known
531  *****************************************************/
532 
533  // field specific to query
534  const float * qi;
535 
536  // query-specific initialization
  // Stores the query pointer (not a copy) and fills the query-dependent
  // tables. Without by_residual the polysemous query code is also
  // computed here; with by_residual it is computed per list.
537  void init_query (const float * qi) {
538  this->qi = qi;
539  if (metric_type == METRIC_INNER_PRODUCT)
540  init_query_IP ();
541  else
542  init_query_L2 ();
543  if (!by_residual && ivfpq.polysemous_ht != 0)
544  pq.compute_code (qi, q_code.data());
545  }
546 
547  void init_query_IP () {
548  // precompute some tables specific to the query qi
549  pq.compute_inner_prod_table (qi, sim_table);
550  // we compute negated inner products for use with the maxheap
551  for (int i = 0; i < pq.ksub * pq.M; i++) {
552  sim_table[i] = - sim_table[i];
553  }
554  }
555 
  // L2 case: without by_residual sim_table is the distance table of the
  // query; with by_residual and a precomputed table, sim_table_2 receives
  // the inner products (x|y_R); otherwise nothing is computed here.
556  void init_query_L2 () {
557  if (!by_residual) {
558  pq.compute_distance_table (qi, sim_table);
559  } else if (use_precomputed_table) {
560  pq.compute_inner_prod_table (qi, sim_table_2);
561  }
562  }
563 
564  /*****************************************************
565  * When inverted list is known: prepare computations
566  *****************************************************/
567 
568  // fields specific to list
569  Index::idx_t key;
570  float coarse_dis;
571  std::vector<uint8_t> q_code;
572 
  // cycles spent in precompute_list_tables / precompute_list_table_pointers
573  uint64_t init_list_cycles;
574 
575  /// once we know the query and the centroid, we can prepare the
576  /// sim_table that will be used for accumulation
577  /// and dis0, the initial value
  /// (returns 0 and leaves the tables untouched when by_residual is off)
578  float precompute_list_tables () {
579  float dis0 = 0;
580  uint64_t t0; TIC;
581  if (by_residual) {
582  if (metric_type == METRIC_INNER_PRODUCT)
583  dis0 = precompute_list_tables_IP ();
584  else
585  dis0 = precompute_list_tables_L2 ();
586  }
587  init_list_cycles += TOC;
588  return dis0;
589  }
590 
  // Same as precompute_list_tables, but fills sim_table_ptrs instead of
  // sim_table. Throws for the inner-product metric with by_residual.
591  float precompute_list_table_pointers () {
592  float dis0 = 0;
593  uint64_t t0; TIC;
594  if (by_residual) {
595  if (metric_type == METRIC_INNER_PRODUCT)
596  FAISS_THROW_MSG ("not implemented");
597  else
598  dis0 = precompute_list_table_pointers_L2 ();
599  }
600  init_list_cycles += TOC;
601  return dis0;
602  }
603 
604  /*****************************************************
605  * compute tables for inner prod
606  *****************************************************/
607 
  // Returns the negated inner product between the query and the coarse
  // centroid of list `key`. With polysemous filtering, also computes
  // q_code from the residual query - centroid.
608  float precompute_list_tables_IP ()
609  {
610  // prepare the sim_table that will be used for accumulation
611  // and dis0, the initial value
612  ivfpq.quantizer->reconstruct (key, decoded_vec);
613  // decoded_vec = centroid
614  float dis0 = -fvec_inner_product (qi, decoded_vec, d);
615 
616  if (ivfpq.polysemous_ht) {
617  for (int i = 0; i < d; i++) {
618  residual_vec [i] = qi[i] - decoded_vec[i];
619  }
620  pq.compute_code (residual_vec, q_code.data());
621  }
622  return dis0;
623  }
624 
625 
626  /*****************************************************
627  * compute tables for L2 distance
628  *****************************************************/
629 
  // Fills sim_table for list `key` and returns the initial distance:
  //  - table type 0: distance table of the query residual, dis0 = 0
  //  - table type 1: precomputed row of the list - 2 * sim_table_2,
  //    dis0 = coarse_dis
  //  - table type 2: same, but the row is assembled from the rows of the
  //    MultiIndexQuantizer sub-centroids encoded in `key`; with
  //    polysemous filtering q_code is filled on the way.
630  float precompute_list_tables_L2 ()
631  {
632  float dis0 = 0;
633 
634  if (use_precomputed_table == 0) {
635  ivfpq.quantizer->compute_residual (qi, residual_vec, key);
636  pq.compute_distance_table (residual_vec, sim_table);
637  } else if (use_precomputed_table == 1) {
638  dis0 = coarse_dis;
639 
640  fvec_madd (pq.M * pq.ksub,
641  &ivfpq.precomputed_table [key * pq.ksub * pq.M],
642  -2.0, sim_table_2,
643  sim_table);
644  } else if (use_precomputed_table == 2) {
645  dis0 = coarse_dis;
646 
647  const MultiIndexQuantizer *miq =
648  dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
649  FAISS_THROW_IF_NOT (miq);
650  const ProductQuantizer &cpq = miq->pq;
  // number of fine sub-quantizers per coarse sub-quantizer
651  int Mf = pq.M / cpq.M;
652 
653  const float *qtab = sim_table_2; // query-specific table
654  float *ltab = sim_table; // (output) list-specific table
655 
656  long k = key;
657  for (int cm = 0; cm < cpq.M; cm++) {
658  // compute PQ index
  // (the low cpq.nbits bits of k, which are then shifted out)
659  int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
660  k >>= cpq.nbits;
661 
662  // get corresponding table
663  const float *pc = &ivfpq.precomputed_table
664  [(ki * pq.M + cm * Mf) * pq.ksub];
665 
666  if (ivfpq.polysemous_ht == 0) {
667 
668  // sum up with query-specific table
669  fvec_madd (Mf * pq.ksub,
670  pc,
671  -2.0, qtab,
672  ltab);
673  ltab += Mf * pq.ksub;
674  qtab += Mf * pq.ksub;
675  } else {
  // one sub-quantizer at a time, so that the argmin of each
  // table row can be stored as the query code
676  for (int m = cm * Mf; m < (cm + 1) * Mf; m++) {
677  q_code[m] = fvec_madd_and_argmin
678  (pq.ksub, pc, -2, qtab, ltab);
679  pc += pq.ksub;
680  ltab += pq.ksub;
681  qtab += pq.ksub;
682  }
683  }
684 
685  }
686  }
687 
688  return dis0;
689  }
690 
  // Points sim_table_ptrs[m] at the precomputed row of sub-quantizer m
  // for list `key` and returns coarse_dis. Requires table type 1 or 2
  // and no polysemous filtering; throws otherwise.
691  float precompute_list_table_pointers_L2 ()
692  {
693  float dis0 = 0;
694 
695  if (use_precomputed_table == 1) {
696  dis0 = coarse_dis;
697 
698  const float * s = &ivfpq.precomputed_table [key * pq.ksub * pq.M];
699  for (int m = 0; m < pq.M; m++) {
700  sim_table_ptrs [m] = s;
701  s += pq.ksub;
702  }
703  } else if (use_precomputed_table == 2) {
704  dis0 = coarse_dis;
705 
706  const MultiIndexQuantizer *miq =
707  dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
708  FAISS_THROW_IF_NOT (miq);
709  const ProductQuantizer &cpq = miq->pq;
710  int Mf = pq.M / cpq.M;
711 
712  long k = key;
713  int m0 = 0;
714  for (int cm = 0; cm < cpq.M; cm++) {
715  int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
716  k >>= cpq.nbits;
717 
718  const float *pc = &ivfpq.precomputed_table
719  [(ki * pq.M + cm * Mf) * pq.ksub];
720 
721  for (int m = m0; m < m0 + Mf; m++) {
722  sim_table_ptrs [m] = pc;
723  pc += pq.ksub;
724  }
725  m0 += Mf;
726  }
727  } else {
728  FAISS_THROW_MSG ("need precomputed tables");
729  }
730 
731  if (ivfpq.polysemous_ht) {
732  FAISS_THROW_MSG ("not implemented");
733  // Not clear that it makes sense to implement this,
734  // because it costs M * ksub, which is what we wanted to
735  // avoid with the tables pointers.
736  }
737 
738  return dis0;
739  }
740 
741 
742 };
743 
744 
745 /*****************************************************
746  * Scanning the codes.
747  * The scanning functions call their favorite precompute_*
748  * function to precompute the tables they need.
749  *****************************************************/
750 template <typename IDType>
751 struct InvertedListScanner: QueryTables {
752 
753  const uint8_t * __restrict list_codes;
754  const IDType * list_ids;
755  size_t list_size;
756 
757  explicit InvertedListScanner (const IndexIVFPQ & ivfpq):
758  QueryTables (ivfpq)
759  {
760  FAISS_THROW_IF_NOT (pq.byte_per_idx == 1);
761  n_hamming_pass = 0;
762  }
763 
764  /// list_specific intialization
765  void init_list (Index::idx_t key, float coarse_dis,
766  size_t list_size_in, const IDType *list_ids_in,
767  const uint8_t *list_codes_in) {
768  this->key = key;
769  this->coarse_dis = coarse_dis;
770  list_size = list_size_in;
771  list_codes = list_codes_in;
772  list_ids = list_ids_in;
773  }
774 
775  /*****************************************************
776  * Scaning the codes: simple PQ scan.
777  *****************************************************/
778 
779  /// version of the scan where we use precomputed tables
780  void scan_list_with_table (
781  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
782  {
783  float dis0 = precompute_list_tables ();
784 
785  for (size_t j = 0; j < list_size; j++) {
786 
787  float dis = dis0;
788  const float *tab = sim_table;
789 
790  for (size_t m = 0; m < pq.M; m++) {
791  dis += tab[*list_codes++];
792  tab += pq.ksub;
793  }
794 
795  if (dis < heap_sim[0]) {
796  maxheap_pop (k, heap_sim, heap_ids);
797  long id = store_pairs ? (key << 32 | j) : list_ids[j];
798  maxheap_push (k, heap_sim, heap_ids, dis, id);
799  }
800  }
801  }
802 
803 
804  /// tables are not precomputed, but pointers are provided to the
805  /// relevant X_c|x_r tables
806  void scan_list_with_pointer (
807  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
808  {
809 
810  float dis0 = precompute_list_table_pointers ();
811 
812  for (size_t j = 0; j < list_size; j++) {
813 
814  float dis = dis0;
815  const float *tab = sim_table_2;
816 
817  for (size_t m = 0; m < pq.M; m++) {
818  int ci = *list_codes++;
819  dis += sim_table_ptrs [m][ci] - 2 * tab [ci];
820  tab += pq.ksub;
821  }
822 
823  if (dis < heap_sim[0]) {
824  maxheap_pop (k, heap_sim, heap_ids);
825  long id = store_pairs ? (key << 32 | j) : list_ids[j];
826  maxheap_push (k, heap_sim, heap_ids, dis, id);
827  }
828  }
829 
830  }
831 
832  /// nothing is precomputed: access residuals on-the-fly
833  void scan_on_the_fly_dist (
834  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
835  {
836 
837  if (by_residual && use_precomputed_table) {
838  scan_list_with_pointer (k, heap_sim, heap_ids, store_pairs);
839  return;
840  }
841 
842  const float *dvec;
843  float dis0 = 0;
844 
845  if (by_residual) {
846  if (metric_type == METRIC_INNER_PRODUCT) {
847  ivfpq.quantizer->reconstruct (key, residual_vec);
848  dis0 = fvec_inner_product (residual_vec, qi, d);
849  } else {
850  ivfpq.quantizer->compute_residual (qi, residual_vec, key);
851  }
852  dvec = residual_vec;
853  } else {
854  dvec = qi;
855  dis0 = 0;
856  }
857 
858  for (size_t j = 0; j < list_size; j++) {
859 
860  pq.decode (list_codes, decoded_vec);
861  list_codes += pq.code_size;
862 
863  float dis;
864  if (metric_type == METRIC_INNER_PRODUCT) {
865  dis = -dis0 - fvec_inner_product (decoded_vec, qi, d);
866  } else {
867  dis = fvec_L2sqr (decoded_vec, dvec, d);
868  }
869 
870  if (dis < heap_sim[0]) {
871  maxheap_pop (k, heap_sim, heap_ids);
872  long id = store_pairs ? (key << 32 | j) : list_ids[j];
873  maxheap_push (k, heap_sim, heap_ids, dis, id);
874  }
875  }
876  }
877 
878  /*****************************************************
879  * Scanning codes with polysemous filtering
880  *****************************************************/
881 
882  // code for the query
883  size_t n_hamming_pass;
884 
885 
886  template <class HammingComputer>
887  void scan_list_polysemous_hc (
888  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
889  {
890  float dis0 = precompute_list_tables ();
891  int ht = ivfpq.polysemous_ht;
892 
893  int code_size = pq.code_size;
894 
895  HammingComputer hc (q_code.data(), code_size);
896 
897  for (size_t j = 0; j < list_size; j++) {
898  const uint8_t *b_code = list_codes;
899  int hd = hc.hamming (b_code);
900  if (hd < ht) {
901  n_hamming_pass ++;
902 
903  float dis = dis0;
904  const float *tab = sim_table;
905 
906  for (size_t m = 0; m < pq.M; m++) {
907  dis += tab[*b_code++];
908  tab += pq.ksub;
909  }
910 
911  if (dis < heap_sim[0]) {
912  maxheap_pop (k, heap_sim, heap_ids);
913  long id = store_pairs ? (key << 32 | j) : list_ids[j];
914  maxheap_push (k, heap_sim, heap_ids, dis, id);
915  }
916  }
917  list_codes += code_size;
918  }
919  }
920 
921  void scan_list_polysemous (
922  size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
923  {
924  switch (pq.code_size) {
925 #define HANDLE_CODE_SIZE(cs) \
926  case cs: \
927  scan_list_polysemous_hc <HammingComputer ## cs> \
928  (k, heap_sim, heap_ids, store_pairs); \
929  break
930  HANDLE_CODE_SIZE(4);
931  HANDLE_CODE_SIZE(8);
932  HANDLE_CODE_SIZE(16);
933  HANDLE_CODE_SIZE(20);
934  HANDLE_CODE_SIZE(32);
935  HANDLE_CODE_SIZE(64);
936 #undef HANDLE_CODE_SIZE
937  default:
938  if (pq.code_size % 8 == 0)
939  scan_list_polysemous_hc <HammingComputerM8>
940  (k, heap_sim, heap_ids, store_pairs);
941  else
942  scan_list_polysemous_hc <HammingComputerM4>
943  (k, heap_sim, heap_ids, store_pairs);
944  break;
945  }
946  }
947 
948 };
949 
950 
951 
952 
953 } // anonymous namespace
954 
955 
956 IndexIVFPQStats indexIVFPQ_stats;
957 
958 void IndexIVFPQStats::reset () {
959  memset (this, 0, sizeof (*this));
960 }
961 
962 
964  size_t nx,
965  const float * qx,
966  const long * keys,
967  const float * coarse_dis,
968  float_maxheap_array_t * res,
969  bool store_pairs) const
970 {
971  const size_t k = res->k;
972 
973 #pragma omp parallel
974  {
975  InvertedListScanner<long> qt (*this);
976  size_t stats_nlist = 0;
977  size_t stats_ncode = 0;
978  uint64_t init_query_cycles = 0;
979  uint64_t scan_cycles = 0;
980  uint64_t heap_cycles = 0;
981 
982 #pragma omp for
983  for (size_t i = 0; i < nx; i++) {
984  const float *qi = qx + i * d;
985  const long * keysi = keys + i * nprobe;
986  const float *coarse_dis_i = coarse_dis + i * nprobe;
987  float * heap_sim = res->get_val (i);
988  long * heap_ids = res->get_ids (i);
989 
990  uint64_t t0;
991  TIC;
992  maxheap_heapify (k, heap_sim, heap_ids);
993  heap_cycles += TOC;
994 
995  TIC;
996  qt.init_query (qi);
997  init_query_cycles += TOC;
998 
999  size_t nscan = 0;
1000 
1001  for (size_t ik = 0; ik < nprobe; ik++) {
1002  long key = keysi[ik]; /* select the list */
1003  if (key < 0) {
1004  // not enough centroids for multiprobe
1005  continue;
1006  }
1007  if (key >= (long) nlist) {
1008  fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
1009  throw;
1010  }
1011  size_t list_size = ids[key].size();
1012  stats_nlist ++;
1013  nscan += list_size;
1014 
1015  if (list_size == 0) continue;
1016 
1017  qt.init_list (key, coarse_dis_i[ik],
1018  list_size, ids[key].data(),
1019  codes[key].data());
1020 
1021  TIC;
1022  if (polysemous_ht > 0) {
1023  qt.scan_list_polysemous
1024  (k, heap_sim, heap_ids, store_pairs);
1025  } else if (list_size > scan_table_threshold) {
1026  qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
1027  } else {
1028  qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
1029  }
1030  scan_cycles += TOC;
1031 
1032  if (max_codes && nscan >= max_codes) break;
1033  }
1034  stats_ncode += nscan;
1035  TIC;
1036  maxheap_reorder (k, heap_sim, heap_ids);
1037 
1038  if (metric_type == METRIC_INNER_PRODUCT) {
1039  for (size_t j = 0; j < k; j++)
1040  heap_sim[j] = -heap_sim[j];
1041  }
1042  heap_cycles += TOC;
1043  }
1044 
1045 #pragma omp critical
1046  {
1047  indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
1048  indexIVFPQ_stats.nlist += stats_nlist;
1049  indexIVFPQ_stats.ncode += stats_ncode;
1050 
1051  indexIVFPQ_stats.init_query_cycles += init_query_cycles;
1052  indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
1053  indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
1054  indexIVFPQ_stats.heap_cycles += heap_cycles;
1055  }
1056 
1057  }
1058  indexIVFPQ_stats.nq += nx;
1059 }
1060 
1061 
1062 void IndexIVFPQ::search (idx_t n, const float *x, idx_t k,
1063  float *distances, idx_t *labels) const
1064 {
1065  long * idx = new long [n * nprobe];
1066  ScopeDeleter<long> del (idx);
1067  float * coarse_dis = new float [n * nprobe];
1068  ScopeDeleter<float> del2 (coarse_dis);
1069 
1070  uint64_t t0;
1071  TIC;
1072  quantizer->search (n, x, nprobe, coarse_dis, idx);
1073  indexIVFPQ_stats.assign_cycles += TOC;
1074 
1075  TIC;
1076  float_maxheap_array_t res = { size_t(n), size_t(k), labels, distances};
1077 
1078  search_knn_with_key (n, x, idx, coarse_dis, &res);
1079  indexIVFPQ_stats.search_cycles += TOC;
1080 }
1081 
1082 
// NOTE(review): the signature line of this function (listing line 1083) is
// missing from this extract -- presumably `void IndexIVFPQ::reset()`;
// confirm against the header.
//
// Runs the IndexIVF reset, then empties the PQ code array of every
// inverted list.
1084 {
1085  IndexIVF::reset();
1086  for (size_t key = 0; key < nlist; key++) {
1087  codes[key].clear();
1088  }
1089 }
1090 
// NOTE(review): the signature line of this function (listing line 1091) is
// missing from this extract. The body uses a parameter `sel` with an
// is_member() method and returns the `long` counter nremove -- presumably
// this is `long IndexIVFPQ::remove_ids (const IDSelector & sel)`; confirm
// against the header.
//
// Removes from every inverted list the entries whose id is selected by
// `sel`, decrements ntotal accordingly and returns the number of removed
// entries. Throws if the index maintains a direct map.
1092 {
1093  FAISS_THROW_IF_NOT_MSG (!maintain_direct_map,
1094  "direct map remove not implemented");
1095  long nremove = 0;
// the inverted lists are processed in parallel; the per-thread counts are
// summed by the reduction clause
1096 #pragma omp parallel for reduction(+: nremove)
1097  for (long i = 0; i < nlist; i++) {
1098  std::vector<idx_t> & idsi = ids[i];
1099  uint8_t * codesi = codes[i].data();
1100 
// l is the number of entries kept so far, j the entry being examined
1101  long l = idsi.size(), j = 0;
1102  while (j < l) {
1103  if (sel.is_member (idsi[j])) {
// overwrite the removed entry with the last live one; j is not
// advanced, so the moved entry is tested on the next iteration
1104  l--;
1105  idsi [j] = idsi [l];
1106  memmove (codesi + j * code_size,
1107  codesi + l * code_size, code_size);
1108  } else {
1109  j++;
1110  }
1111  }
// shrink both arrays only when something was removed
1112  if (l < idsi.size()) {
1113  nremove += idsi.size() - l;
1114  idsi.resize (l);
1115  codes[i].resize (l * code_size);
1116  }
1117  }
1118  ntotal -= nremove;
1119  return nremove;
1120 }
1121 
1122 
1123 IndexIVFPQ::IndexIVFPQ ()
1124 {
1125  // initialize some runtime values
1128  do_polysemous_training = false;
1129  polysemous_ht = 0;
1130  max_codes = 0;
1131  polysemous_training = nullptr;
1132 }
1133 
1134 
/** Orders indices of fixed-size codes stored contiguously in `tab`.
 *
 * operator() returns true when code a compares greater than code b
 * bytewise, so std::sort with this comparator yields a descending order.
 */
struct CodeCmp {
    const uint8_t *tab;
    size_t code_size;

    /// true iff code a is bytewise greater than code b
    bool operator () (int a, int b) const {
        return 0 < cmp (a, b);
    }

    /// memcmp-style three-way comparison of codes a and b
    int cmp (int a, int b) const {
        const uint8_t *code_a = tab + a * code_size;
        const uint8_t *code_b = tab + b * code_size;
        return memcmp (code_a, code_b, code_size);
    }
};
1146 
1147 
1148 size_t IndexIVFPQ::find_duplicates (idx_t *dup_ids, size_t *lims) const
1149 {
1150  size_t ngroup = 0;
1151  lims[0] = 0;
1152  for (size_t list_no = 0; list_no < nlist; list_no++) {
1153  size_t n = ids[list_no].size();
1154  std::vector<int> ord (n);
1155  for (int i = 0; i < n; i++) ord[i] = i;
1156  CodeCmp cs = { codes[list_no].data(), code_size };
1157  std::sort (ord.begin(), ord.end(), cs);
1158 
1159  const idx_t *list_ids = ids[list_no].data();
1160  int prev = -1; // all elements from prev to i-1 are equal
1161  for (int i = 0; i < n; i++) {
1162  if (prev >= 0 && cs.cmp (ord [prev], ord [i]) == 0) {
1163  // same as previous => remember
1164  if (prev + 1 == i) { // start new group
1165  ngroup++;
1166  lims[ngroup] = lims[ngroup - 1];
1167  dup_ids [lims [ngroup]++] = list_ids [ord [prev]];
1168  }
1169  dup_ids [lims [ngroup]++] = list_ids [ord [i]];
1170  } else { // not same as previous.
1171  prev = i;
1172  }
1173  }
1174  }
1175  return ngroup;
1176 }
1177 
1178 
1179 
1180 
1181 /*****************************************
1182  * IndexIVFPQR implementation
1183  ******************************************/
1184 
1185 IndexIVFPQR::IndexIVFPQR (
1186  Index * quantizer, size_t d, size_t nlist,
1187  size_t M, size_t nbits_per_idx,
1188  size_t M_refine, size_t nbits_per_idx_refine):
1189  IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx),
1190  refine_pq (d, M_refine, nbits_per_idx_refine),
1191  k_factor (4)
1192 {
1193  by_residual = true;
1194 }
1195 
1196 IndexIVFPQR::IndexIVFPQR ():
1197  k_factor (1)
1198 {
1199  by_residual = true;
1200 }
1201 
1202 
1203 
1205 {
1207  refine_codes.clear();
1208 }
1209 
1210 
1211 
1212 
1213 void IndexIVFPQR::train_residual (idx_t n, const float *x)
1214 {
1215 
1216  float * residual_2 = new float [n * d];
1217  ScopeDeleter <float> del(residual_2);
1218 
1219  train_residual_o (n, x, residual_2);
1220 
1221  if (verbose)
1222  printf ("training %zdx%zd 2nd level PQ quantizer on %ld %dD-vectors\n",
1223  refine_pq.M, refine_pq.ksub, n, d);
1224 
1226  refine_pq.cp.verbose = verbose;
1227 
1228  refine_pq.train (n, residual_2);
1229 
1230 }
1231 
1232 
1233 void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const long *xids) {
1234  add_core (n, x, xids, nullptr);
1235 }
1236 
1237 void IndexIVFPQR::add_core (idx_t n, const float *x, const long *xids,
1238  const long *precomputed_idx) {
1239 
1240  float * residual_2 = new float [n * d];
1241  ScopeDeleter <float> del(residual_2);
1242 
1243  idx_t n0 = ntotal;
1244 
1245  add_core_o (n, x, xids, residual_2, precomputed_idx);
1246 
1248 
1250  residual_2, &refine_codes[n0 * refine_pq.code_size], n);
1251 
1252 
1253 }
1254 
1255 
1257  idx_t n, const float *x, idx_t k,
1258  float *distances, idx_t *labels) const
1259 {
1260  FAISS_THROW_IF_NOT (is_trained);
1261  long * idx = new long [n * nprobe];
1262  ScopeDeleter<long> del (idx);
1263  float * L1_dis = new float [n * nprobe];
1264  ScopeDeleter<float> del2 (L1_dis);
1265  uint64_t t0;
1266  TIC;
1267  quantizer->search (n, x, nprobe, L1_dis, idx);
1268  indexIVFPQ_stats.assign_cycles += TOC;
1269 
1270  TIC;
1271  size_t k_coarse = long(k * k_factor);
1272  idx_t *coarse_labels = new idx_t [k_coarse * n];
1273  ScopeDeleter<idx_t> del3 (coarse_labels);
1274  { // query with quantizer levels 1 and 2.
1275  float *coarse_distances = new float [k_coarse * n];
1276  ScopeDeleter<float> del(coarse_distances);
1277 
1278  faiss::float_maxheap_array_t res_coarse = {
1279  size_t(n), k_coarse, coarse_labels, coarse_distances};
1280  search_knn_with_key (n, x, idx, L1_dis, &res_coarse, true);
1281  }
1282 
1283 
1284  indexIVFPQ_stats.search_cycles += TOC;
1285 
1286  TIC;
1287 
1288  // 3rd level refinement
1289  size_t n_refine = 0;
1290 #pragma omp parallel reduction(+ : n_refine)
1291  {
1292  // tmp buffers
1293  float *residual_1 = new float [2 * d];
1294  ScopeDeleter<float> del (residual_1);
1295  float *residual_2 = residual_1 + d;
1296 #pragma omp for
1297  for (idx_t i = 0; i < n; i++) {
1298  const float *xq = x + i * d;
1299  const long * shortlist = coarse_labels + k_coarse * i;
1300  float * heap_sim = distances + k * i;
1301  long * heap_ids = labels + k * i;
1302  maxheap_heapify (k, heap_sim, heap_ids);
1303 
1304  for (int j = 0; j < k_coarse; j++) {
1305  long sl = shortlist[j];
1306 
1307  if (sl == -1) continue;
1308 
1309  int list_no = sl >> 32;
1310  int ofs = sl & 0xffffffff;
1311 
1312  assert (list_no >= 0 && list_no < nlist);
1313  assert (ofs >= 0 && ofs < ids[list_no].size());
1314 
1315  // 1st level residual
1316  quantizer->compute_residual (xq, residual_1, list_no);
1317 
1318  // 2nd level residual
1319  const uint8_t * l2code = &codes[list_no][ofs * pq.code_size];
1320  pq.decode (l2code, residual_2);
1321  for (int l = 0; l < d; l++)
1322  residual_2[l] = residual_1[l] - residual_2[l];
1323 
1324  // 3rd level residual's approximation
1325  idx_t id = ids[list_no][ofs];
1326  assert (0 <= id && id < ntotal);
1328  residual_1);
1329 
1330  float dis = fvec_L2sqr (residual_1, residual_2, d);
1331 
1332  if (dis < heap_sim[0]) {
1333  maxheap_pop (k, heap_sim, heap_ids);
1334  maxheap_push (k, heap_sim, heap_ids, dis, id);
1335  }
1336  n_refine ++;
1337  }
1338  maxheap_reorder (k, heap_sim, heap_ids);
1339  }
1340  }
1341  indexIVFPQ_stats.nrefine += n_refine;
1342  indexIVFPQ_stats.refine_cycles += TOC;
1343 }
1344 
1345 void IndexIVFPQR::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
1346 {
1347  std::vector<float> r3 (d);
1348 
1349  IndexIVFPQ::reconstruct_n (i0, ni, recons);
1350 
1351  for (idx_t i = i0; i < i0 + ni; i++) {
1352  float *r = recons + i * d;
1353  refine_pq.decode (&refine_codes [i * refine_pq.code_size], r3.data());
1354 
1355  for (int j = 0; j < d; j++)
1356  r[j] += r3[j];
1357 
1358  }
1359 
1360 }
1361 
1363 {
1364  IndexIVFPQR &other = dynamic_cast<IndexIVFPQR &> (other_in);
1366  refine_codes.insert (refine_codes.end(),
1367  other.refine_codes.begin(), other.refine_codes.end());
1368  other.refine_codes.clear();
1369 }
1370 
1371 long IndexIVFPQR::remove_ids(const IDSelector& /*sel*/) {
1372  FAISS_THROW_MSG("not implemented");
1373  return 0;
1374 }
1375 
1376 /*****************************************
1377  * IndexIVFPQCompact implementation
1378  ******************************************/
1379 
1380 IndexIVFPQCompact::IndexIVFPQCompact ()
1381 {
1382  alloc_type = Alloc_type_none;
1383  limits = nullptr;
1384  compact_ids = nullptr;
1385  compact_codes = nullptr;
1386 }
1387 
1388 
1389 IndexIVFPQCompact::IndexIVFPQCompact (const IndexIVFPQ &other)
1390 {
1391  FAISS_THROW_IF_NOT_MSG (other.ntotal < (1UL << 31),
1392  "IndexIVFPQCompact cannot store more than 2G images");
1393 
1394  // here it would be more convenient to just use the
1395  // copy-constructor, but it would copy the lists as well: too much
1396  // overhead...
1397 
1398  // copy fields from Index
1399  d = other.d;
1400  ntotal = other.ntotal;
1401  verbose = other.verbose;
1402  is_trained = other.is_trained;
1403  metric_type = other.metric_type;
1404 
1405  // copy fields from IndexIVF (except ids)
1406  nlist = other.nlist;
1407  nprobe = other.nprobe;
1408  quantizer = other.quantizer;
1409  quantizer_trains_alone = other.quantizer_trains_alone;
1410  own_fields = false;
1411  direct_map = other.direct_map;
1412 
1413  // copy fields from IndexIVFPQ (except codes)
1414  by_residual = other.by_residual;
1415  use_precomputed_table = other.use_precomputed_table;
1416  precomputed_table = other.precomputed_table;
1417  code_size = other.code_size;
1418  pq = other.pq;
1419  do_polysemous_training = other.do_polysemous_training;
1420  polysemous_training = nullptr;
1421 
1422  scan_table_threshold = other.scan_table_threshold;
1423  max_codes = other.max_codes;
1424  polysemous_ht = other.polysemous_ht;
1425 
1426  //allocate
1427  alloc_type = Alloc_type_new;
1428  limits = new uint32_t [nlist + 1];
1429  compact_ids = new uint32_t [ntotal];
1430  compact_codes = new uint8_t [ntotal * code_size];
1431 
1432 
1433  // copy content from other
1434  size_t ofs = 0;
1435  for (size_t i = 0; i < nlist; i++) {
1436  limits [i] = ofs;
1437  const std::vector<long> &other_ids = other.ids[i];
1438  for (size_t j = 0; j < other_ids.size(); j++) {
1439  long id = other_ids[j];
1440  FAISS_THROW_IF_NOT_MSG (id < (1UL << 31),
1441  "IndexIVFPQCompact cannot store ids > 2G");
1442  compact_ids[ofs + j] = id;
1443  }
1444  memcpy (compact_codes + ofs * code_size,
1445  other.codes[i].data(),
1446  other.codes[i].size());
1447  ofs += other_ids.size();
1448  }
1449  FAISS_THROW_IF_NOT (ofs == ntotal);
1450  limits [nlist] = ofs;
1451 
1452 }
1453 
1454 void IndexIVFPQCompact::add (idx_t, const float *) {
1455  FAISS_THROW_MSG ("cannot add to an IndexIVFPQCompact");
1456 }
1457 
1459  FAISS_THROW_MSG ("cannot reset an IndexIVFPQCompact");
1460 }
1461 
1462 void IndexIVFPQCompact::train (idx_t, const float *) {
1463  FAISS_THROW_MSG ("cannot train an IndexIVFPQCompact");
1464 }
1465 
1466 
1467 
1468 
1469 IndexIVFPQCompact::~IndexIVFPQCompact ()
1470 {
1471  if (alloc_type == Alloc_type_new) {
1472  delete [] limits;
1473  delete [] compact_codes;
1474  delete [] compact_ids;
1475  } else if (alloc_type == Alloc_type_mmap) {
1476  munmap (mmap_buffer, mmap_length);
1477 
1478  }
1479 
1480 }
1481 
1483  size_t nx,
1484  const float * qx,
1485  const long * keys,
1486  const float * coarse_dis,
1487  float_maxheap_array_t * res,
1488  bool store_pairs) const
1489 {
1490  const size_t k = res->k;
1491 
1492 #pragma omp parallel
1493  {
1494  InvertedListScanner<uint32_t> qt (*this);
1495  size_t stats_nlist = 0;
1496  size_t stats_ncode = 0;
1497  uint64_t init_query_cycles = 0;
1498  uint64_t scan_cycles = 0;
1499  uint64_t heap_cycles = 0;
1500 
1501 #pragma omp for
1502  for (size_t i = 0; i < nx; i++) {
1503  const float *qi = qx + i * d;
1504  const long * keysi = keys + i * nprobe;
1505  const float *coarse_dis_i = coarse_dis + i * nprobe;
1506  float * heap_sim = res->get_val (i);
1507  long * heap_ids = res->get_ids (i);
1508 
1509  uint64_t t0;
1510  TIC;
1511  maxheap_heapify (k, heap_sim, heap_ids);
1512  heap_cycles += TOC;
1513 
1514  TIC;
1515  qt.init_query (qi);
1516  init_query_cycles += TOC;
1517 
1518  size_t nscan = 0;
1519 
1520  for (size_t ik = 0; ik < nprobe; ik++) {
1521  long key = keysi[ik]; /* select the list */
1522  if (key < 0) {
1523  // not enough centroids for multiprobe
1524  continue;
1525  }
1526  if (key >= (long) nlist) {
1527  fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
1528  throw;
1529  }
1530  size_t list_size = limits[key + 1] - limits[key];
1531  stats_nlist ++;
1532  nscan += list_size;
1533 
1534  if (list_size == 0) continue;
1535 
1536  qt.init_list (key, coarse_dis_i[ik],
1537  list_size, compact_ids + limits[key],
1538  compact_codes + limits[key] * code_size);
1539 
1540  TIC;
1541  if (polysemous_ht > 0) {
1542  qt.scan_list_polysemous
1543  (k, heap_sim, heap_ids, store_pairs);
1544  } else if (list_size > scan_table_threshold) {
1545  qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
1546  } else {
1547  qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
1548  }
1549  scan_cycles += TOC;
1550 
1551  if (max_codes && nscan >= max_codes) break;
1552  }
1553  stats_ncode += nscan;
1554  TIC;
1555  maxheap_reorder (k, heap_sim, heap_ids);
1556 
1557  if (metric_type == METRIC_INNER_PRODUCT) {
1558  for (size_t j = 0; j < k; j++) {
1559  heap_sim[i] = -heap_sim[i];
1560  }
1561  }
1562  heap_cycles += TOC;
1563  }
1564 
1565 #pragma omp critical
1566  {
1567  indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
1568  indexIVFPQ_stats.nlist += stats_nlist;
1569  indexIVFPQ_stats.ncode += stats_ncode;
1570 
1571  indexIVFPQ_stats.init_query_cycles += init_query_cycles;
1572  indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
1573  indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
1574  indexIVFPQ_stats.heap_cycles += heap_cycles;
1575  }
1576 
1577  }
1578  indexIVFPQ_stats.nq += nx;
1579 }
1580 
1581 
1582 
1583 } // namespace faiss
uint32_t * compact_ids
size ntotal
Definition: IndexIVFPQ.h:256
uint8_t * compact_codes
size ntotal * code_size
Definition: IndexIVFPQ.h:257
void precompute_table()
build precomputed table
Definition: IndexIVFPQ.cpp:391
void copy_subset_to(IndexIVFPQ &other, int subset_type, long a1, long a2) const
Definition: IndexIVFPQ.cpp:332
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVFPQ.cpp:302
void decode(const uint8_t *code, float *x) const
decode a vector from a given code (or n vectors if third argument)
ProductQuantizer refine_pq
3rd level quantizer
Definition: IndexIVFPQ.h:199
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
Definition: utils.cpp:481
PolysemousTraining * polysemous_training
if NULL, use default
Definition: IndexIVFPQ.h:36
T * get_val(size_t key)
Return the list of values for a heap.
Definition: Heap.h:360
void add(idx_t, const float *) override
the three following functions will fail at runtime
void search_knn_with_key(size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const override
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
const float * fvecs_maybe_subsample(size_t d, size_t *n, size_t nmax, const float *x, bool verbose, long seed)
Definition: utils.cpp:1941
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:47
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:23
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:50
void merge_from_residuals(IndexIVF &other) override
used to implement merging
Definition: IndexIVFPQ.cpp:322
void decode_multiple(size_t n, const long *keys, const uint8_t *xcodes, float *x) const
inverse of encode_multiple
Definition: IndexIVFPQ.cpp:168
void train_residual_o(idx_t n, const float *x, float *residuals_2)
same as train_residual, also output 2nd level residuals
Definition: IndexIVFPQ.cpp:72
bool do_polysemous_training
reorder PQ centroids after training?
Definition: IndexIVFPQ.h:35
size_t scan_table_threshold
use table computation or on-the-fly?
Definition: IndexIVFPQ.h:39
size_t k
allocated size per heap
Definition: Heap.h:355
void train_residual(idx_t n, const float *x) override
trains the two product quantizers
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx=nullptr)
same as add_with_ids, but optionally use the precomputed list ids
uint32_t * limits
size nlist + 1
Definition: IndexIVFPQ.h:255
size_t dsub
dimensionality of each subvector
int seed
seed for the random number generator
Definition: Clustering.h:35
std::vector< float > precomputed_table
Definition: IndexIVFPQ.h:47
void fvec_madd(size_t n, const float *a, float bf, const float *b, float *c)
Definition: utils.cpp:1855
int polysemous_ht
Hamming thresh for polysemous filtering.
Definition: IndexIVFPQ.h:41
virtual void search_knn_with_key(size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const
Definition: IndexIVFPQ.cpp:963
void reset() override
removes all elements from the database.
void add_with_ids(idx_t n, const float *x, const long *xids=nullptr) override
Definition: IndexIVFPQ.cpp:185
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:55
void compute_codes(const float *x, uint8_t *codes, size_t n) const
same as compute_code for several vectors
int d
vector dimension
Definition: Index.h:64
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:49
size_t max_codes
max nb of codes to visit to do a query
Definition: IndexIVFPQ.h:40
std::vector< uint8_t > refine_codes
corresponding codes
Definition: IndexIVFPQ.h:200
size_t code_size
byte per indexed vector
long remove_ids(const IDSelector &sel) override
void train_residual(idx_t n, const float *x) override
trains the product quantizer
Definition: IndexIVFPQ.cpp:66
void encode_multiple(size_t n, long *keys, const float *x, uint8_t *codes, bool compute_keys=false) const
Definition: IndexIVFPQ.cpp:149
void train(idx_t, const float *) override
Trains the quantizer and calls train_residual to train sub-quantizers.
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:51
size_t ksub
number of centroids for each subquantizer
long idx_t
all indices are this type
Definition: Index.h:62
void compute_code(const float *x, uint8_t *code) const
Quantize one vector with the product quantizer.
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
void reset() override
removes all elements from the database.
bool verbose
verbosity level
Definition: Index.h:66
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:93
double getmillisecs()
ms elapsed since some arbitrary epoch
Definition: utils.cpp:70
optimizes the order of indices in a ProductQuantizer
float fvec_norm_L2sqr(const float *x, size_t d)
Definition: utils.cpp:538
ClusteringParameters cp
parameters used during clustering
void merge_from_residuals(IndexIVF &other) override
used to implement merging
bool by_residual
Encode residual or plain vector?
Definition: IndexIVFPQ.h:30
TI * get_ids(size_t key)
Corresponding identifiers.
Definition: Heap.h:363
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:33
size_t M
number of subquantizers
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexIVFPQ.cpp:273
void add_core_o(idx_t n, const float *x, const long *xids, float *residuals_2, const long *precomputed_idx=nullptr)
Definition: IndexIVFPQ.cpp:191
int fvec_madd_and_argmin(size_t n, const float *a, float bf, const float *b, float *c)
Definition: utils.cpp:1929
size_t code_size
code size per vector in bytes
Definition: IndexIVFPQ.h:32
long remove_ids(const IDSelector &sel) override
void reset() override
removes all elements from the database.
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
float * get_centroids(size_t m, size_t i)
return the centroids associated with subvector m
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:58
void optimize_pq_for_hamming(ProductQuantizer &pq, size_t n, const float *x) const
int max_points_per_centroid
to limit size of dataset
Definition: Clustering.h:33
bool verbose
verbose during training?
void add_with_ids(idx_t n, const float *x, const long *xids) override
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
size_t find_duplicates(idx_t *ids, size_t *lims) const
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:43
float k_factor
factor between k requested in search and the k requested from the IVFPQ
Definition: IndexIVFPQ.h:203
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:31