Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexPQ.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 /* Copyright 2004-present Facebook. All Rights Reserved.
11  Index based on product quantization.
12 */
13 
14 #include "IndexPQ.h"
15 
16 
17 #include <cstddef>
18 #include <cstring>
19 #include <cstdio>
20 
21 #include <algorithm>
22 
23 #include "FaissAssert.h"
24 #include "hamming.h"
25 
26 namespace faiss {
27 
28 /*********************************************************
29  * IndexPQ implementation
30  ********************************************************/
31 
32 
33 IndexPQ::IndexPQ (int d, size_t M, size_t nbits, MetricType metric):
34  Index(d, metric), pq(d, M, nbits)
35 {
36  is_trained = false;
37  do_polysemous_training = false;
38  set_typename();
39  polysemous_ht = nbits * M + 1;
40  search_type = ST_PQ;
41  encode_signs = false;
42 }
43 
44 IndexPQ::IndexPQ ()
45 {
46  metric_type = METRIC_L2;
47  is_trained = false;
48  do_polysemous_training = false;
49  set_typename();
50  polysemous_ht = pq.nbits * pq.M + 1;
51  search_type = ST_PQ;
52  encode_signs = false;
53 }
54 
55 
56 void IndexPQ::set_typename ()
57 {
58  std::stringstream s;
59  s << "PQ_" << pq.M << "x" << pq.nbits;
60  index_typename = s.str();
61 }
62 
63 
64 void IndexPQ::train (idx_t n, const float *x)
65 {
66  if (!do_polysemous_training) { // standard training
67  pq.train(n, x);
68  } else {
69  idx_t ntrain_perm = polysemous_training.ntrain_permutation;
70 
71  if (ntrain_perm > n / 4)
72  ntrain_perm = n / 4;
73  if (verbose) {
74  printf ("PQ training on %ld points, remains %ld points: "
75  "training polysemous on %s\n",
76  n - ntrain_perm, ntrain_perm,
77  ntrain_perm == 0 ? "centroids" : "these");
78  }
79  pq.train(n - ntrain_perm, x);
80 
82  pq, ntrain_perm, x + (n - ntrain_perm) * d);
83  }
84  is_trained = true;
85 }
86 
87 
88 void IndexPQ::add (idx_t n, const float *x)
89 {
90  FAISS_ASSERT (is_trained);
91  codes.resize ((n + ntotal) * pq.code_size);
93  ntotal += n;
94 }
95 
96 
97 
99 {
100  codes.clear();
101  ntotal = 0;
102 }
103 
104 void IndexPQ::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
105 {
106  FAISS_ASSERT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
107  for (idx_t i = 0; i < ni; i++) {
108  const uint8_t * code = &codes[(i0 + i) * pq.code_size];
109  pq.decode (code, recons + i * d);
110  }
111 }
112 
113 
114 void IndexPQ::reconstruct (idx_t key, float * recons) const
115 {
116  FAISS_ASSERT (key >= 0 && key < ntotal);
117  pq.decode (&codes[key * pq.code_size], recons);
118 }
119 
120 
121 
122 
123 
124 
125 
126 /*****************************************
127  * IndexPQ polysemous search routines
128  ******************************************/
129 
130 
131 
132 
133 
134 void IndexPQ::search (idx_t n, const float *x, idx_t k,
135  float *distances, idx_t *labels) const
136 {
137  FAISS_ASSERT (is_trained);
138  if (search_type == ST_PQ) { // Simple PQ search
139 
140  if (metric_type == METRIC_L2) {
141  float_maxheap_array_t res = {
142  size_t(n), size_t(k), labels, distances };
143  pq.search (x, n, codes.data(), ntotal, &res, true);
144  } else {
145  float_minheap_array_t res = {
146  size_t(n), size_t(k), labels, distances };
147  pq.search_ip (x, n, codes.data(), ntotal, &res, true);
148  }
149  indexPQ_stats.nq += n;
150  indexPQ_stats.ncode += n * ntotal;
151 
152  } else if (search_type == ST_polysemous ||
153  search_type == ST_polysemous_generalize) {
154 
155  FAISS_ASSERT (metric_type == METRIC_L2);
156 
157  search_core_polysemous (n, x, k, distances, labels);
158 
159  } else { // code-to-code distances
160 
161  uint8_t * q_codes = new uint8_t [n * pq.code_size];
162 
163  if (!encode_signs) {
164 
165  pq.compute_codes (x, q_codes, n);
166 
167 
168 
169  } else {
170  FAISS_ASSERT (d == pq.nbits * pq.M);
171  memset (q_codes, 0, n * pq.code_size);
172  for (size_t i = 0; i < n; i++) {
173  const float *xi = x + i * d;
174  uint8_t *code = q_codes + i * pq.code_size;
175  for (int j = 0; j < d; j++)
176  if (xi[j] > 0) code [j>>3] |= 1 << (j & 7);
177  }
178  }
179 
180  if (search_type == ST_SDC) {
181 
182  float_maxheap_array_t res = {
183  size_t(n), size_t(k), labels, distances};
184 
185  pq.search_sdc (q_codes, n, codes.data(), ntotal, &res, true);
186 
187  } else {
188  int * idistances = new int [n * k];
189 
190  int_maxheap_array_t res = {
191  size_t (n), size_t (k), labels, idistances};
192 
193  if (search_type == ST_HE) {
194 
195  hammings_knn (&res, q_codes, codes.data(),
196  ntotal, pq.code_size, true);
197 
198  } else if (search_type == ST_generalized_HE) {
199 
200  generalized_hammings_knn (&res, q_codes, codes.data(),
201  ntotal, pq.code_size, true);
202  }
203 
204  // convert distances to floats
205  for (int i = 0; i < k * n; i++)
206  distances[i] = idistances[i];
207  delete [] idistances;
208  }
209 
210  delete [] q_codes;
211  indexPQ_stats.nq += n;
212  indexPQ_stats.ncode += n * ntotal;
213  }
214 }
215 
216 
217 
218 
219 
220 void IndexPQStats::reset()
221 {
222  nq = ncode = n_hamming_pass = 0;
223 }
224 
225 IndexPQStats indexPQ_stats;
226 
227 
228 template <class HammingComputer>
229 static size_t polysemous_inner_loop (
230  const IndexPQ & index,
231  const float *dis_table_qi, const uint8_t *q_code,
232  size_t k, float *heap_dis, long *heap_ids)
233 {
234 
235  int M = index.pq.M;
236  int code_size = index.pq.code_size;
237  int ksub = index.pq.ksub;
238  size_t ntotal = index.ntotal;
239  int ht = index.polysemous_ht;
240 
241  const uint8_t *b_code = index.codes.data();
242 
243  size_t n_pass_i = 0;
244 
245  HammingComputer hc (q_code, code_size);
246 
247  for (long bi = 0; bi < ntotal; bi++) {
248  int hd = hc.hamming (b_code);
249 
250  if (hd < ht) {
251  n_pass_i ++;
252 
253  float dis = 0;
254  const float * dis_table = dis_table_qi;
255  for (int m = 0; m < M; m++) {
256  dis += dis_table [b_code[m]];
257  dis_table += ksub;
258  }
259 
260  if (dis < heap_dis[0]) {
261  maxheap_pop (k, heap_dis, heap_ids);
262  maxheap_push (k, heap_dis, heap_ids, dis, bi);
263  }
264  }
265  b_code += code_size;
266  }
267  return n_pass_i;
268 }
269 
270 
271 void IndexPQ::search_core_polysemous (idx_t n, const float *x, idx_t k,
272  float *distances, idx_t *labels) const
273 {
274  FAISS_ASSERT (pq.code_size % 8 == 0);
275  FAISS_ASSERT (pq.byte_per_idx == 1);
276 
277  // PQ distance tables
278  float * dis_tables = new float [n * pq.ksub * pq.M];
279  pq.compute_distance_tables (n, x, dis_tables);
280 
281  // Hamming embedding queries
282  uint8_t * q_codes = new uint8_t [n * pq.code_size];
283 
284  if (false) {
285  pq.compute_codes (x, q_codes, n);
286  } else {
287 #pragma omp parallel for
288  for (idx_t qi = 0; qi < n; qi++) {
290  (dis_tables + qi * pq.M * pq.ksub,
291  q_codes + qi * pq.code_size);
292  }
293  }
294 
295  size_t n_pass = 0;
296 
297 #pragma omp parallel for reduction (+: n_pass)
298  for (idx_t qi = 0; qi < n; qi++) {
299  const uint8_t * q_code = q_codes + qi * pq.code_size;
300 
301  const float * dis_table_qi = dis_tables + qi * pq.M * pq.ksub;
302 
303  long * heap_ids = labels + qi * k;
304  float *heap_dis = distances + qi * k;
305  maxheap_heapify (k, heap_dis, heap_ids);
306 
307  if (search_type == ST_polysemous) {
308 
309  switch (pq.code_size) {
310  case 4:
311  n_pass += polysemous_inner_loop<HammingComputer4>
312  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
313  break;
314  case 8:
315  n_pass += polysemous_inner_loop<HammingComputer8>
316  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
317  break;
318  case 16:
319  n_pass += polysemous_inner_loop<HammingComputer16>
320  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
321  break;
322  case 32:
323  n_pass += polysemous_inner_loop<HammingComputer32>
324  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
325  break;
326  case 20:
327  n_pass += polysemous_inner_loop<HammingComputer20>
328  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
329  break;
330  default:
331  if (pq.code_size % 8 == 0)
332  n_pass += polysemous_inner_loop<HammingComputerM8>
333  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
334  else
335  n_pass += polysemous_inner_loop<HammingComputerM4>
336  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
337  break;
338  }
339  } else {
340  switch (pq.code_size) {
341  case 8:
342  n_pass += polysemous_inner_loop<GenHammingComputer8>
343  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
344  break;
345  case 16:
346  n_pass += polysemous_inner_loop<GenHammingComputer16>
347  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
348  break;
349  case 32:
350  n_pass += polysemous_inner_loop<GenHammingComputer32>
351  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
352  break;
353  default:
354  n_pass += polysemous_inner_loop<GenHammingComputerM8>
355  (*this, dis_table_qi, q_code, k, heap_dis, heap_ids);
356  break;
357  }
358  }
359  maxheap_reorder (k, heap_dis, heap_ids);
360  }
361 
362  indexPQ_stats.nq += n;
363  indexPQ_stats.ncode += n * ntotal;
364  indexPQ_stats.n_hamming_pass += n_pass;
365 
366  delete [] q_codes;
367  delete [] dis_tables;
368 
369 }
370 
371 
372 
373 
374 /*****************************************
375  * Stats of IndexPQ codes
376  ******************************************/
377 
378 
379 
380 
381 void IndexPQ::hamming_distance_table (idx_t n, const float *x,
382  int32_t *dis) const
383 {
384  uint8_t * q_codes = new uint8_t [n * pq.code_size];
385 
386  pq.compute_codes (x, q_codes, n);
387 
388  hammings (q_codes, codes.data(), n, ntotal, pq.code_size, dis);
389 
390  delete [] q_codes;
391 }
392 
393 
395  idx_t nb, const float *xb,
396  long *hist)
397 {
398  FAISS_ASSERT (metric_type == METRIC_L2);
399  FAISS_ASSERT (pq.code_size % 8 == 0);
400  FAISS_ASSERT (pq.byte_per_idx == 1);
401 
402  // Hamming embedding queries
403  uint8_t * q_codes = new uint8_t [n * pq.code_size];
404  pq.compute_codes (x, q_codes, n);
405 
406  uint8_t * b_codes ;
407 
408  if (xb) {
409  b_codes = new uint8_t [nb * pq.code_size];
410  pq.compute_codes (xb, b_codes, nb);
411  } else {
412  nb = ntotal;
413  b_codes = codes.data();
414  }
415  int nbits = pq.M * pq.nbits;
416  memset (hist, 0, sizeof(*hist) * (nbits + 1));
417  size_t bs = 256;
418 
419 #pragma omp parallel
420  {
421  std::vector<long> histi (nbits + 1);
422  hamdis_t *distances = new hamdis_t [nb * bs];
423 #pragma omp for
424  for (size_t q0 = 0; q0 < n; q0 += bs) {
425  // printf ("dis stats: %ld/%ld\n", q0, n);
426  size_t q1 = q0 + bs;
427  if (q1 > n) q1 = n;
428 
429  hammings (q_codes + q0 * pq.code_size, b_codes,
430  q1 - q0, nb,
431  pq.code_size, distances);
432 
433  for (size_t i = 0; i < nb * (q1 - q0); i++)
434  histi [distances [i]]++;
435  }
436 #pragma omp critical
437  {
438  for (int i = 0; i <= nbits; i++)
439  hist[i] += histi[i];
440  }
441  delete [] distances;
442  }
443 
444  delete [] q_codes;
445  if (xb)
446  delete [] b_codes;
447 
448 }
449 
450 
451 
452 
453 
454 
455 
456 
457 
458 
459 
460 
461 
462 
463 
464 
465 
466 
467 
468 
469 /*****************************************
470  * MultiIndexQuantizer
471  ******************************************/
472 
473 
474 
/** Comparator on indices: index i sorts before index j when
 *  x[i] < x[j]. Initialise it as an aggregate, e.g. ArgSort<T> cmp = {x}.
 */
template <typename T>
struct ArgSort {
    const T * x; ///< values the compared indices refer to

    // const so that the comparator can also be called through a const
    // object or reference
    bool operator() (size_t i, size_t j) const {
        return x[i] < x[j];
    }
};
482 
483 
484 /** Array that maintains a permutation of its elements so that the
485  * array's elements are sorted
486  */
template <typename T>
struct SortedArray {
    const T * x;            ///< values being ranked, set by init()
    int N;                  ///< number of values
    std::vector<int> perm;  ///< perm[r] = index in x of the r-th smallest

    // x starts as null instead of indeterminate; init() must be called
    // before any accessor
    explicit SortedArray (int N): x (nullptr), N (N), perm (N) {}

    /// attach the array x (N values) and sort the permutation on it
    void init (const T*x) {
        this->x = x;
        for (int n = 0; n < N; n++)
            perm[n] = n;
        std::sort (perm.begin(), perm.end(),
                   [x] (int i, int j) { return x[i] < x[j]; });
    }

    // get smallest value
    T get_0 () const {
        return x[perm[0]];
    }

    // get delta between n-smallest and n-1 -smallest (requires n >= 1)
    T get_diff (int n) const {
        return x[perm[n]] - x[perm[n - 1]];
    }

    // remap orders counted from smallest to indices in array
    int get_ord (int n) const {
        return perm[n];
    }
};
521 
522 
523 
524 /** Array has n values. Sort the k first ones and copy the other ones
525  * into elements k..n-1
526  */
527 template <class C>
528 void partial_sort (int k, int n,
529  const typename C::T * vals, typename C::TI * perm) {
530  // insert first k elts in heap
531  for (int i = 1; i < k; i++) {
532  indirect_heap_push<C> (i + 1, vals, perm, perm[i]);
533  }
534 
535  // insert next n - k elts in heap
536  for (int i = k; i < n; i++) {
537  typename C::TI id = perm[i];
538  typename C::TI top = perm[0];
539 
540  if (C::cmp(vals[top], vals[id])) {
541  indirect_heap_pop<C> (k, vals, perm);
542  indirect_heap_push<C> (k, vals, perm, id);
543  perm[i] = top;
544  } else {
545  // nothing, elt at i is good where it is.
546  }
547  }
548 
549  // order the k first elements in heap
550  for (int i = k - 1; i > 0; i--) {
551  typename C::TI top = perm[0];
552  indirect_heap_pop<C> (i + 1, vals, perm);
553  perm[i] = top;
554  }
555 }
556 
557 /** same as SortedArray, but only the k first elements are sorted */
558 template <typename T>
560  const T * x;
561  int N;
562 
563  // type of the heap: CMax = sort ascending
564  typedef CMax<T, int> HC;
565  std::vector<int> perm;
566 
567  int k; // k elements are sorted
568 
569  int initial_k, k_factor;
570 
571  explicit SemiSortedArray (int N) {
572  this->N = N;
573  perm.resize (N);
574  perm.resize (N);
575  initial_k = 3;
576  k_factor = 4;
577  }
578 
579  void init (const T*x) {
580  this->x = x;
581  for (int n = 0; n < N; n++)
582  perm[n] = n;
583  k = 0;
584  grow (initial_k);
585  }
586 
587  /// grow the sorted part of the array to size next_k
588  void grow (int next_k) {
589  if (next_k < N) {
590  partial_sort<HC> (next_k - k, N - k, x, &perm[k]);
591  k = next_k;
592  } else { // full sort of remainder of array
593  ArgSort<T> cmp = {x };
594  std::sort (perm.begin() + k, perm.end(), cmp);
595  k = N;
596  }
597  }
598 
599  // get smallest value
600  T get_0 () {
601  return x[perm[0]];
602  }
603 
604  // get delta between n-smallest and n-1 -smallest
605  T get_diff (int n) {
606  if (n >= k) {
607  // want to keep powers of 2 - 1
608  int next_k = (k + 1) * k_factor - 1;
609  grow (next_k);
610  }
611  return x[perm[n]] - x[perm[n - 1]];
612  }
613 
614  // remap orders counted from smallest to indices in array
615  int get_ord (int n) {
616  assert (n < k);
617  return perm[n];
618  }
619 };
620 
621 
622 
623 /*****************************************
624  * Find the k smallest sums of M terms, where each term is taken in a
625  * table x of n values.
626  *
627  * A combination of terms is encoded as a scalar 0 <= t < n^M. The
628  * combination t0 ... t(M-1) that correspond to the sum
629  *
630  * sum = x[0, t0] + x[1, t1] + .... + x[M-1, t(M-1)]
631  *
632  * is encoded as
633  *
634  * t = t0 + t1 * n + t2 * n^2 + ... + t(M-1) * n^(M-1)
635  *
636  * MinSumK is an object rather than a function, so that storage can be
637  * re-used over several computations with the same sizes. use_seen is
638  * good when there may be ties in the x array and it is a concern if
639  * occasionally several t's are returned.
640  *
641  * @param x size M * n, values to add up
642  * @param k nb of results to retrieve
643  * @param M nb of terms
644  * @param n nb of distinct values
645  * @param sums output, size k, sorted
646  * @param terms output, size k, with encoding as above
647  *
648  ******************************************/
649 template <typename T, class SSA, bool use_seen>
650 struct MinSumK {
651  int K; ///< nb of sums to return
652  int M; ///< nb of elements to sum up
653  int N; ///< nb of possible elements for each of the M terms
654 
655  /** the heap.
656  * We use a heap to maintain a queue of sums, with the associated
657  * terms involved in the sum.
658  */
659  typedef CMin<T, long> HC;
660  size_t heap_capacity, heap_size;
661  T *bh_val;
662  long *bh_ids;
663 
664  std::vector <SSA> ssx;
665  std::vector <long> weights;
666 
667  // all results get pushed several times. When there are ties, they
668  // are popped interleaved with others, so it is not easy to
669  // identify them. Therefore, this bit array just marks elements
670  // that were seen before.
671  std::vector <uint8_t> seen;
672 
673  MinSumK (int K, int M, int N): K(K), M(M), N(N) {
674  heap_capacity = K * M;
675  // we'll do k steps, each step pushes at most M vals
676  bh_val = new T[heap_capacity];
677  bh_ids = new long[heap_capacity];
678 
679  weights.push_back (1);
680  for (int m = 1; m < M; m++)
681  weights.push_back(weights[m - 1] * N);
682 
683  if (use_seen) {
684  long n_ids = weights.back() * N;
685  seen.resize ((n_ids + 7) / 8);
686  }
687 
688  for (int m = 0; m < M; m++)
689  ssx.push_back (SSA(N));
690 
691  }
692 
693  bool is_seen (long i) {
694  return (seen[i >> 3] >> (i & 7)) & 1;
695  }
696 
697  void mark_seen (long i) {
698  if (use_seen)
699  seen [i >> 3] |= 1 << (i & 7);
700  }
701 
702  void run (const T *x, T * sums, long * terms) {
703  heap_size = 0;
704 
705  for (int m = 0; m < M; m++)
706  ssx[m].init(x + N * m);
707 
708  { // intial result: take min for all elements
709  T sum = 0;
710  terms[0] = 0;
711  mark_seen (0);
712  for (int m = 0; m < M; m++) {
713  sum += ssx[m].get_0();
714  }
715  sums[0] = sum;
716  for (int m = 0; m < M; m++) {
717  heap_push<HC> (++heap_size, bh_val, bh_ids,
718  sum + ssx[m].get_diff(1),
719  weights[m]);
720  }
721  }
722 
723  for (int k = 1; k < K; k++) {
724  // pop smallest value from heap
725  if (use_seen) {// skip already seen elements
726  while (is_seen (bh_ids[0])) {
727  assert (heap_size > 0);
728  heap_pop<HC> (heap_size--, bh_val, bh_ids);
729  }
730  }
731  assert (heap_size > 0);
732 
733  T sum = sums[k] = bh_val[0];
734  long ti = terms[k] = bh_ids[0];
735 
736  if (use_seen) {
737  mark_seen (ti);
738  heap_pop<HC> (heap_size--, bh_val, bh_ids);
739  } else {
740  do {
741  heap_pop<HC> (heap_size--, bh_val, bh_ids);
742  } while (heap_size > 0 && bh_ids[0] == ti);
743  }
744 
745  // enqueue followers
746  long ii = ti;
747  for (int m = 0; m < M; m++) {
748  long n = ii % N;
749  ii /= N;
750  if (n + 1 >= N) continue;
751 
752  enqueue_follower (ti, m, n, sum);
753  }
754  }
755 
756  /*
757  for (int k = 0; k < K; k++)
758  for (int l = k + 1; l < K; l++)
759  assert (terms[k] != terms[l]);
760  */
761 
762  // convert indices by applying permutation
763  for (int k = 0; k < K; k++) {
764  long ii = terms[k];
765  if (use_seen) {
766  // clear seen for reuse at next loop
767  seen[ii >> 3] = 0;
768  }
769  long ti = 0;
770  for (int m = 0; m < M; m++) {
771  long n = ii % N;
772  ti += weights[m] * ssx[m].get_ord(n);
773  ii /= N;
774  }
775  terms[k] = ti;
776  }
777  }
778 
779 
780  void enqueue_follower (long ti, int m, int n, T sum) {
781  T next_sum = sum + ssx[m].get_diff(n + 1);
782  long next_ti = ti + weights[m];
783  heap_push<HC> (++heap_size, bh_val, bh_ids, next_sum, next_ti);
784  }
785 
786 
787  ~MinSumK () {
788  delete [] bh_ids;
789  delete [] bh_val;
790  }
791 };
792 
793 
794 
795 
796 MultiIndexQuantizer::MultiIndexQuantizer (int d,
797  size_t M,
798  size_t nbits):
799  Index(d, METRIC_L2), pq(d, M, nbits)
800 {
801  is_trained = false;
802  set_typename();
803 }
804 
805 
806 void MultiIndexQuantizer::set_typename()
807 {
808  std::stringstream s;
809  s << "MI_" << pq.M << "x" << pq.nbits;
810  index_typename = s.str();
811 }
812 
813 
814 void MultiIndexQuantizer::train(idx_t n, const float *x)
815 {
816  pq.train (n, x);
817  is_trained = true;
818  // count virtual elements in index
819  ntotal = 1;
820  for (int m = 0; m < pq.M; m++)
821  ntotal *= pq.ksub;
822 }
823 
824 
825 void MultiIndexQuantizer::search (idx_t n, const float *x, idx_t k,
826  float *distances, idx_t *labels) const {
827 
828  float * dis_tables = new float [n * pq.ksub * pq.M];
829 
830  pq.compute_distance_tables (n, x, dis_tables);
831 
832  /// TODO: special version for k==1
833 
834 #pragma omp parallel
835  {
836  MinSumK <float, SemiSortedArray<float>, false> msk(k, pq.M, pq.ksub);
837 #pragma omp for
838  for (int i = 0; i < n; i++) {
839  msk.run (dis_tables + i * pq.ksub * pq.M,
840  distances + i * k, labels + i * k);
841 
842  }
843  }
844  delete [] dis_tables;
845 }
846 
847 
848 void MultiIndexQuantizer::reconstruct (idx_t key, float * recons) const
849 {
850  if (pq.byte_per_idx == 1) {
851  uint8_t code[pq.M];
852  long jj = key;
853  for (int m = 0; m < pq.M; m++) {
854  long n = jj % pq.ksub;
855  jj /= pq.ksub;
856  code[m] = n;
857  }
858  pq.decode (code, recons);
859  } else if (pq.byte_per_idx == 2) {
860  uint16_t code[pq.M];
861  long jj = key;
862  for (int m = 0; m < pq.M; m++) {
863  long n = jj % pq.ksub;
864  jj /= pq.ksub;
865  code[m] = n;
866  }
867  pq.decode ((uint8_t*)code, recons);
868  } else FAISS_ASSERT(!"only 1 or 2 bytes per index supported");
869 }
870 
871 
872 void MultiIndexQuantizer::add (idx_t n, const float *x)
873 {
874  FAISS_ASSERT (!"This index has virtual elements, it does not support add");
875 }
876 
878 {
879  FAISS_ASSERT (!"This index has virtual elements, "
880  "it does not support reset");
881 }
882 
883 
884 
885 
886 } // END namespace faiss
int M
nb of elements to sum up
Definition: IndexPQ.cpp:652
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:35
size_t nbits
number of bits per quantization index
void decode(const uint8_t *code, float *x) const
decode a vector from a given code (or n vectors if third argument)
Hamming distance on codes.
Definition: IndexPQ.h:79
bool do_polysemous_training
false = standard PQ
Definition: IndexPQ.h:71
virtual void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:64
size_t byte_per_idx
nb bytes per code component (1 or 2)
void partial_sort(int k, int n, const typename C::T *vals, typename C::TI *perm)
Definition: IndexPQ.cpp:528
CMin< T, long > HC
Definition: IndexPQ.cpp:659
void grow(int next_k)
grow the sorted part of the array to size next_k
Definition: IndexPQ.cpp:588
void compute_distance_tables(size_t nx, const float *x, float *dis_tables) const
void generalized_hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)
Definition: hamming.cpp:626
void compute_code_from_distance_table(const float *tab, uint8_t *code) const
void compute_codes(const float *x, uint8_t *codes, size_t n) const
same as compute_code for several vectors
int d
vector dimension
Definition: Index.h:66
void hamming_distance_histogram(idx_t n, const float *x, idx_t nb, const float *xb, long *dist_histogram)
Definition: IndexPQ.cpp:394
void search(const float *x, size_t nx, const uint8_t *codes, const size_t ncodes, float_maxheap_array_t *res, bool init_finalize_heap=true) const
size_t code_size
byte per indexed vector
Filter on generalized Hamming.
Definition: IndexPQ.h:83
virtual void reset()
removes all elements from the database.
Definition: IndexPQ.cpp:877
size_t ksub
number of centroids for each subquantizer
void search_ip(const float *x, size_t nx, const uint8_t *codes, const size_t ncodes, float_minheap_array_t *res, bool init_finalize_heap=true) const
long idx_t
all indices are this type
Definition: Index.h:64
virtual void train(idx_t n, const float *x)
Definition: IndexPQ.cpp:814
void hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
Definition: hamming.cpp:471
virtual void add(idx_t n, const float *x)
add and reset will crash at runtime
Definition: IndexPQ.cpp:872
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:32
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
bool verbose
verbosity level
Definition: Index.h:68
virtual void add(idx_t n, const float *x) override
Definition: IndexPQ.cpp:88
int K
nb of sums to return
Definition: IndexPQ.cpp:651
void hamming_distance_table(idx_t n, const float *x, int32_t *dis) const
Definition: IndexPQ.cpp:381
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const
Definition: IndexPQ.cpp:825
virtual void reconstruct(idx_t key, float *recons) const override
Definition: IndexPQ.cpp:114
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
size_t M
number of subquantizers
int N
nb of possible elements for each of the M terms
Definition: IndexPQ.cpp:653
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexPQ.cpp:104
asymmetric product quantizer (default)
Definition: IndexPQ.h:78
HE filter (using ht) + PQ combination.
Definition: IndexPQ.h:82
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
virtual void reset() override
removes all elements from the database.
Definition: IndexPQ.cpp:98
virtual void reconstruct(idx_t key, float *recons) const
Definition: IndexPQ.cpp:848
void optimize_pq_for_hamming(ProductQuantizer &pq, size_t n, const float *x) const
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:134
symmetric product quantizer (SDC)
Definition: IndexPQ.h:81
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:93
PolysemousTraining polysemous_training
parameters used for the polysemous training
Definition: IndexPQ.h:74
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:44