/data/users/matthijs/github_faiss/faiss/IndexIVFScalarQuantizer.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the CC-by-NC license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include "IndexIVFScalarQuantizer.h"
10 
11 #include <cstdio>
12 #include <algorithm>
13 
14 #include <omp.h>
15 
16 #include <immintrin.h>
17 
18 #include "utils.h"
19 
20 #include "FaissAssert.h"
21 
22 namespace faiss {
23 
24 /*******************************************************************
25  * IndexIVFScalarQuantizer implementation
26  *
27  * The main source of complexity is to support combinations of 4
28  * variants without incurring runtime tests or virtual function calls:
29  *
30  * - 4 / 8 bits per code component
31  * - uniform / non-uniform
32  * - IP / L2 distance search
33  * - scalar / AVX distance computation
34  *
35  * The appropriate Quantizer object is returned via select_quantizer
36  * that hides the template mess.
37  ********************************************************************/
38 
39 #ifdef __AVX__
40 #define USE_AVX
41 #endif
42 
43 
44 namespace {
45 
46 typedef Index::idx_t idx_t;
47 typedef ScalarQuantizer::QuantizerType QuantizerType;
48 typedef ScalarQuantizer::RangeStat RangeStat;
49 
50 
51 /*******************************************************************
52  * Codec: converts between values in [0, 1] and an index in a code
53  * array. The "i" parameter is the vector component index (not byte
54  * index).
55  */
56 
57 struct Codec8bit {
58 
59  static void encode_component (float x, uint8_t *code, int i) {
60  code[i] = (int)(255 * x);
61  }
62 
63  static float decode_component (const uint8_t *code, int i) {
64  return (code[i] + 0.5f) / 255.0f;
65  }
66 
67 #ifdef USE_AVX
68  static __m256 decode_8_components (const uint8_t *code, int i) {
69  uint64_t c8 = *(uint64_t*)(code + i);
70  __m128i c4lo = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8));
71  __m128i c4hi = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8 >> 32));
72  // __m256i i8 = _mm256_set_m128i(c4lo, c4hi);
73  __m256i i8 = _mm256_castsi128_si256 (c4lo);
74  i8 = _mm256_insertf128_si256 (i8, c4hi, 1);
75  __m256 f8 = _mm256_cvtepi32_ps (i8);
76  __m256 half = _mm256_set1_ps (0.5f);
77  f8 += half;
78  __m256 one_255 = _mm256_set1_ps (1.f / 255.f);
79  return f8 * one_255;
80  }
81 #endif
82 };
83 
84 
85 struct Codec4bit {
86 
87  static void encode_component (float x, uint8_t *code, int i) {
88  code [i / 2] |= (int)(x * 15.0) << ((i & 1) << 2);
89  }
90 
91  static float decode_component (const uint8_t *code, int i) {
92  return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f;
93  }
94 
95 
96 #ifdef USE_AVX
97  static __m256 decode_8_components (const uint8_t *code, int i) {
98  uint32_t c4 = *(uint32_t*)(code + (i >> 1));
99  uint32_t mask = 0x0f0f0f0f;
100  uint32_t c4ev = c4 & mask;
101  uint32_t c4od = (c4 >> 4) & mask;
102 
103  // the 8 lower bytes of c8 contain the values
104  __m128i c8 = _mm_unpacklo_epi8 (_mm_set1_epi32(c4ev),
105  _mm_set1_epi32(c4od));
106  __m128i c4lo = _mm_cvtepu8_epi32 (c8);
107  __m128i c4hi = _mm_cvtepu8_epi32 (_mm_srli_si128(c8, 4));
108  __m256i i8 = _mm256_castsi128_si256 (c4lo);
109  i8 = _mm256_insertf128_si256 (i8, c4hi, 1);
110  __m256 f8 = _mm256_cvtepi32_ps (i8);
111  __m256 half = _mm256_set1_ps (0.5f);
112  f8 += half;
113  __m256 one_15 = _mm256_set1_ps (1.f / 15.f);
114  return f8 * one_15;
115  }
116 #endif
117 };
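/* Illustrative sketch (not part of the original file): how the codec
 * round trip behaves for one component. Decoded values are cell
 * midpoints, so the per-component error is at most half a quantization
 * step.
 *
 *   uint8_t code[1] = {0};
 *   Codec8bit::encode_component (0.5f, code, 0);   // code[0] == 127
 *   float x = Codec8bit::decode_component (code, 0);
 *   // x == (127 + 0.5f) / 255 == 0.5
 *
 * Codec4bit packs two components per byte; the "i" argument is the
 * component index, not the byte offset, and encode_component assumes the
 * code array was zero-initialized (it ORs nibbles in).
 */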
118 
119 
120 /*******************************************************************
121  * Similarity: gets vector components and computes a similarity wrt. a
122  * query vector stored in the object
123  */
124 
125 struct SimilarityL2 {
126  const float *y, *yi;
127  explicit SimilarityL2 (const float * y): y(y) {}
128 
129 
130  /******* scalar accumulator *******/
131 
132  float accu;
133 
134  void begin () {
135  accu = 0;
136  yi = y;
137  }
138 
139  void add_component (float x) {
140  float tmp = *yi++ - x;
141  accu += tmp * tmp;
142  }
143 
144  float result () {
145  return accu;
146  }
147 
148 #ifdef USE_AVX
149  /******* AVX accumulator *******/
150 
151  __m256 accu8;
152 
153  void begin_8 () {
154  accu8 = _mm256_setzero_ps();
155  yi = y;
156  }
157 
158  void add_8_components (__m256 x) {
159  __m256 yiv = _mm256_loadu_ps (yi);
160  yi += 8;
161  __m256 tmp = yiv - x;
162  accu8 += tmp * tmp;
163  }
164 
165  float result_8 () {
166  __m256 sum = _mm256_hadd_ps(accu8, accu8);
167  __m256 sum2 = _mm256_hadd_ps(sum, sum);
168  // now add the 0th and 4th component
169  return
170  _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
171  _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
172  }
173 #endif
174 };
175 
176 struct SimilarityIP {
177  const float *y, *yi;
178  const float accu0;
179 
180  /******* scalar accumulator *******/
181 
182  float accu;
183 
184  SimilarityIP (const float * y, float accu0):
185  y (y), accu0 (accu0) {}
186 
187  void begin () {
188  accu = accu0;
189  yi = y;
190  }
191 
192  void add_component (float x) {
193  accu += *yi++ * x;
194  }
195 
196  float result () {
197  return accu;
198  }
199 
200 #ifdef USE_AVX
201  /******* AVX accumulator *******/
202 
203  __m256 accu8;
204 
205  void begin_8 () {
206  accu8 = _mm256_setzero_ps();
207  yi = y;
208  }
209 
210  void add_8_components (__m256 x) {
211  __m256 yiv = _mm256_loadu_ps (yi);
212  yi += 8;
213  accu8 += yiv * x;
214  }
215 
216  float result_8 () {
217  __m256 sum = _mm256_hadd_ps(accu8, accu8);
218  __m256 sum2 = _mm256_hadd_ps(sum, sum);
219  // now add the 0th and 4th component
220  return
221  accu0 +
222  _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
223  _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
224  }
225 #endif
226 };
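/* Illustrative sketch (not part of the original file): the Similarity
 * objects are driven with a begin / add_component / result protocol, one
 * call per decoded component. This is exactly what the templatized
 * compute_distance below does:
 *
 *   SimilarityL2 sim (query);          // query: d floats
 *   sim.begin ();
 *   for (size_t i = 0; i < d; i++)
 *       sim.add_component (decoded[i]);
 *   float dis = sim.result ();         // squared L2 distance to the query
 *
 * The AVX variant (begin_8 / add_8_components / result_8) consumes 8
 * components per call and is only used when d is a multiple of 8.
 */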
227 
228 
229 /*******************************************************************
230  * templatized distance functions
231  */
232 
233 
234 template<class Quantizer, class Similarity>
235 float compute_distance(const Quantizer & quant, Similarity & sim,
236  const uint8_t *code)
237 {
238  sim.begin();
239  for (size_t i = 0; i < quant.d; i++) {
240  float xi = quant.reconstruct_component (code, i);
241  sim.add_component (xi);
242  }
243  return sim.result();
244 }
245 
246 #ifdef USE_AVX
247 template<class Quantizer, class Similarity>
248 float compute_distance_8(const Quantizer & quant, Similarity & sim,
249  const uint8_t *code)
250 {
251  sim.begin_8();
252  for (size_t i = 0; i < quant.d; i += 8) {
253  __m256 xi = quant.reconstruct_8_components (code, i);
254  sim.add_8_components (xi);
255  }
256  return sim.result_8();
257 }
258 #endif
259 
260 
261 /*******************************************************************
262  * Quantizer range training
263  */
264 
265 static float sqr (float x) {
266  return x * x;
267 }
268 
269 
270 void train_Uniform(RangeStat rs, float rs_arg,
271  idx_t n, int k, const float *x,
272  std::vector<float> & trained)
273 {
274  trained.resize (2);
275  float & vmin = trained[0];
276  float & vmax = trained[1];
277 
278  if (rs == ScalarQuantizer::RS_minmax) {
279  vmin = HUGE_VAL; vmax = -HUGE_VAL;
280  for (size_t i = 0; i < n; i++) {
281  if (x[i] < vmin) vmin = x[i];
282  if (x[i] > vmax) vmax = x[i];
283  }
284  float vexp = (vmax - vmin) * rs_arg;
285  vmin -= vexp;
286  vmax += vexp;
287  } else if (rs == ScalarQuantizer::RS_meanstd) {
288  double sum = 0, sum2 = 0;
289  for (size_t i = 0; i < n; i++) {
290  sum += x[i];
291  sum2 += x[i] * x[i];
292  }
293  float mean = sum / n;
294  float var = sum2 / n - mean * mean;
295  float std = var <= 0 ? 1.0 : sqrt(var);
296 
297  vmin = mean - std * rs_arg ;
298  vmax = mean + std * rs_arg ;
299  } else if (rs == ScalarQuantizer::RS_quantiles) {
300  std::vector<float> x_copy(n);
301  memcpy(x_copy.data(), x, n * sizeof(*x));
302  // TODO just do a quickselect
303  std::sort(x_copy.begin(), x_copy.end());
304  int o = int(rs_arg * n);
305  if (o < 0) o = 0;
306  if (o > n - o) o = n / 2;
307  vmin = x_copy[o];
308  vmax = x_copy[n - 1 - o];
309 
310  } else if (rs == ScalarQuantizer::RS_optim) {
311  float a, b;
312  float sx = 0;
313  {
314  vmin = HUGE_VAL, vmax = -HUGE_VAL;
315  for (size_t i = 0; i < n; i++) {
316  if (x[i] < vmin) vmin = x[i];
317  if (x[i] > vmax) vmax = x[i];
318  sx += x[i];
319  }
320  b = vmin;
321  a = (vmax - vmin) / (k - 1);
322  }
323  int verbose = false;
324  int niter = 2000;
325  float last_err = -1;
326  int iter_last_err = 0;
327  for (int it = 0; it < niter; it++) {
328  float sn = 0, sn2 = 0, sxn = 0, err1 = 0;
329 
330  for (idx_t i = 0; i < n; i++) {
331  float xi = x[i];
332  float ni = floor ((xi - b) / a + 0.5);
333  if (ni < 0) ni = 0;
334  if (ni >= k) ni = k - 1;
335  err1 += sqr (xi - (ni * a + b));
336  sn += ni;
337  sn2 += ni * ni;
338  sxn += ni * xi;
339  }
340 
341  if (err1 == last_err) {
342  iter_last_err ++;
343  if (iter_last_err == 16) break;
344  } else {
345  last_err = err1;
346  iter_last_err = 0;
347  }
348 
349  float det = sqr (sn) - sn2 * n;
350 
351  b = (sn * sxn - sn2 * sx) / det;
352  a = (sn * sx - n * sxn) / det;
353  if (verbose) {
354  printf ("it %d, err1=%g \r", it, err1);
355  fflush(stdout);
356  }
357  }
358  if (verbose) printf("\n");
359 
360  vmin = b;
361  vmax = b + a * (k - 1);
362 
363  } else {
364  FAISS_THROW_MSG ("Invalid qtype");
365  }
366  vmax -= vmin;
367 }
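/* Illustrative note (not part of the original file): because of the final
 * "vmax -= vmin" above, trained[1] ends up holding the range width rather
 * than the maximum. For data spanning [-1, 3] with RS_minmax and
 * rs_arg == 0:
 *
 *   std::vector<float> trained;
 *   train_Uniform (ScalarQuantizer::RS_minmax, 0, n, 256, x, trained);
 *   // trained[0] == -1 (vmin), trained[1] == 4 (vdiff = vmax - vmin)
 *
 * QuantizerUniform then maps a value v to (v - vmin) / vdiff in [0, 1]
 * before handing it to the codec.
 */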
368 
369 void train_NonUniform(RangeStat rs, float rs_arg,
370  idx_t n, int d, int k, const float *x,
371  std::vector<float> & trained)
372 {
373  trained.resize (2 * d);
374  float * vmin = trained.data();
375  float * vmax = trained.data() + d;
376  if (rs == ScalarQuantizer::RS_minmax) {
377  memcpy (vmin, x, sizeof(*x) * d);
378  memcpy (vmax, x, sizeof(*x) * d);
379  for (size_t i = 1; i < n; i++) {
380  const float *xi = x + i * d;
381  for (size_t j = 0; j < d; j++) {
382  if (xi[j] < vmin[j]) vmin[j] = xi[j];
383  if (xi[j] > vmax[j]) vmax[j] = xi[j];
384  }
385  }
386  float *vdiff = vmax;
387  for (size_t j = 0; j < d; j++) {
388  float vexp = (vmax[j] - vmin[j]) * rs_arg;
389  vmin[j] -= vexp;
390  vmax[j] += vexp;
391  vdiff [j] = vmax[j] - vmin[j];
392  }
393  } else {
394  // transpose
395  std::vector<float> xt(n * d);
396  for (size_t i = 0; i < n; i++) {
397  const float *xi = x + i * d;
398  for (size_t j = 0; j < d; j++) {
399  xt[j * n + i] = xi[j];
400  }
401  }
402 #pragma omp parallel for
403  for (size_t j = 0; j < d; j++) {
404  std::vector<float> trained_d(2);   // thread-private: avoids a data race
405  train_Uniform(rs, rs_arg,
406  n, k, xt.data() + j * n,
407  trained_d);
408  vmin[j] = trained_d[0];
409  vmax[j] = trained_d[1];
410  }
411  }
412 }
413 
414 
415 /*******************************************************************
416  * Quantizer: normalizes scalar vector components, then passes them
417  * through a codec
418  */
419 
420 
421 
422 struct Quantizer {
423  virtual void encode_vector(const float *x, uint8_t *code) const = 0;
424  virtual void decode_vector(const uint8_t *code, float *x) const = 0;
425 
426  virtual float compute_distance_L2 (SimilarityL2 &sim,
427  const uint8_t * codes) const = 0;
428  virtual float compute_distance_IP (SimilarityIP &sim,
429  const uint8_t * codes) const = 0;
430 
431  virtual ~Quantizer() {}
432 };
433 
434 
435 
436 
437 template<class Codec>
438 struct QuantizerUniform: Quantizer {
439  const size_t d;
440  const float vmin, vdiff;
441 
442  QuantizerUniform(size_t d, const std::vector<float> &trained):
443  d(d), vmin(trained[0]), vdiff(trained[1]) {
444  }
445 
446  void encode_vector(const float* x, uint8_t* code) const override {
447  for (size_t i = 0; i < d; i++) {
448  float xi = (x[i] - vmin) / vdiff;
449  if (xi < 0)
450  xi = 0;
451  if (xi > 1.0)
452  xi = 1.0;
453  Codec::encode_component(xi, code, i);
454  }
455  }
456 
457  void decode_vector(const uint8_t* code, float* x) const override {
458  for (size_t i = 0; i < d; i++) {
459  float xi = Codec::decode_component(code, i);
460  x[i] = vmin + xi * vdiff;
461  }
462  }
463 
464  float reconstruct_component (const uint8_t * code, int i) const
465  {
466  float xi = Codec::decode_component (code, i);
467  return vmin + xi * vdiff;
468  }
469 
470 #ifdef USE_AVX
471  __m256 reconstruct_8_components (const uint8_t * code, int i) const
472  {
473  __m256 xi = Codec::decode_8_components (code, i);
474  return _mm256_set1_ps(vmin) + xi * _mm256_set1_ps (vdiff);
475  }
476 #endif
477 
478  float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes)
479  const override {
480  return compute_distance(*this, sim, codes);
481  }
482 
483  float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes)
484  const override {
485  return compute_distance(*this, sim, codes);
486  }
487 };
488 
489 #ifdef USE_AVX
490 template<class Codec>
491 struct QuantizerUniform8: QuantizerUniform<Codec> {
492 
493  QuantizerUniform8 (size_t d, const std::vector<float> &trained):
494  QuantizerUniform<Codec> (d, trained) {}
495 
496  float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes)
497  const override {
498  return compute_distance_8(*this, sim, codes);
499  }
500 
501  float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes)
502  const override {
503  return compute_distance_8(*this, sim, codes);
504  }
505 };
506 #endif
507 
508 
509 
510 
511 
512 template<class Codec>
513 struct QuantizerNonUniform: Quantizer {
514  const size_t d;
515  const float *vmin, *vdiff;
516 
517  QuantizerNonUniform(size_t d, const std::vector<float> &trained):
518  d(d), vmin(trained.data()), vdiff(trained.data() + d) {}
519 
520  void encode_vector(const float* x, uint8_t* code) const override {
521  for (size_t i = 0; i < d; i++) {
522  float xi = (x[i] - vmin[i]) / vdiff[i];
523  if (xi < 0)
524  xi = 0;
525  if (xi > 1.0)
526  xi = 1.0;
527  Codec::encode_component(xi, code, i);
528  }
529  }
530 
531  void decode_vector(const uint8_t* code, float* x) const override {
532  for (size_t i = 0; i < d; i++) {
533  float xi = Codec::decode_component(code, i);
534  x[i] = vmin[i] + xi * vdiff[i];
535  }
536  }
537 
538  float reconstruct_component (const uint8_t * code, int i) const
539  {
540  float xi = Codec::decode_component (code, i);
541  return vmin[i] + xi * vdiff[i];
542  }
543 
544 #ifdef USE_AVX
545  __m256 reconstruct_8_components (const uint8_t * code, int i) const
546  {
547  __m256 xi = Codec::decode_8_components (code, i);
548  return _mm256_loadu_ps(vmin + i) + xi * _mm256_loadu_ps (vdiff + i);
549  }
550 #endif
551 
552  float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes)
553  const override {
554  return compute_distance(*this, sim, codes);
555  }
556 
557  float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes)
558  const override {
559  return compute_distance(*this, sim, codes);
560  }
561 };
562 
563 #ifdef USE_AVX
564 template<class Codec>
565 struct QuantizerNonUniform8: QuantizerNonUniform<Codec> {
566 
567  QuantizerNonUniform8 (size_t d, const std::vector<float> &trained):
568  QuantizerNonUniform<Codec> (d, trained) {}
569 
570  float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes)
571  const override {
572  return compute_distance_8(*this, sim, codes);
573  }
574 
575  float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes)
576  const override {
577  return compute_distance_8(*this, sim, codes);
578  }
579 };
580 #endif
581 
582 
583 
584 
585 
586 Quantizer *select_quantizer (
587  QuantizerType qtype,
588  size_t d, const std::vector<float> & trained)
589 {
590 #ifdef USE_AVX
591  if (d % 8 == 0) {
592  switch(qtype) {
593  case ScalarQuantizer::QT_8bit:
594  return new QuantizerNonUniform8<Codec8bit>(d, trained);
595  case ScalarQuantizer::QT_4bit:
596  return new QuantizerNonUniform8<Codec4bit>(d, trained);
597  case ScalarQuantizer::QT_8bit_uniform:
598  return new QuantizerUniform8<Codec8bit>(d, trained);
599  case ScalarQuantizer::QT_4bit_uniform:
600  return new QuantizerUniform8<Codec4bit>(d, trained);
601  }
602  } else
603 #endif
604  {
605  switch(qtype) {
606  case ScalarQuantizer::QT_8bit:
607  return new QuantizerNonUniform<Codec8bit>(d, trained);
608  case ScalarQuantizer::QT_4bit:
609  return new QuantizerNonUniform<Codec4bit>(d, trained);
610  case ScalarQuantizer::QT_8bit_uniform:
611  return new QuantizerUniform<Codec8bit>(d, trained);
612  case ScalarQuantizer::QT_4bit_uniform:
613  return new QuantizerUniform<Codec4bit>(d, trained);
614  }
615  }
616  FAISS_THROW_MSG ("unknown qtype");
617  return nullptr;
618 }
619 
620 Quantizer *select_quantizer (const ScalarQuantizer &sq)
621 {
622  return select_quantizer (sq.qtype, sq.d, sq.trained);
623 }
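/* Illustrative sketch (not part of the original file): typical use of the
 * factory inside this translation unit. The caller owns the returned
 * object and must delete it.
 *
 *   ScalarQuantizer sq (d, ScalarQuantizer::QT_8bit);
 *   sq.train (n, xtrain);                       // fills sq.trained
 *   Quantizer *quant = select_quantizer (sq);   // AVX variant if d % 8 == 0
 *   std::vector<uint8_t> code (sq.code_size);
 *   quant->encode_vector (x, code.data ());
 *   SimilarityL2 sim (query);
 *   float dis = quant->compute_distance_L2 (sim, code.data ());
 *   delete quant;
 *
 * Here xtrain, x and query are assumed to point to caller-provided float
 * arrays of the right size.
 */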
624 
625 
626 } // anonymous namespace
627 
628 
629 
630 /*******************************************************************
631  * ScalarQuantizer implementation
632  ********************************************************************/
633 
634 ScalarQuantizer::ScalarQuantizer
635  (size_t d, QuantizerType qtype):
636  qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d (d)
637 {
638  switch (qtype) {
639  case QT_8bit: case QT_8bit_uniform:
640  code_size = d;
641  break;
642  case QT_4bit: case QT_4bit_uniform:
643  code_size = (d + 1) / 2;
644  break;
645  }
646 
647 }
648 
649 ScalarQuantizer::ScalarQuantizer ():
650  qtype(QT_8bit),
651  rangestat(RS_minmax), rangestat_arg(0), d (0), code_size(0)
652 {}
653 
654 void ScalarQuantizer::train (size_t n, const float *x)
655 {
656  int bit_per_dim =
657  qtype == QT_4bit_uniform ? 4 :
658  qtype == QT_4bit ? 4 :
659  qtype == QT_8bit_uniform ? 8 :
660  qtype == QT_8bit ? 8 : -1;
661 
662  switch (qtype) {
663  case QT_4bit_uniform: case QT_8bit_uniform:
664  train_Uniform (rangestat, rangestat_arg,
665  n * d, 1 << bit_per_dim, x, trained);
666  break;
667  case QT_4bit: case QT_8bit:
668  train_NonUniform (rangestat, rangestat_arg,
669  n, d, 1 << bit_per_dim, x, trained);
670  break;
671  }
672 }
673 
674 void ScalarQuantizer::compute_codes (const float * x,
675  uint8_t * codes,
676  size_t n) const
677 {
678  Quantizer *squant = select_quantizer (*this);
679 #pragma omp parallel for
680  for (size_t i = 0; i < n; i++)
681  squant->encode_vector (x + i * d, codes + i * code_size);
682  delete squant;
683 }
684 
685 void ScalarQuantizer::decode (const uint8_t *codes, float *x, size_t n) const
686 {
687  Quantizer *squant = select_quantizer (*this);
688 #pragma omp parallel for
689  for (size_t i = 0; i < n; i++)
690  squant->decode_vector (codes + i * code_size, x + i * d);
691  delete squant;
692 }
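/* Illustrative usage sketch (not part of the original file): training a
 * standalone ScalarQuantizer and checking the reconstruction, assuming x
 * points to n * d training floats.
 *
 *   faiss::ScalarQuantizer sq (d, faiss::ScalarQuantizer::QT_8bit);
 *   sq.train (n, x);                           // fills sq.trained
 *   std::vector<uint8_t> codes (n * sq.code_size);
 *   sq.compute_codes (x, codes.data (), n);
 *   std::vector<float> x2 (n * d);
 *   sq.decode (codes.data (), x2.data (), n);
 *   // x2 approximates x up to quantization error
 */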
693 
694 
695 
696 /*******************************************************************
697  * IndexIVFScalarQuantizer implementation
698  ********************************************************************/
699 
700 IndexIVFScalarQuantizer::IndexIVFScalarQuantizer
701  (Index *quantizer, size_t d, size_t nlist,
702  QuantizerType qtype, MetricType metric):
703  IndexIVF (quantizer, d, nlist, metric),
704  sq (d, qtype)
705 {
706  code_size = sq.code_size;
707  codes.resize(nlist);
708 }
709 
710 IndexIVFScalarQuantizer::IndexIVFScalarQuantizer ():
711  IndexIVF (), code_size (0)
712 {}
713 
714 void IndexIVFScalarQuantizer::train_residual (idx_t n, const float *x)
715 {
716  long * idx = new long [n];
717  ScopeDeleter<long> del (idx);
718  quantizer->assign (n, x, idx);
719  float *residuals = new float [n * d];
720  ScopeDeleter<float> del2 (residuals);
721 
722 #pragma omp parallel for
723  for (idx_t i = 0; i < n; i++) {
724  quantizer->compute_residual (x + i * d, residuals + i * d, idx[i]);
725  }
726 
727  sq.train (n, residuals);
728 
729 }
730 
731 
732 void IndexIVFScalarQuantizer::add_with_ids
733  (idx_t n, const float * x, const long *xids)
734 {
735  FAISS_THROW_IF_NOT (is_trained);
736  long * idx = new long [n];
737  ScopeDeleter<long> del (idx);
738  quantizer->assign (n, x, idx);
739  size_t nadd = 0;
740  Quantizer *squant = select_quantizer (sq);
741  ScopeDeleter1<Quantizer> del2 (squant);
742 
743 #pragma omp parallel reduction(+: nadd)
744  {
745  std::vector<float> residual (d);
746  int nt = omp_get_num_threads();
747  int rank = omp_get_thread_num();
748 
749  for (size_t i = 0; i < n; i++) {
750 
751  long list_no = idx [i];
752  if (list_no >= 0 && list_no % nt == rank) {
753  long id = xids ? xids[i] : ntotal + i;
754 
755  assert (list_no < nlist);
756 
757  ids[list_no].push_back (id);
758  nadd++;
759  quantizer->compute_residual (
760  x + i * d, residual.data(), list_no);
761 
762  size_t cur_size = codes[list_no].size();
763  codes[list_no].resize (cur_size + code_size);
764 
765  squant->encode_vector (residual.data(),
766  codes[list_no].data() + cur_size);
767  }
768  }
769  }
770  ntotal += nadd;
771 }
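/* Note (not part of the original file): the parallel section above avoids
 * locking by assigning each inverted list to a single thread: a vector
 * whose list_no satisfies list_no % nt == rank is handled only by thread
 * `rank`, so two threads never append to the same codes[list_no] /
 * ids[list_no]. The per-thread nadd counters are combined by the
 * reduction(+: nadd) clause.
 */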
772 
773 
774 void search_with_probes_ip (const IndexIVFScalarQuantizer & index,
775  const float *x,
776  const idx_t *cent_ids, const float *cent_dis,
777  const Quantizer & quant,
778  int k, float *simi, idx_t *idxi)
779 {
780  int nprobe = index.nprobe;
781  size_t code_size = index.code_size;
782  size_t d = index.d;
783  std::vector<float> decoded(d);
784  minheap_heapify (k, simi, idxi);
785  for (int i = 0; i < nprobe; i++) {
786  idx_t list_no = cent_ids[i];
787  if (list_no < 0) break;
788  float accu0 = cent_dis[i];
789 
790  const std::vector<idx_t> & ids = index.ids[list_no];
791  const uint8_t* codes = index.codes[list_no].data();
792 
793  SimilarityIP sim(x, accu0);
794 
795  for (size_t j = 0; j < ids.size(); j++) {
796 
797  float accu = quant.compute_distance_IP(sim, codes);
798 
799  if (accu > simi [0]) {
800  minheap_pop (k, simi, idxi);
801  minheap_push (k, simi, idxi, accu, ids[j]);
802  }
803  codes += code_size;
804  }
805 
806  }
807  minheap_reorder (k, simi, idxi);
808 }
809 
810 void search_with_probes_L2 (const IndexIVFScalarQuantizer & index,
811  const float *x_in,
812  const idx_t *cent_ids,
813  const Index *quantizer,
814  const Quantizer & quant,
815  int k, float *simi, idx_t *idxi)
816 {
817  int nprobe = index.nprobe;
818  size_t code_size = index.code_size;
819  size_t d = index.d;
820  std::vector<float> decoded(d), x(d);
821  maxheap_heapify (k, simi, idxi);
822  for (int i = 0; i < nprobe; i++) {
823  idx_t list_no = cent_ids[i];
824  if (list_no < 0) break;
825 
826  const std::vector<idx_t> & ids = index.ids[list_no];
827  const uint8_t* codes = index.codes[list_no].data();
828 
829  // shift of x_in wrt centroid
830  quantizer->compute_residual (x_in, x.data(), list_no);
831 
832  SimilarityL2 sim(x.data());
833 
834  for (size_t j = 0; j < ids.size(); j++) {
835 
836  float dis = quant.compute_distance_L2 (sim, codes);
837 
838  if (dis < simi [0]) {
839  maxheap_pop (k, simi, idxi);
840  maxheap_push (k, simi, idxi, dis, ids[j]);
841  }
842  codes += code_size;
843  }
844  }
845  maxheap_reorder (k, simi, idxi);
846 }
847 
848 
849 void IndexIVFScalarQuantizer::search (idx_t n, const float *x, idx_t k,
850  float *distances, idx_t *labels) const
851 {
852  FAISS_THROW_IF_NOT (is_trained);
853  idx_t *idx = new idx_t [n * nprobe];
854  ScopeDeleter<idx_t> del (idx);
855  float *dis = new float [n * nprobe];
856  ScopeDeleter<float> del2 (dis);
857 
858  quantizer->search (n, x, nprobe, dis, idx);
859 
860  Quantizer *squant = select_quantizer (sq);
861  ScopeDeleter1<Quantizer> del3(squant);
862 
863  if (metric_type == METRIC_INNER_PRODUCT) {
864 #pragma omp parallel for
865  for (size_t i = 0; i < n; i++) {
866  search_with_probes_ip (*this, x + i * d,
867  idx + i * nprobe, dis + i * nprobe, *squant,
868  k, distances + i * k, labels + i * k);
869  }
870  } else {
871 #pragma omp parallel for
872  for (size_t i = 0; i < n; i++) {
873  search_with_probes_L2 (*this, x + i * d,
874  idx + i * nprobe, quantizer, *squant,
875  k, distances + i * k, labels + i * k);
876  }
877  }
878 
879 }
880 
881 
882 void IndexIVFScalarQuantizer::merge_from_residuals (IndexIVF &other_in) {
883  IndexIVFScalarQuantizer &other =
884  dynamic_cast<IndexIVFScalarQuantizer &> (other_in);
885  for (int i = 0; i < nlist; i++) {
886  std::vector<uint8_t> & src = other.codes[i];
887  std::vector<uint8_t> & dest = codes[i];
888  dest.insert (dest.end(), src.begin (), src.end ());
889  src.clear ();
890  }
891 
892 }
893 
894 
895 }
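/* Illustrative end-to-end sketch (not part of the original file): building
 * and querying an IndexIVFScalarQuantizer. The variables d, nlist, k, and
 * the arrays xt (nt training vectors), xb (nb database vectors) and xq
 * (nq query vectors) are assumed to be provided by the caller.
 *
 *   #include "IndexFlat.h"
 *   #include "IndexIVFScalarQuantizer.h"
 *
 *   faiss::IndexFlatL2 coarse (d);                  // coarse quantizer
 *   faiss::IndexIVFScalarQuantizer index (
 *       &coarse, d, nlist,
 *       faiss::ScalarQuantizer::QT_8bit, faiss::METRIC_L2);
 *   index.train (nt, xt);        // trains coarse centroids + SQ ranges
 *   index.add (nb, xb);
 *   index.nprobe = 8;            // number of inverted lists visited
 *   std::vector<float> D (nq * k);
 *   std::vector<faiss::Index::idx_t> I (nq * k);
 *   index.search (nq, xq, k, D.data (), I.data ());
 */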