Faiss
faiss/IndexScalarQuantizer.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include "IndexScalarQuantizer.h"
10 
11 #include <cstdio>
12 #include <algorithm>
13 
14 #include <malloc.h>
15 
16 #include <omp.h>
17 
18 #include <immintrin.h>
19 
20 #include "utils.h"
21 
22 #include "FaissAssert.h"
23 
24 namespace faiss {
25 
26 /*******************************************************************
27  * ScalarQuantizer implementation
28  *
29  * The main source of complexity is to support combinations of 4
30  * variants without incurring runtime tests or virtual function calls:
31  *
32  * - 4 / 8 bits per code component
33  * - uniform / non-uniform
34  * - IP / L2 distance search
35  * - scalar / AVX distance computation
36  *
37  * The appropriate Quantizer object is returned via select_quantizer
38  * that hides the template mess.
39  ********************************************************************/
40 
41 #ifdef __AVX__
42 #define USE_AVX
43 #endif
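
/* Illustrative sketch: for an AVX build, d a multiple of 8, 8-bit
 * non-uniform codes and L2 search, the select_* dispatch below
 * effectively resolves to
 *
 *   new DCTemplate_8<QuantizerNonUniform8<Codec8bit>, SimilarityL2> (d, trained);
 *
 * i.e. each of the 2 x 2 x 2 x 2 variant combinations is resolved once
 * to a concrete template instantiation, so the per-component inner loops
 * run without branches or virtual calls.
 */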
44 
45 
46 namespace {
47 
48 typedef Index::idx_t idx_t;
49 typedef ScalarQuantizer::QuantizerType QuantizerType;
50 typedef ScalarQuantizer::RangeStat RangeStat;
51 using DistanceComputer = ScalarQuantizer::DistanceComputer;
52 
53 
54 /*******************************************************************
55  * Codec: converts between values in [0, 1] and an index in a code
56  * array. The "i" parameter is the vector component index (not byte
57  * index).
58  */
59 
60 struct Codec8bit {
61 
62  static void encode_component (float x, uint8_t *code, int i) {
63  code[i] = (int)(255 * x);
64  }
65 
66  static float decode_component (const uint8_t *code, int i) {
67  return (code[i] + 0.5f) / 255.0f;
68  }
69 
70 #ifdef USE_AVX
71  static __m256 decode_8_components (const uint8_t *code, int i) {
72  uint64_t c8 = *(uint64_t*)(code + i);
73  __m128i c4lo = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8));
74  __m128i c4hi = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8 >> 32));
75  // __m256i i8 = _mm256_set_m128i(c4lo, c4hi);
76  __m256i i8 = _mm256_castsi128_si256 (c4lo);
77  i8 = _mm256_insertf128_si256 (i8, c4hi, 1);
78  __m256 f8 = _mm256_cvtepi32_ps (i8);
79  __m256 half = _mm256_set1_ps (0.5f);
80  f8 += half;
81  __m256 one_255 = _mm256_set1_ps (1.f / 255.f);
82  return f8 * one_255;
83  }
84 #endif
85 };
86 
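/* Illustrative example: one 8-bit component round-trip. Encoding
 * x = 0.5 stores (int)(255 * 0.5) = 127 and decoding returns
 * (127 + 0.5) / 255 = 0.5, i.e. the reproduction value is the centre of
 * the quantization cell:
 *
 *   uint8_t code[1];
 *   Codec8bit::encode_component (0.5f, code, 0);      // code[0] == 127
 *   float x = Codec8bit::decode_component (code, 0);  // x == 0.5f
 */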
87 
88 struct Codec4bit {
89 
90  static void encode_component (float x, uint8_t *code, int i) {
91  code [i / 2] |= (int)(x * 15.0) << ((i & 1) << 2);
92  }
93 
94  static float decode_component (const uint8_t *code, int i) {
95  return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f;
96  }
97 
98 
99 #ifdef USE_AVX
100  static __m256 decode_8_components (const uint8_t *code, int i) {
101  uint32_t c4 = *(uint32_t*)(code + (i >> 1));
102  uint32_t mask = 0x0f0f0f0f;
103  uint32_t c4ev = c4 & mask;
104  uint32_t c4od = (c4 >> 4) & mask;
105 
106  // the 8 lower bytes of c8 contain the values
107  __m128i c8 = _mm_unpacklo_epi8 (_mm_set1_epi32(c4ev),
108  _mm_set1_epi32(c4od));
109  __m128i c4lo = _mm_cvtepu8_epi32 (c8);
110  __m128i c4hi = _mm_cvtepu8_epi32 (_mm_srli_si128(c8, 4));
111  __m256i i8 = _mm256_castsi128_si256 (c4lo);
112  i8 = _mm256_insertf128_si256 (i8, c4hi, 1);
113  __m256 f8 = _mm256_cvtepi32_ps (i8);
114  __m256 half = _mm256_set1_ps (0.5f);
115  f8 += half;
 116  __m256 one_15 = _mm256_set1_ps (1.f / 15.f);
 117  return f8 * one_15;
118  }
119 #endif
120 };
121 
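/* Illustrative example: two 4-bit components share one code byte,
 * component 0 in the low nibble, component 1 in the high nibble. The
 * byte must start at zero because encoding only ORs bits in:
 *
 *   uint8_t code[1] = {0};
 *   Codec4bit::encode_component (1.0f, code, 0);  // low nibble  = 15
 *   Codec4bit::encode_component (0.0f, code, 1);  // high nibble = 0
 *   float x0 = Codec4bit::decode_component (code, 0);  // (15 + 0.5) / 15
 *   float x1 = Codec4bit::decode_component (code, 1);  // ( 0 + 0.5) / 15
 */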
122 
123 
124 /*******************************************************************
125  * Quantizer: normalizes scalar vector components, then passes them
126  * through a codec
127  */
128 
129 
130 
131 struct Quantizer {
132  virtual void encode_vector(const float *x, uint8_t *code) const = 0;
133  virtual void decode_vector(const uint8_t *code, float *x) const = 0;
134 
135 
136  virtual ~Quantizer() {}
137 };
138 
139 
140 
141 
142 
143 template<class Codec>
144 struct QuantizerUniform: Quantizer {
145  const size_t d;
146  const float vmin, vdiff;
147 
148  QuantizerUniform(size_t d, const std::vector<float> &trained):
149  d(d), vmin(trained[0]), vdiff(trained[1])
150  {
151  }
152 
153  void encode_vector(const float* x, uint8_t* code) const override {
154  for (size_t i = 0; i < d; i++) {
155  float xi = (x[i] - vmin) / vdiff;
156  if (xi < 0)
157  xi = 0;
158  if (xi > 1.0)
159  xi = 1.0;
160  Codec::encode_component(xi, code, i);
161  }
162  }
163 
164  void decode_vector(const uint8_t* code, float* x) const override {
165  for (size_t i = 0; i < d; i++) {
166  float xi = Codec::decode_component(code, i);
167  x[i] = vmin + xi * vdiff;
168  }
169  }
170 
171  float reconstruct_component (const uint8_t * code, int i) const
172  {
173  float xi = Codec::decode_component (code, i);
174  return vmin + xi * vdiff;
175  }
176 
177 };
178 
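/* Illustrative example: with trained = {vmin = -1, vdiff = 2} every
 * component is mapped from [-1, 1] to [0, 1] before the codec, and the
 * decoder applies the inverse affine map, so the per-component
 * reconstruction error is at most vdiff / 255 for 8-bit codes:
 *
 *   std::vector<float> trained = { -1.0f, 2.0f };
 *   QuantizerUniform<Codec8bit> q (8, trained);
 *   float x[8] = { -1, -0.5f, 0, 0.5f, 1, 0.25f, -0.25f, 0.75f };
 *   uint8_t code[8];
 *   q.encode_vector (x, code);
 *   float y[8];
 *   q.decode_vector (code, y);   // y[i] close to x[i]
 */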
179 
180 
181 #ifdef USE_AVX
182 
183 template<class Codec>
184 struct QuantizerUniform8: QuantizerUniform<Codec> {
185 
186  QuantizerUniform8 (size_t d, const std::vector<float> &trained):
187  QuantizerUniform<Codec> (d, trained) {}
188 
189  __m256 reconstruct_8_components (const uint8_t * code, int i) const
190  {
191  __m256 xi = Codec::decode_8_components (code, i);
192  return _mm256_set1_ps(this->vmin) + xi * _mm256_set1_ps (this->vdiff);
193  }
194 
195 };
196 
197 #endif
198 
199 
200 
201 template<class Codec>
202 struct QuantizerNonUniform: Quantizer {
203  const size_t d;
204  const float *vmin, *vdiff;
205 
206  QuantizerNonUniform(size_t d, const std::vector<float> &trained):
207  d(d), vmin(trained.data()), vdiff(trained.data() + d) {}
208 
209  void encode_vector(const float* x, uint8_t* code) const override {
210  for (size_t i = 0; i < d; i++) {
211  float xi = (x[i] - vmin[i]) / vdiff[i];
212  if (xi < 0)
213  xi = 0;
214  if (xi > 1.0)
215  xi = 1.0;
216  Codec::encode_component(xi, code, i);
217  }
218  }
219 
220  void decode_vector(const uint8_t* code, float* x) const override {
221  for (size_t i = 0; i < d; i++) {
222  float xi = Codec::decode_component(code, i);
223  x[i] = vmin[i] + xi * vdiff[i];
224  }
225  }
226 
227  float reconstruct_component (const uint8_t * code, int i) const
228  {
229  float xi = Codec::decode_component (code, i);
230  return vmin[i] + xi * vdiff[i];
231  }
232 
233 };
234 
235 
236 #ifdef USE_AVX
237 
238 template<class Codec>
239 struct QuantizerNonUniform8: QuantizerNonUniform<Codec> {
240 
241  QuantizerNonUniform8 (size_t d, const std::vector<float> &trained):
242  QuantizerNonUniform<Codec> (d, trained) {}
243 
244  __m256 reconstruct_8_components (const uint8_t * code, int i) const
245  {
246  __m256 xi = Codec::decode_8_components (code, i);
247  return _mm256_loadu_ps (this->vmin + i) + xi * _mm256_loadu_ps (this->vdiff + i);
248  }
249 
250 
251 };
252 
253 #endif
254 
255 Quantizer *select_quantizer (
256  QuantizerType qtype,
257  size_t d, const std::vector<float> & trained)
258 {
259 #ifdef USE_AVX
260  if (d % 8 == 0) {
 261  switch(qtype) {
 262  case ScalarQuantizer::QT_8bit:
 263  return new QuantizerNonUniform8<Codec8bit>(d, trained);
 264  case ScalarQuantizer::QT_4bit:
 265  return new QuantizerNonUniform8<Codec4bit>(d, trained);
 266  case ScalarQuantizer::QT_8bit_uniform:
 267  return new QuantizerUniform8<Codec8bit>(d, trained);
 268  case ScalarQuantizer::QT_4bit_uniform:
 269  return new QuantizerUniform8<Codec4bit>(d, trained);
 270  }
271  } else
272 #endif
273  {
 274  switch(qtype) {
 275  case ScalarQuantizer::QT_8bit:
 276  return new QuantizerNonUniform<Codec8bit>(d, trained);
 277  case ScalarQuantizer::QT_4bit:
 278  return new QuantizerNonUniform<Codec4bit>(d, trained);
 279  case ScalarQuantizer::QT_8bit_uniform:
 280  return new QuantizerUniform<Codec8bit>(d, trained);
 281  case ScalarQuantizer::QT_4bit_uniform:
 282  return new QuantizerUniform<Codec4bit>(d, trained);
 283  }
284  }
285  FAISS_THROW_MSG ("unknown qtype");
286  return nullptr;
287 }
288 
289 
290 Quantizer *select_quantizer (const ScalarQuantizer &sq)
291 {
292  return select_quantizer (sq.qtype, sq.d, sq.trained);
293 }
294 
295 
296 
297 
298 /*******************************************************************
299  * Quantizer range training
300  */
301 
302 static float sqr (float x) {
303  return x * x;
304 }
305 
306 
307 void train_Uniform(RangeStat rs, float rs_arg,
308  idx_t n, int k, const float *x,
309  std::vector<float> & trained)
310 {
311  trained.resize (2);
312  float & vmin = trained[0];
313  float & vmax = trained[1];
314 
315  if (rs == ScalarQuantizer::RS_minmax) {
316  vmin = HUGE_VAL; vmax = -HUGE_VAL;
317  for (size_t i = 0; i < n; i++) {
318  if (x[i] < vmin) vmin = x[i];
319  if (x[i] > vmax) vmax = x[i];
320  }
321  float vexp = (vmax - vmin) * rs_arg;
322  vmin -= vexp;
323  vmax += vexp;
324  } else if (rs == ScalarQuantizer::RS_meanstd) {
325  double sum = 0, sum2 = 0;
326  for (size_t i = 0; i < n; i++) {
327  sum += x[i];
328  sum2 += x[i] * x[i];
329  }
330  float mean = sum / n;
331  float var = sum2 / n - mean * mean;
332  float std = var <= 0 ? 1.0 : sqrt(var);
333 
334  vmin = mean - std * rs_arg ;
335  vmax = mean + std * rs_arg ;
336  } else if (rs == ScalarQuantizer::RS_quantiles) {
337  std::vector<float> x_copy(n);
338  memcpy(x_copy.data(), x, n * sizeof(*x));
 339  // TODO just do a quickselect
340  std::sort(x_copy.begin(), x_copy.end());
341  int o = int(rs_arg * n);
342  if (o < 0) o = 0;
343  if (o > n - o) o = n / 2;
344  vmin = x_copy[o];
345  vmax = x_copy[n - 1 - o];
346 
347  } else if (rs == ScalarQuantizer::RS_optim) {
348  float a, b;
349  float sx = 0;
350  {
351  vmin = HUGE_VAL, vmax = -HUGE_VAL;
352  for (size_t i = 0; i < n; i++) {
353  if (x[i] < vmin) vmin = x[i];
354  if (x[i] > vmax) vmax = x[i];
355  sx += x[i];
356  }
357  b = vmin;
358  a = (vmax - vmin) / (k - 1);
359  }
360  int verbose = false;
361  int niter = 2000;
362  float last_err = -1;
363  int iter_last_err = 0;
364  for (int it = 0; it < niter; it++) {
365  float sn = 0, sn2 = 0, sxn = 0, err1 = 0;
366 
367  for (idx_t i = 0; i < n; i++) {
368  float xi = x[i];
369  float ni = floor ((xi - b) / a + 0.5);
370  if (ni < 0) ni = 0;
371  if (ni >= k) ni = k - 1;
372  err1 += sqr (xi - (ni * a + b));
373  sn += ni;
374  sn2 += ni * ni;
375  sxn += ni * xi;
376  }
377 
378  if (err1 == last_err) {
379  iter_last_err ++;
380  if (iter_last_err == 16) break;
381  } else {
382  last_err = err1;
383  iter_last_err = 0;
384  }
385 
386  float det = sqr (sn) - sn2 * n;
387 
388  b = (sn * sxn - sn2 * sx) / det;
389  a = (sn * sx - n * sxn) / det;
390  if (verbose) {
391  printf ("it %d, err1=%g \r", it, err1);
392  fflush(stdout);
393  }
394  }
395  if (verbose) printf("\n");
396 
397  vmin = b;
398  vmax = b + a * (k - 1);
399 
400  } else {
 401  FAISS_THROW_MSG ("Invalid RangeStat");
402  }
403  vmax -= vmin;
404 }
405 
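/* Note on RS_optim (illustrative derivation): the loop above alternates
 * between assigning each x_i to its nearest level n_i and re-fitting the
 * affine reproduction x ~ a * n + b by least squares. Minimizing
 *
 *   E(a, b) = sum_i (x_i - a * n_i - b)^2
 *
 * gives the normal equations
 *
 *   a * sum(n_i^2) + b * sum(n_i) = sum(n_i * x_i)
 *   a * sum(n_i)   + b * N        = sum(x_i)
 *
 * whose solution, with det = sum(n_i)^2 - N * sum(n_i^2), is the update
 * used in the loop:
 *
 *   b = (sum(n_i) * sum(n_i x_i) - sum(n_i^2) * sum(x_i)) / det
 *   a = (sum(n_i) * sum(x_i)     - N * sum(n_i x_i))      / det
 */
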
406 void train_NonUniform(RangeStat rs, float rs_arg,
407  idx_t n, int d, int k, const float *x,
408  std::vector<float> & trained)
409 {
410  trained.resize (2 * d);
411  float * vmin = trained.data();
412  float * vmax = trained.data() + d;
413  if (rs == ScalarQuantizer::RS_minmax) {
414  memcpy (vmin, x, sizeof(*x) * d);
415  memcpy (vmax, x, sizeof(*x) * d);
416  for (size_t i = 1; i < n; i++) {
417  const float *xi = x + i * d;
418  for (size_t j = 0; j < d; j++) {
419  if (xi[j] < vmin[j]) vmin[j] = xi[j];
420  if (xi[j] > vmax[j]) vmax[j] = xi[j];
421  }
422  }
423  float *vdiff = vmax;
424  for (size_t j = 0; j < d; j++) {
425  float vexp = (vmax[j] - vmin[j]) * rs_arg;
426  vmin[j] -= vexp;
427  vmax[j] += vexp;
428  vdiff [j] = vmax[j] - vmin[j];
429  }
430  } else {
431  // transpose
432  std::vector<float> xt(n * d);
 433  for (size_t i = 0; i < n; i++) {   // include row 0 in the transpose
434  const float *xi = x + i * d;
435  for (size_t j = 0; j < d; j++) {
436  xt[j * n + i] = xi[j];
437  }
438  }
 439 #pragma omp parallel for
 440  for (size_t j = 0; j < d; j++) {
 441  // per-thread buffer: sharing one across threads would be a data race
 442  std::vector<float> trained_d(2);
 443  train_Uniform(rs, rs_arg,
 444  n, k, xt.data() + j * n, trained_d);
 445  vmin[j] = trained_d[0];
 446  vmax[j] = trained_d[1];
 447  }
448  }
449 }
450 
451 
452 
453 /*******************************************************************
454  * Similarity: gets vector components and computes a similarity wrt. a
455  * query vector stored in the object. The data fields just encapsulate
456  * an accumulator.
457  */
458 
459 struct SimilarityL2 {
460  const float *y, *yi;
461 
462  explicit SimilarityL2 (const float * y): y(y) {}
463 
464  /******* scalar accumulator *******/
465 
466  float accu;
467 
468  void begin () {
469  accu = 0;
470  yi = y;
471  }
472 
473  void add_component (float x) {
474  float tmp = *yi++ - x;
475  accu += tmp * tmp;
476  }
477 
478  void add_component_2 (float x1, float x2) {
479  float tmp = x1 - x2;
480  accu += tmp * tmp;
481  }
482 
483  float result () {
484  return accu;
485  }
486 
487 #ifdef USE_AVX
488  __m256 accu8;
489 
490  void begin_8 () {
491  accu8 = _mm256_setzero_ps();
492  yi = y;
493  }
494 
495  void add_8_components (__m256 x) {
496  __m256 yiv = _mm256_loadu_ps (yi);
497  yi += 8;
498  __m256 tmp = yiv - x;
499  accu8 += tmp * tmp;
500  }
501 
502  void add_8_components_2 (__m256 x, __m256 y) {
503  __m256 tmp = y - x;
504  accu8 += tmp * tmp;
505  }
506 
507  float result_8 () {
508  __m256 sum = _mm256_hadd_ps(accu8, accu8);
509  __m256 sum2 = _mm256_hadd_ps(sum, sum);
510  // now add the 0th and 4th component
511  return
512  _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
513  _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
514  }
515 #endif
516 
517 };
518 
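/* Illustrative example: the scalar path accumulates a squared L2
 * distance between the stored query y and components fed in one at a
 * time:
 *
 *   float y[3] = { 1, 2, 3 };
 *   SimilarityL2 sim (y);
 *   sim.begin ();
 *   sim.add_component (1);      // (1 - 1)^2
 *   sim.add_component (0);      // (2 - 0)^2
 *   sim.add_component (3);      // (3 - 3)^2
 *   float d2 = sim.result ();   // 4 == squared L2 distance
 */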
519 
520 struct SimilarityIP {
521  const float *y, *yi;
522  /******* scalar accumulator *******/
523 
524  float accu;
525 
526  explicit SimilarityIP (const float * y):
527  y (y) {}
528 
529  void begin () {
530  accu = 0;
531  yi = y;
532  }
533 
534  void add_component (float x) {
535  accu += *yi++ * x;
536  }
537 
538  void add_component_2 (float x1, float x2) {
539  accu += x1 * x2;
540  }
541 
542  float result () {
543  return accu;
544  }
545 
546 #ifdef USE_AVX
547 
548  __m256 accu8;
549 
550  void begin_8 () {
551  accu8 = _mm256_setzero_ps();
552  yi = y;
553  }
554 
555  void add_8_components (__m256 x) {
556  __m256 yiv = _mm256_loadu_ps (yi);
557  yi += 8;
558  accu8 += yiv * x;
559  }
560 
561  void add_8_components_2 (__m256 x1, __m256 x2) {
562  accu8 += x1 * x2;
563  }
564 
565  float result_8 () {
566  __m256 sum = _mm256_hadd_ps(accu8, accu8);
567  __m256 sum2 = _mm256_hadd_ps(sum, sum);
568  // now add the 0th and 4th component
569  return
570  _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
571  _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
572  }
573 #endif
574 };
575 
576 
577 /*******************************************************************
578  * DistanceComputer: combines a similarity and a quantizer to do
579  * code-to-vector or code-to-code comparisons
580  */
581 
582 
583 template<class Quantizer, class Similarity>
584 struct DCTemplate : ScalarQuantizer::DistanceComputer {
585 
586  Quantizer quant;
587 
588  DCTemplate(size_t d, const std::vector<float> &trained):
589  quant(d, trained)
590  {}
591 
592  float compute_distance (const float *x,
593  const uint8_t *code) override
594  {
595  Similarity sim(x);
596  sim.begin();
597  for (size_t i = 0; i < quant.d; i ++) {
598  float xi = quant.reconstruct_component (code, i);
599  sim.add_component (xi);
600  }
601  return sim.result();
602  }
603 
604  float compute_code_distance (const uint8_t *code1,
605  const uint8_t *code2) override
606  {
607  Similarity sim(nullptr);
608  sim.begin ();
609  for (size_t i = 0; i < quant.d; i ++) {
610  float x1 = quant.reconstruct_component (code1, i);
611  float x2 = quant.reconstruct_component (code2, i);
612  sim.add_component_2 (x1, x2);
613  }
614  return sim.result ();
615  }
616 
617 };
618 
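/* Illustrative example (d, trained, query, code1, code2 are placeholder
 * caller-provided buffers): a DistanceComputer pairs a concrete
 * Quantizer with a Similarity and supports both asymmetric
 * (query-to-code) and symmetric (code-to-code) comparisons:
 *
 *   DCTemplate<QuantizerNonUniform<Codec8bit>, SimilarityL2> dc (d, trained);
 *   float d_asym = dc.compute_distance (query, code1);
 *   float d_sym  = dc.compute_code_distance (code1, code2);
 */
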
619 #ifdef USE_AVX
620 
621 template<class Quantizer, class Similarity>
622 struct DCTemplate_8 : ScalarQuantizer::DistanceComputer {
623 
624  Quantizer quant;
625 
626  DCTemplate_8(size_t d, const std::vector<float> &trained):
627  quant(d, trained)
628  {}
629 
630  float compute_distance (const float *x,
631  const uint8_t *code) override
632  {
633  Similarity sim(x);
634  sim.begin_8();
635  for (size_t i = 0; i < quant.d; i += 8) {
636  __m256 xi = quant.reconstruct_8_components (code, i);
637  sim.add_8_components (xi);
638  }
639  return sim.result_8();
640  }
641 
642  float compute_code_distance (const uint8_t *code1,
643  const uint8_t *code2) override
644  {
645  Similarity sim(nullptr);
646  sim.begin_8 ();
647  for (size_t i = 0; i < quant.d; i += 8) {
648  __m256 x1 = quant.reconstruct_8_components (code1, i);
649  __m256 x2 = quant.reconstruct_8_components (code2, i);
650  sim.add_8_components_2 (x1, x2);
651  }
652  return sim.result_8 ();
653  }
654 
655 };
656 
657 
658 #endif
659 
660 
661 
662 template<class Sim>
663 DistanceComputer *select_distance_computer (
664  QuantizerType qtype,
665  size_t d, const std::vector<float> & trained)
666 {
667 #ifdef USE_AVX
668  if (d % 8 == 0) {
 669  switch(qtype) {
 670  case ScalarQuantizer::QT_8bit:
 671  return new DCTemplate_8<QuantizerNonUniform8
 672  <Codec8bit>, Sim>(d, trained);
 673  case ScalarQuantizer::QT_4bit:
 674  return new DCTemplate_8<QuantizerNonUniform8
 675  <Codec4bit>, Sim>(d, trained);
 676  case ScalarQuantizer::QT_8bit_uniform:
 677  return new DCTemplate_8<QuantizerUniform8
 678  <Codec8bit>, Sim>(d, trained);
 679  case ScalarQuantizer::QT_4bit_uniform:
 680  return new DCTemplate_8<QuantizerUniform8
 681  <Codec4bit>, Sim>(d, trained);
 682  }
683  } else
684 #endif
685  {
 686  switch(qtype) {
 687  case ScalarQuantizer::QT_8bit:
 688  return new DCTemplate<QuantizerNonUniform
 689  <Codec8bit>, Sim>(d, trained);
 690  case ScalarQuantizer::QT_4bit:
 691  return new DCTemplate<QuantizerNonUniform
 692  <Codec4bit>, Sim>(d, trained);
 693  case ScalarQuantizer::QT_8bit_uniform:
 694  return new DCTemplate<QuantizerUniform
 695  <Codec8bit>, Sim>(d, trained);
 696  case ScalarQuantizer::QT_4bit_uniform:
 697  return new DCTemplate<QuantizerUniform
 698  <Codec4bit>, Sim>(d, trained);
 699  }
700  }
701  FAISS_THROW_MSG ("unknown qtype");
702  return nullptr;
703 }
704 
705 
706 
707 
708 
709 } // anonymous namespace
710 
711 
712 
713 /*******************************************************************
714  * ScalarQuantizer implementation
715  ********************************************************************/
716 
717 ScalarQuantizer::ScalarQuantizer
718  (size_t d, QuantizerType qtype):
719  qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d (d)
720 {
721  switch (qtype) {
722  case QT_8bit: case QT_8bit_uniform:
723  code_size = d;
724  break;
725  case QT_4bit: case QT_4bit_uniform:
726  code_size = (d + 1) / 2;
727  break;
728  }
729 
730 }
731 
732 ScalarQuantizer::ScalarQuantizer ():
733  qtype(QT_8bit),
734  rangestat(RS_minmax), rangestat_arg(0), d (0), code_size(0)
735 {}
736 
737 void ScalarQuantizer::train (size_t n, const float *x)
738 {
739  int bit_per_dim =
740  qtype == QT_4bit_uniform ? 4 :
741  qtype == QT_4bit ? 4 :
742  qtype == QT_8bit_uniform ? 8 :
743  qtype == QT_8bit ? 8 : -1;
744 
745  switch (qtype) {
746  case QT_4bit_uniform: case QT_8bit_uniform:
747  train_Uniform (rangestat, rangestat_arg,
748  n * d, 1 << bit_per_dim, x, trained);
749  break;
750  case QT_4bit: case QT_8bit:
751  train_NonUniform (rangestat, rangestat_arg,
752  n, d, 1 << bit_per_dim, x, trained);
753  break;
754  }
755 }
756 
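/* Illustrative usage sketch (d, nt, nb, xt, xb are placeholders for
 * caller-provided sizes and data): stand-alone use of the quantizer is
 * train, then encode, then optionally decode back to approximate floats:
 *
 *   ScalarQuantizer sq (d, ScalarQuantizer::QT_8bit);
 *   sq.train (nt, xt);                          // xt: nt * d floats
 *   std::vector<uint8_t> codes (nb * sq.code_size);
 *   sq.compute_codes (xb, codes.data(), nb);    // xb: nb * d floats
 *   std::vector<float> xrec (nb * d);
 *   sq.decode (codes.data(), xrec.data(), nb);  // approximate reconstruction
 */
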
757 void ScalarQuantizer::compute_codes (const float * x,
758  uint8_t * codes,
759  size_t n) const
760 {
761  Quantizer *squant = select_quantizer (*this);
762  ScopeDeleter1<Quantizer> del(squant);
763 #pragma omp parallel for
764  for (size_t i = 0; i < n; i++)
765  squant->encode_vector (x + i * d, codes + i * code_size);
766 }
767 
768 void ScalarQuantizer::decode (const uint8_t *codes, float *x, size_t n) const
769 {
770  Quantizer *squant = select_quantizer (*this);
771  ScopeDeleter1<Quantizer> del(squant);
772 #pragma omp parallel for
773  for (size_t i = 0; i < n; i++)
774  squant->decode_vector (codes + i * code_size, x + i * d);
775 }
776 
777 
778 ScalarQuantizer::DistanceComputer *ScalarQuantizer::get_distance_computer (
779  MetricType metric)
780  const
781 {
782  if (metric == METRIC_L2) {
783  return select_distance_computer<SimilarityL2>(qtype, d, trained);
784  } else {
785  return select_distance_computer<SimilarityIP>(qtype, d, trained);
786  }
787 }
788 
789 
790 /*******************************************************************
791  * IndexScalarQuantizer implementation
792  ********************************************************************/
793 
794 IndexScalarQuantizer::IndexScalarQuantizer
795  (int d, ScalarQuantizer::QuantizerType qtype,
796  MetricType metric):
797  Index(d, metric),
798  sq (d, qtype)
799 {
800  is_trained = false;
801  code_size = sq.code_size;
802 }
803 
804 
805 IndexScalarQuantizer::IndexScalarQuantizer ():
806  IndexScalarQuantizer (0, ScalarQuantizer::QT_8bit)
807 {}
808 
809 void IndexScalarQuantizer::train(idx_t n, const float* x)
810 {
811  sq.train(n, x);
812  is_trained = true;
813 }
814 
815 void IndexScalarQuantizer::add(idx_t n, const float* x)
816 {
817  FAISS_THROW_IF_NOT (is_trained);
818  codes.resize ((n + ntotal) * code_size);
819  sq.compute_codes (x, &codes[ntotal * code_size], n);
820  ntotal += n;
821 }
822 
823 
824 
825 namespace {
826 
827 template<class C>
828 void search_flat_scalar_quantizer(
829  const IndexScalarQuantizer & index,
830  idx_t n,
831  const float* x,
832  idx_t k,
833  float* distances,
834  idx_t* labels)
835 {
836  size_t code_size = index.code_size;
837  size_t d = index.d;
838 
839 #pragma omp parallel
840  {
841  DistanceComputer *dc =
842  index.sq.get_distance_computer(index.metric_type);
843  ScopeDeleter1<DistanceComputer> del(dc);
844 
845 #pragma omp for
846  for (size_t i = 0; i < n; i++) {
847  idx_t *idxi = labels + i * k;
848  float *simi = distances + i * k;
849  heap_heapify<C> (k, simi, idxi);
850 
851  const float *xi = x + i * d;
852  const uint8_t *ci = index.codes.data ();
853 
854  for (size_t j = 0; j < index.ntotal; j++) {
855  float accu = dc->compute_distance(xi, ci);
856  if (C::cmp (simi [0], accu)) {
857  heap_pop<C> (k, simi, idxi);
858  heap_push<C> (k, simi, idxi, accu, j);
859  }
860  ci += code_size;
861  }
862  heap_reorder<C> (k, simi, idxi);
863  }
864  }
865 
866 };
867 
868 }
869 
871  idx_t n,
872  const float* x,
873  idx_t k,
874  float* distances,
875  idx_t* labels) const
876 {
877  FAISS_THROW_IF_NOT (is_trained);
878  if (metric_type == METRIC_L2) {
879  search_flat_scalar_quantizer<CMax<float, idx_t> > (*this, n, x, k, distances, labels);
880  } else {
881  search_flat_scalar_quantizer<CMin<float, idx_t> > (*this, n, x, k, distances, labels);
882  }
883 }
884 
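/* Illustrative usage sketch (d, nb, nq, xb, xq are placeholders): the
 * flat scalar-quantizer index is trained on the database vectors, the
 * codes are stored in a single flat array, and search scans them all:
 *
 *   IndexScalarQuantizer index (d, ScalarQuantizer::QT_8bit, METRIC_L2);
 *   index.train (nb, xb);
 *   index.add (nb, xb);
 *   std::vector<float> D (nq * 5);
 *   std::vector<Index::idx_t> I (nq * 5);
 *   index.search (nq, xq, 5, D.data(), I.data());
 */
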
885 void IndexScalarQuantizer::reset()
886 {
887  codes.clear();
888  ntotal = 0;
889 }
890 
891 void IndexScalarQuantizer::reconstruct_n(
892  idx_t i0, idx_t ni, float* recons) const
893 {
894  Quantizer *squant = select_quantizer (sq);
895  ScopeDeleter1<Quantizer> del (squant);
896  for (size_t i = 0; i < ni; i++) {
897  squant->decode_vector(&codes[(i + i0) * code_size], recons + i * d);
898  }
899 }
900 
901 void IndexScalarQuantizer::reconstruct(idx_t key, float* recons) const
902 {
903  reconstruct_n(key, 1, recons);
904 }
905 
906 
907 /*******************************************************************
908  * IndexIVFScalarQuantizer implementation
909  ********************************************************************/
910 
911 IndexIVFScalarQuantizer::IndexIVFScalarQuantizer
912  (Index *quantizer, size_t d, size_t nlist,
913  QuantizerType qtype, MetricType metric):
914  IndexIVF (quantizer, d, nlist, 0, metric),
915  sq (d, qtype)
916 {
917  code_size = sq.code_size;
918  // was not known at construction time
919  invlists->code_size = code_size;
920  is_trained = false;
921 }
922 
923 IndexIVFScalarQuantizer::IndexIVFScalarQuantizer ():
924  IndexIVF ()
925 {}
926 
927 void IndexIVFScalarQuantizer::train_residual (idx_t n, const float *x)
928 {
929  long * idx = new long [n];
930  ScopeDeleter<long> del (idx);
931  quantizer->assign (n, x, idx);
932  float *residuals = new float [n * d];
933  ScopeDeleter<float> del2 (residuals);
934 
935 #pragma omp parallel for
936  for (idx_t i = 0; i < n; i++) {
937  quantizer->compute_residual (x + i * d, residuals + i * d, idx[i]);
938  }
939 
940  sq.train (n, residuals);
941 
942 }
943 
944 
945 void IndexIVFScalarQuantizer::add_with_ids
946  (idx_t n, const float * x, const long *xids)
947 {
948  FAISS_THROW_IF_NOT (is_trained);
949  long * idx = new long [n];
950  ScopeDeleter<long> del (idx);
951  quantizer->assign (n, x, idx);
952  size_t nadd = 0;
953  Quantizer *squant = select_quantizer (sq);
954  ScopeDeleter1<Quantizer> del2 (squant);
955 
956 #pragma omp parallel reduction(+: nadd)
957  {
958  std::vector<float> residual (d);
959  std::vector<uint8_t> one_code (code_size);
960  int nt = omp_get_num_threads();
961  int rank = omp_get_thread_num();
962 
963  // each thread takes care of a subset of lists
964  for (size_t i = 0; i < n; i++) {
965  long list_no = idx [i];
966  if (list_no >= 0 && list_no % nt == rank) {
967  long id = xids ? xids[i] : ntotal + i;
968 
969  quantizer->compute_residual (
970  x + i * d, residual.data(), list_no);
971 
972  squant->encode_vector (residual.data(), one_code.data());
973 
974  invlists->add_entry (list_no, id, one_code.data());
975 
976  nadd++;
977 
978  }
979  }
980  }
981  ntotal += nadd;
982 }
983 
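/* Illustrative usage sketch (d, nlist, nb, nq, xb, xq are placeholders;
 * IndexFlatL2 comes from IndexFlat.h and must outlive the IVF index):
 *
 *   IndexFlatL2 coarse (d);
 *   IndexIVFScalarQuantizer index (&coarse, d, nlist,
 *                                  ScalarQuantizer::QT_8bit, METRIC_L2);
 *   index.train (nb, xb);   // trains the coarse quantizer and the SQ ranges
 *   index.add (nb, xb);
 *   index.nprobe = 4;
 *   std::vector<float> D (nq * 10);
 *   std::vector<Index::idx_t> I (nq * 10);
 *   index.search (nq, xq, 10, D.data(), I.data());
 */
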
984 namespace {
985 
986 
987 void search_with_probes_ip (const IndexIVFScalarQuantizer & index,
988  const float *x,
989  const idx_t *cent_ids, const float *cent_dis,
990  DistanceComputer & dc,
991  int k, float *simi, idx_t *idxi,
992  bool store_pairs)
993 {
994  int nprobe = index.nprobe;
995  size_t code_size = index.code_size;
996  size_t d = index.d;
997  std::vector<float> decoded(d);
998  minheap_heapify (k, simi, idxi);
999  size_t nscan = 0;
1000  for (int i = 0; i < nprobe; i++) {
1001  idx_t list_no = cent_ids[i];
1002  if (list_no < 0) break;
1003  float accu0 = cent_dis[i];
1004 
1005  const size_t list_size = index.invlists->list_size (list_no);
1006  const uint8_t * codes = index.invlists->get_codes (list_no);
1007  const idx_t * ids =
1008  store_pairs ? nullptr : index.invlists->get_ids (list_no);
1009 
1010  SimilarityIP sim(x);
1011 
1012  for (size_t j = 0; j < list_size; j++) {
1013 
1014  float accu = accu0 + dc.compute_distance(x, codes);
1015 
1016  if (accu > simi [0]) {
1017  minheap_pop (k, simi, idxi);
1018  long id = store_pairs ? (list_no << 32 | j) : ids[j];
1019  minheap_push (k, simi, idxi, accu, id);
1020  }
1021  codes += code_size;
1022  }
1023  nscan += list_size;
1024  if (index.max_codes && nscan > index.max_codes)
1025  break;
1026  }
1027  minheap_reorder (k, simi, idxi);
1028 }
1029 
1030 void search_with_probes_L2 (const IndexIVFScalarQuantizer & index,
1031  const float *x_in,
1032  const idx_t *cent_ids,
1033  const Index *quantizer,
1034  DistanceComputer & dc,
1035  int k, float *simi, idx_t *idxi,
1036  bool store_pairs)
1037 {
1038  int nprobe = index.nprobe;
1039  size_t code_size = index.code_size;
1040  size_t d = index.d;
1041  std::vector<float> x(d);
1042  maxheap_heapify (k, simi, idxi);
1043  size_t nscan = 0;
1044  for (int i = 0; i < nprobe; i++) {
1045  idx_t list_no = cent_ids[i];
1046  if (list_no < 0) break;
1047 
1048  const size_t list_size = index.invlists->list_size (list_no);
1049  const uint8_t * codes = index.invlists->get_codes (list_no);
1050  const idx_t * ids =
1051  store_pairs ? nullptr : index.invlists->get_ids (list_no);
1052 
1053  // shift of x_in wrt centroid
1054  quantizer->compute_residual (x_in, x.data(), list_no);
1055 
1056  for (size_t j = 0; j < list_size; j++) {
1057 
1058  float dis = dc.compute_distance (x.data(), codes);
1059 
1060  if (dis < simi [0]) {
1061  maxheap_pop (k, simi, idxi);
1062  long id = store_pairs ? (list_no << 32 | j) : ids[j];
1063  maxheap_push (k, simi, idxi, dis, id);
1064  }
1065  codes += code_size;
1066  }
1067  nscan += list_size;
1068  if (index.max_codes && nscan > index.max_codes)
1069  break;
1070  }
1071  maxheap_reorder (k, simi, idxi);
1072 }
1073 
1074 } // anonymous namespace
1075 
1076 void IndexIVFScalarQuantizer::search_preassigned (
1077  idx_t n, const float *x, idx_t k,
1078  const idx_t *idx,
1079  const float *dis,
1080  float *distances, idx_t *labels,
1081  bool store_pairs) const
1082 {
1083  FAISS_THROW_IF_NOT (is_trained);
1084 
1085 
1086  if (metric_type == METRIC_INNER_PRODUCT) {
1087 #pragma omp parallel
1088  {
1089  DistanceComputer *dc = sq.get_distance_computer (metric_type);
1090  ScopeDeleter1<DistanceComputer> del(dc);
1091 #pragma omp for
1092  for (size_t i = 0; i < n; i++) {
1093  search_with_probes_ip (*this, x + i * d,
1094  idx + i * nprobe, dis + i * nprobe, *dc,
1095  k, distances + i * k, labels + i * k,
1096  store_pairs);
1097  }
1098  }
1099  } else {
1100 #pragma omp parallel
1101  {
1102  DistanceComputer *dc = sq.get_distance_computer (metric_type);
1103  ScopeDeleter1<DistanceComputer> del(dc);
1104 #pragma omp for
1105  for (size_t i = 0; i < n; i++) {
1106  search_with_probes_L2 (*this, x + i * d,
1107  idx + i * nprobe, quantizer, *dc,
1108  k, distances + i * k, labels + i * k,
1109  store_pairs);
1110  }
1111  }
1112  }
1113 
1114 }
1115 
1116 void IndexIVFScalarQuantizer::reconstruct_from_offset (long list_no,
1117  long offset,
1118  float* recons) const
1119 {
1120  std::vector<float> centroid(d);
1121  quantizer->reconstruct (list_no, centroid.data());
1122 
1123  const uint8_t* code = invlists->get_single_code (list_no, offset);
1124  sq.decode (code, recons, 1);
1125  for (int i = 0; i < d; ++i) {
1126  recons[i] += centroid[i];
1127  }
1128 }
1129 
1130 } // namespace faiss