#include "IndexScalarQuantizer.h"

#include <cstdio>
#include <cstring>
#include <cmath>
#include <algorithm>
#include <memory>
#include <vector>

#include <omp.h>

#include <immintrin.h>

#include "utils.h"
#include "Heap.h"
#include "FaissAssert.h"

namespace faiss {

using DistanceComputer = ScalarQuantizer::DistanceComputer;
using QuantizerType = ScalarQuantizer::QuantizerType;
using RangeStat = ScalarQuantizer::RangeStat;
using idx_t = Index::idx_t;
/*******************************************************************
 * Codec: converts between a value in [0, 1] and the component i of a
 * code array. The SIMD versions decode 8 components at once.
 */

struct Codec8bit {

    static void encode_component (float x, uint8_t *code, int i) {
        code[i] = (int)(255 * x);
    }

    static float decode_component (const uint8_t *code, int i) {
        return (code[i] + 0.5f) / 255.0f;
    }

    static __m256 decode_8_components (const uint8_t *code, int i) {
        uint64_t c8 = *(uint64_t*)(code + i);
        __m128i c4lo = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8));
        __m128i c4hi = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8 >> 32));
        __m256i i8 = _mm256_castsi128_si256 (c4lo);
        i8 = _mm256_insertf128_si256 (i8, c4hi, 1);
        __m256 f8 = _mm256_cvtepi32_ps (i8);
        __m256 half = _mm256_set1_ps (0.5f);
        f8 = _mm256_add_ps (f8, half);
        __m256 one_255 = _mm256_set1_ps (1.f / 255.f);
        return _mm256_mul_ps (f8, one_255);
    }

};
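// The +0.5 offset in decode_component reconstructs at the center of
// each quantization bin: encode maps x in [0, 1] to c = floor(255 x),
// and (c + 0.5) / 255 is the midpoint of the interval of values that
// map to c, which halves the worst-case reconstruction error.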
struct Codec4bit {

    static void encode_component (float x, uint8_t *code, int i) {
        code[i / 2] |= (int)(x * 15.0) << ((i & 1) << 2);
    }

    static float decode_component (const uint8_t *code, int i) {
        return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f;
    }

    static __m256 decode_8_components (const uint8_t *code, int i) {
        uint32_t c4 = *(uint32_t*)(code + (i >> 1));
        uint32_t mask = 0x0f0f0f0f;
        uint32_t c4ev = c4 & mask;         // components 0, 2, 4, 6
        uint32_t c4od = (c4 >> 4) & mask;  // components 1, 3, 5, 7

        // re-interleave: the 8 values end up in the low 8 bytes of c8
        __m128i c8 = _mm_unpacklo_epi8 (_mm_set1_epi32(c4ev),
                                        _mm_set1_epi32(c4od));
        __m128i c4lo = _mm_cvtepu8_epi32 (c8);
        __m128i c4hi = _mm_cvtepu8_epi32 (_mm_srli_si128(c8, 4));
        __m256i i8 = _mm256_castsi128_si256 (c4lo);
        i8 = _mm256_insertf128_si256 (i8, c4hi, 1);
        __m256 f8 = _mm256_cvtepi32_ps (i8);
        __m256 half = _mm256_set1_ps (0.5f);
        f8 = _mm256_add_ps (f8, half);
        __m256 one_15 = _mm256_set1_ps (1.f / 15.f);
        return _mm256_mul_ps (f8, one_15);
    }

};
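// 4-bit codes pack two components per byte: component i sits in the
// low nibble of byte i/2 for even i and in the high nibble for odd i
// ((i & 1) << 2 yields a shift of 0 or 4). Because encode_component
// ORs nibbles in, the code buffer must be zeroed beforehand; the
// encoding paths below all memset before encoding.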
/*******************************************************************
 * FP16 encoding / decoding
 */

#ifdef USE_F16C

uint16_t encode_fp16 (float x) {
    __m128 xf = _mm_set1_ps (x);
    __m128i xi = _mm_cvtps_ph (
        xf, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtsi128_si32 (xi) & 0xffff;
}

float decode_fp16 (uint16_t x) {
    __m128i xi = _mm_set1_epi16 (x);
    __m128 xf = _mm_cvtph_ps (xi);
    return _mm_cvtss_f32 (xf);
}
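// Software fallback for when the F16C instruction set is unavailable
// (the intrinsic path above requires compiling with -mf16c or
// equivalent).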
#else

// non-intrinsic FP16 <-> FP32 conversion, adapted from Fabian "ryg"
// Giesen's public-domain float <-> half-float conversion code

float floatbits (uint32_t x) {
    void *xptr = &x;
    return *(float*)xptr;
}

uint32_t intbits (float f) {
    void *fptr = &f;
    return *(uint32_t*)fptr;
}

uint16_t encode_fp16 (float f) {
    uint32_t sign_mask = 0x80000000u;
    int32_t o;

    uint32_t fint = intbits(f);
    uint32_t sign = fint & sign_mask;
    fint ^= sign;  // work on |f|, re-attach the sign at the end

    // Inf or NaN (all exponent bits set): NaN -> qNaN, Inf -> Inf.
    // Unconditional assignment, overridden below for the regular case.
    uint32_t f32infty = 255u << 23;
    o = (fint > f32infty) ? 0x7e00u : 0x7c00u;

    // (de)normalized number or zero
    const uint32_t round_mask = ~0xfffu;
    const uint32_t magic = 15u << 23;

    // shift the exponent down, denormalizing if necessary
    float fscale = floatbits(fint & round_mask) * floatbits(magic);
    fscale = std::min(fscale, floatbits((31u << 23) - 0x1000u));
    int32_t fint2 = intbits(fscale) - round_mask;

    if (fint < f32infty)
        o = fint2 >> 13;  // take the bits

    return (o | (sign >> 16));
}

float decode_fp16 (uint16_t h) {
    const uint32_t shifted_exp = 0x7c00u << 13;  // exponent mask after shift

    int32_t o = ((int32_t)(h & 0x7fffu)) << 13;  // exponent / mantissa bits
    int32_t exp = shifted_exp & o;               // just the exponent
    o += (int32_t)(127 - 15) << 23;              // exponent adjust

    // handle the exponent special cases
    int32_t infnan_val = o + ((int32_t)(128 - 16) << 23);
    int32_t zerodenorm_val = intbits(
        floatbits(o + (1u << 23)) - floatbits(113u << 23));
    int32_t reg_val = (exp == 0) ? zerodenorm_val : o;

    int32_t sign_bit = ((int32_t)(h & 0x8000u)) << 16;
    return floatbits(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit);
}

#endif
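// Round-trip sanity check (illustrative): values exactly representable
// in fp16 survive encode/decode unchanged, others round to the nearest
// half-precision value:
//
//   assert (decode_fp16 (encode_fp16 (0.5f)) == 0.5f);
//   assert (fabs (decode_fp16 (encode_fp16 (0.1f)) - 0.1f) < 1e-3);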
/*******************************************************************
 * Quantizer: encodes and decodes full vectors
 */

struct Quantizer {
    virtual void encode_vector (const float *x, uint8_t *code) const = 0;
    virtual void decode_vector (const uint8_t *code, float *x) const = 0;

    virtual ~Quantizer () {}
};
template<class Codec, bool uniform, int SIMD>
struct QuantizerTemplate {};

template<class Codec>
struct QuantizerTemplate<Codec, true, 1>: Quantizer {
    const size_t d;
    const float vmin, vdiff;

    QuantizerTemplate (size_t d, const std::vector<float> &trained):
        d(d), vmin(trained[0]), vdiff(trained[1])
    {}

    void encode_vector (const float* x, uint8_t* code) const override {
        for (size_t i = 0; i < d; i++) {
            float xi = (x[i] - vmin) / vdiff;
            if (xi < 0) xi = 0;      // clamp out-of-range components
            if (xi > 1.0) xi = 1.0;
            Codec::encode_component (xi, code, i);
        }
    }

    void decode_vector (const uint8_t* code, float* x) const override {
        for (size_t i = 0; i < d; i++) {
            float xi = Codec::decode_component (code, i);
            x[i] = vmin + xi * vdiff;
        }
    }

    float reconstruct_component (const uint8_t * code, int i) const {
        float xi = Codec::decode_component (code, i);
        return vmin + xi * vdiff;
    }

};
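// For the uniform quantizers, trained[] holds two scalars shared by
// all dimensions: trained[0] = vmin and trained[1] = vdiff, so code c
// in any dimension reconstructs to vmin + decode(c) * vdiff.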
template<class Codec>
struct QuantizerTemplate<Codec, true, 8>: QuantizerTemplate<Codec, true, 1> {

    QuantizerTemplate (size_t d, const std::vector<float> &trained):
        QuantizerTemplate<Codec, true, 1> (d, trained) {}

    __m256 reconstruct_8_components (const uint8_t * code, int i) const {
        __m256 xi = Codec::decode_8_components (code, i);
        return _mm256_add_ps (
            _mm256_set1_ps (this->vmin),
            _mm256_mul_ps (xi, _mm256_set1_ps (this->vdiff)));
    }

};
template<class Codec>
struct QuantizerTemplate<Codec, false, 1>: Quantizer {
    const size_t d;
    const float *vmin, *vdiff;

    QuantizerTemplate (size_t d, const std::vector<float> &trained):
        d(d), vmin(trained.data()), vdiff(trained.data() + d) {}

    void encode_vector (const float* x, uint8_t* code) const override {
        for (size_t i = 0; i < d; i++) {
            float xi = (x[i] - vmin[i]) / vdiff[i];
            if (xi < 0) xi = 0;
            if (xi > 1.0) xi = 1.0;
            Codec::encode_component (xi, code, i);
        }
    }

    void decode_vector (const uint8_t* code, float* x) const override {
        for (size_t i = 0; i < d; i++) {
            float xi = Codec::decode_component (code, i);
            x[i] = vmin[i] + xi * vdiff[i];
        }
    }

    float reconstruct_component (const uint8_t * code, int i) const {
        float xi = Codec::decode_component (code, i);
        return vmin[i] + xi * vdiff[i];
    }

};
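// For the non-uniform quantizers, trained[] holds 2 * d values: a
// per-dimension vmin in trained[0..d-1] followed by a per-dimension
// vdiff in trained[d..2d-1].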
template<class Codec>
struct QuantizerTemplate<Codec, false, 8>: QuantizerTemplate<Codec, false, 1> {

    QuantizerTemplate (size_t d, const std::vector<float> &trained):
        QuantizerTemplate<Codec, false, 1> (d, trained) {}

    __m256 reconstruct_8_components (const uint8_t * code, int i) const {
        __m256 xi = Codec::decode_8_components (code, i);
        return _mm256_add_ps (
            _mm256_loadu_ps (this->vmin + i),
            _mm256_mul_ps (xi, _mm256_loadu_ps (this->vdiff + i)));
    }

};
template<int SIMDWIDTH>
struct QuantizerFP16 {};

template<>
struct QuantizerFP16<1>: Quantizer {
    const size_t d;

    QuantizerFP16 (size_t d, const std::vector<float> & /* trained */):
        d(d) {}

    void encode_vector (const float* x, uint8_t* code) const override {
        for (size_t i = 0; i < d; i++) {
            ((uint16_t*)code)[i] = encode_fp16(x[i]);
        }
    }

    void decode_vector (const uint8_t* code, float* x) const override {
        for (size_t i = 0; i < d; i++) {
            x[i] = decode_fp16(((uint16_t*)code)[i]);
        }
    }

    float reconstruct_component (const uint8_t * code, int i) const {
        return decode_fp16(((uint16_t*)code)[i]);
    }

};
template<>
struct QuantizerFP16<8>: QuantizerFP16<1> {

    QuantizerFP16 (size_t d, const std::vector<float> &trained):
        QuantizerFP16<1> (d, trained) {}

    __m256 reconstruct_8_components (const uint8_t * code, int i) const {
        __m128i codei = _mm_loadu_si128 ((const __m128i*)(code + 2 * i));
        return _mm256_cvtph_ps (codei);
    }

};
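// The fp16 quantizer stores each component verbatim as an IEEE
// half-float (code_size = 2 * d) and needs no training, which is why
// the trained vector is ignored. The SIMD variant converts 8 halves
// at a time with vcvtph2ps.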
template<int SIMDWIDTH>
Quantizer *select_quantizer (
        QuantizerType qtype,
        size_t d,
        const std::vector<float> & trained)
{
    switch(qtype) {
    case ScalarQuantizer::QT_8bit:
        return new QuantizerTemplate<Codec8bit, false, SIMDWIDTH>(d, trained);
    case ScalarQuantizer::QT_4bit:
        return new QuantizerTemplate<Codec4bit, false, SIMDWIDTH>(d, trained);
    case ScalarQuantizer::QT_8bit_uniform:
        return new QuantizerTemplate<Codec8bit, true, SIMDWIDTH>(d, trained);
    case ScalarQuantizer::QT_4bit_uniform:
        return new QuantizerTemplate<Codec4bit, true, SIMDWIDTH>(d, trained);
    case ScalarQuantizer::QT_fp16:
        return new QuantizerFP16<SIMDWIDTH> (d, trained);
    }
    FAISS_THROW_MSG ("unknown qtype");
    return nullptr;
}
Quantizer *select_quantizer (const ScalarQuantizer &sq)
{
    if (sq.d % 8 == 0) {
        return select_quantizer<8> (sq.qtype, sq.d, sq.trained);
    } else {
        return select_quantizer<1> (sq.qtype, sq.d, sq.trained);
    }
}
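// Dispatch rule used throughout this file: when the dimension is a
// multiple of 8, instantiate the AVX (SIMDWIDTH = 8) variants,
// otherwise fall back to the scalar (SIMDWIDTH = 1) ones, so any
// dimension works, just more slowly when it is not a multiple of 8.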
/*******************************************************************
 * Quantizer range training
 */

static float sqr (float x) {
    return x * x;
}

void train_Uniform(RangeStat rs, float rs_arg,
                   idx_t n, int k, const float *x,
                   std::vector<float> & trained)
{
    trained.resize (2);
    float & vmin = trained[0];
    float & vmax = trained[1];

    if (rs == ScalarQuantizer::RS_minmax) {
        vmin = HUGE_VAL; vmax = -HUGE_VAL;
        for (size_t i = 0; i < n; i++) {
            if (x[i] < vmin) vmin = x[i];
            if (x[i] > vmax) vmax = x[i];
        }
        float vexp = (vmax - vmin) * rs_arg;
        vmin -= vexp;
        vmax += vexp;
    } else if (rs == ScalarQuantizer::RS_meanstd) {
        double sum = 0, sum2 = 0;
        for (size_t i = 0; i < n; i++) {
            sum += x[i];
            sum2 += x[i] * x[i];
        }
        float mean = sum / n;
        float var = sum2 / n - mean * mean;
        float std = var <= 0 ? 1.0 : sqrt(var);

        vmin = mean - std * rs_arg;
        vmax = mean + std * rs_arg;
    } else if (rs == ScalarQuantizer::RS_quantiles) {
        std::vector<float> x_copy(n);
        memcpy(x_copy.data(), x, n * sizeof(*x));
        // could use a quickselect instead of a full sort
        std::sort(x_copy.begin(), x_copy.end());
        int o = int(rs_arg * n);
        if (o < 0) o = 0;
        if (o > n - o) o = n / 2;
        vmin = x_copy[o];
        vmax = x_copy[n - 1 - o];
    } else if (rs == ScalarQuantizer::RS_optim) {
        float a, b;
        float sx = 0;
        // initialize a, b from the min / max range
        {
            vmin = HUGE_VAL; vmax = -HUGE_VAL;
            for (size_t i = 0; i < n; i++) {
                if (x[i] < vmin) vmin = x[i];
                if (x[i] > vmax) vmax = x[i];
                sx += x[i];
            }
            b = vmin;
            a = (vmax - vmin) / (k - 1);
        }
        int verbose = false;
        int niter = 2000;
        float last_err = -1;
        int iter_last_err = 0;
        for (int it = 0; it < niter; it++) {
            float sn = 0, sn2 = 0, sxn = 0, err1 = 0;

            for (idx_t i = 0; i < n; i++) {
                float xi = x[i];
                float ni = floor ((xi - b) / a + 0.5);
                if (ni < 0) ni = 0;
                if (ni >= k) ni = k - 1;
                err1 += sqr (xi - (ni * a + b));
                sn  += ni;
                sn2 += ni * ni;
                sxn += ni * xi;
            }

            if (err1 == last_err) {
                iter_last_err++;
                if (iter_last_err == 16) break;
            } else {
                last_err = err1;
                iter_last_err = 0;
            }

            // least-squares update of the slope a and offset b
            float det = sqr (sn) - sn2 * n;
            b = (sn * sxn - sn2 * sx) / det;
            a = (sn * sx - n * sxn) / det;
            if (verbose) {
                printf ("it %d, err1=%g \r", it, err1);
                fflush (stdout);
            }
        }
        if (verbose) printf("\n");

        vmin = b;
        vmax = b + a * (k - 1);
    } else {
        FAISS_THROW_MSG ("unknown RangeStat");
    }
    // store (vmin, vdiff) rather than (vmin, vmax)
    vmax -= vmin;
}
void train_NonUniform(RangeStat rs, float rs_arg,
                      idx_t n, int d, int k, const float *x,
                      std::vector<float> & trained)
{
    if (rs == ScalarQuantizer::RS_minmax) {
        trained.resize (2 * d);
        float * vmin = trained.data();
        float * vmax = trained.data() + d;

        memcpy (vmin, x, sizeof(*x) * d);
        memcpy (vmax, x, sizeof(*x) * d);
        for (size_t i = 1; i < n; i++) {
            const float *xi = x + i * d;
            for (size_t j = 0; j < d; j++) {
                if (xi[j] < vmin[j]) vmin[j] = xi[j];
                if (xi[j] > vmax[j]) vmax[j] = xi[j];
            }
        }
        float *vdiff = vmax;  // trained[d..2d-1] becomes the range
        for (size_t j = 0; j < d; j++) {
            float vexp = (vmax[j] - vmin[j]) * rs_arg;
            vmin[j] -= vexp;
            vmax[j] += vexp;
            vdiff[j] = vmax[j] - vmin[j];
        }
    } else {
        // transpose the data so that each dimension is contiguous,
        // then train each dimension separately
        std::vector<float> xt(n * d);
        for (size_t i = 0; i < n; i++) {
            const float *xi = x + i * d;
            for (size_t j = 0; j < d; j++) {
                xt[j * n + i] = xi[j];
            }
        }
        trained.resize (2 * d);
        float * vmin = trained.data();
        float * vmax = trained.data() + d;
#pragma omp parallel for
        for (int j = 0; j < d; j++) {
            // trained_d is per-thread: sharing one buffer across the
            // parallel loop would be a data race
            std::vector<float> trained_d(2);
            train_Uniform(rs, rs_arg,
                          n, k, xt.data() + j * n, trained_d);
            vmin[j] = trained_d[0];
            vmax[j] = trained_d[1];
        }
    }
}
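// The transpose above costs O(n * d) temporary memory, but it lets
// each train_Uniform call scan one dimension with unit stride and
// makes the per-dimension training trivially parallel.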
/*******************************************************************
 * Similarity: accumulates the distance between a query and the
 * components of a reconstructed database vector
 */

template<int SIMDWIDTH>
struct SimilarityL2 {};

template<>
struct SimilarityL2<1> {
    const float *y, *yi;
    float accu;

    explicit SimilarityL2 (const float * y): y(y) {}

    void begin () { accu = 0; yi = y; }

    void add_component (float x) {
        float tmp = *yi++ - x;
        accu += tmp * tmp;
    }

    void add_component_2 (float x1, float x2) {
        float tmp = x1 - x2;
        accu += tmp * tmp;
    }

    float result () { return accu; }
};

template<>
struct SimilarityL2<8> {
    const float *y, *yi;
    __m256 accu8;

    explicit SimilarityL2 (const float * y): y(y) {}

    void begin_8 () {
        accu8 = _mm256_setzero_ps();
        yi = y;
    }

    void add_8_components (__m256 x) {
        __m256 yiv = _mm256_loadu_ps (yi);
        yi += 8;
        __m256 tmp = _mm256_sub_ps (yiv, x);
        accu8 = _mm256_add_ps (accu8, _mm256_mul_ps (tmp, tmp));
    }

    void add_8_components_2 (__m256 x1, __m256 x2) {
        __m256 tmp = _mm256_sub_ps (x1, x2);
        accu8 = _mm256_add_ps (accu8, _mm256_mul_ps (tmp, tmp));
    }

    float result_8 () {
        __m256 sum = _mm256_hadd_ps (accu8, accu8);
        __m256 sum2 = _mm256_hadd_ps (sum, sum);
        // element 0 of each 128-bit half of sum2 holds a half-sum;
        // adding the two gives the total
        return
            _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
            _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
    }
};
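// SimilarityL2 and SimilarityIP (below) follow the same implicit
// protocol, which DCTemplate relies on: begin() / begin_8() resets
// the accumulator and the query pointer, add_component(s) folds in
// one (or 8) reconstructed components, and result() / result_8()
// returns the scalar distance. The *_2 variants are used for
// code-to-code distances, where both operands are reconstructed.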
template<int SIMDWIDTH>
struct SimilarityIP {};

template<>
struct SimilarityIP<1> {
    const float *y, *yi;
    float accu;

    explicit SimilarityIP (const float * y): y(y) {}

    void begin () { accu = 0; yi = y; }

    void add_component (float x) {
        accu += *yi++ * x;
    }

    void add_component_2 (float x1, float x2) {
        accu += x1 * x2;
    }

    float result () { return accu; }
};

template<>
struct SimilarityIP<8> {
    const float *y, *yi;
    __m256 accu8;

    explicit SimilarityIP (const float * y): y(y) {}

    void begin_8 () {
        accu8 = _mm256_setzero_ps();
        yi = y;
    }

    void add_8_components (__m256 x) {
        __m256 yiv = _mm256_loadu_ps (yi);
        yi += 8;
        accu8 = _mm256_add_ps (accu8, _mm256_mul_ps (yiv, x));
    }

    void add_8_components_2 (__m256 x1, __m256 x2) {
        accu8 = _mm256_add_ps (accu8, _mm256_mul_ps (x1, x2));
    }

    float result_8 () {
        __m256 sum = _mm256_hadd_ps (accu8, accu8);
        __m256 sum2 = _mm256_hadd_ps (sum, sum);
        return
            _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
            _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
    }
};
/*******************************************************************
 * DistanceComputer: combines a quantizer and a similarity to compute
 * vector-to-code and code-to-code distances
 */

template<class Quantizer, class Similarity, int SIMDWIDTH>
struct DCTemplate : ScalarQuantizer::DistanceComputer {};

template<class Quantizer, class Similarity>
struct DCTemplate<Quantizer, Similarity, 1> : DistanceComputer
{
    Quantizer quant;

    DCTemplate (size_t d, const std::vector<float> &trained):
        quant(d, trained) {}

    float compute_distance (const float *x, const uint8_t *code)
        const override final
    {
        Similarity sim(x);
        sim.begin();
        for (size_t i = 0; i < quant.d; i++) {
            float xi = quant.reconstruct_component (code, i);
            sim.add_component (xi);
        }
        return sim.result();
    }

    float compute_code_distance (const uint8_t *code1, const uint8_t *code2)
        const override final
    {
        Similarity sim(nullptr);
        sim.begin();
        for (size_t i = 0; i < quant.d; i++) {
            float x1 = quant.reconstruct_component (code1, i);
            float x2 = quant.reconstruct_component (code2, i);
            sim.add_component_2 (x1, x2);
        }
        return sim.result();
    }
};
template<class Quantizer, class Similarity>
struct DCTemplate<Quantizer, Similarity, 8> : DistanceComputer
{
    Quantizer quant;

    DCTemplate (size_t d, const std::vector<float> &trained):
        quant(d, trained) {}

    float compute_distance (const float *x, const uint8_t *code)
        const override final
    {
        Similarity sim(x);
        sim.begin_8();
        for (size_t i = 0; i < quant.d; i += 8) {
            __m256 xi = quant.reconstruct_8_components (code, i);
            sim.add_8_components (xi);
        }
        return sim.result_8();
    }

    float compute_code_distance (const uint8_t *code1, const uint8_t *code2)
        const override final
    {
        Similarity sim(nullptr);
        sim.begin_8();
        for (size_t i = 0; i < quant.d; i += 8) {
            __m256 x1 = quant.reconstruct_8_components (code1, i);
            __m256 x2 = quant.reconstruct_8_components (code2, i);
            sim.add_8_components_2 (x1, x2);
        }
        return sim.result_8();
    }
};
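// The SIMD=8 specialization consumes 8 components per iteration, so
// it requires d to be a multiple of 8; the dispatch functions below
// only instantiate it when that holds.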
template<class Sim, int SIMDWIDTH>
DistanceComputer *select_distance_computer (
        QuantizerType qtype,
        size_t d,
        const std::vector<float> & trained)
{
    switch(qtype) {
    case ScalarQuantizer::QT_8bit_uniform:
        return new DCTemplate<QuantizerTemplate<Codec8bit, true, SIMDWIDTH>,
                              Sim, SIMDWIDTH>(d, trained);
    case ScalarQuantizer::QT_4bit_uniform:
        return new DCTemplate<QuantizerTemplate<Codec4bit, true, SIMDWIDTH>,
                              Sim, SIMDWIDTH>(d, trained);
    case ScalarQuantizer::QT_8bit:
        return new DCTemplate<QuantizerTemplate<Codec8bit, false, SIMDWIDTH>,
                              Sim, SIMDWIDTH>(d, trained);
    case ScalarQuantizer::QT_4bit:
        return new DCTemplate<QuantizerTemplate<Codec4bit, false, SIMDWIDTH>,
                              Sim, SIMDWIDTH>(d, trained);
    case ScalarQuantizer::QT_fp16:
        return new DCTemplate<QuantizerFP16<SIMDWIDTH>,
                              Sim, SIMDWIDTH>(d, trained);
    }
    FAISS_THROW_MSG ("unknown qtype");
    return nullptr;
}
/*******************************************************************
 * ScalarQuantizer implementation
 */

ScalarQuantizer::ScalarQuantizer
    (size_t d, QuantizerType qtype):
    qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d (d)
{
    switch (qtype) {
    case QT_8bit: case QT_8bit_uniform:
        code_size = d;
        break;
    case QT_4bit: case QT_4bit_uniform:
        code_size = (d + 1) / 2;
        break;
    case QT_fp16:
        code_size = d * 2;
        break;
    }
}

ScalarQuantizer::ScalarQuantizer ():
    qtype(QT_8bit),
    rangestat(RS_minmax), rangestat_arg(0), d (0), code_size(0)
{}

void ScalarQuantizer::train (size_t n, const float *x)
{
    int bit_per_dim =
        qtype == QT_4bit_uniform ? 4 :
        qtype == QT_4bit ? 4 :
        qtype == QT_8bit_uniform ? 8 :
        qtype == QT_8bit ? 8 : -1;

    switch (qtype) {
    case QT_4bit_uniform: case QT_8bit_uniform:
        train_Uniform (rangestat, rangestat_arg,
                       n * d, 1 << bit_per_dim, x, trained);
        break;
    case QT_4bit: case QT_8bit:
        train_NonUniform (rangestat, rangestat_arg,
                          n, d, 1 << bit_per_dim, x, trained);
        break;
    case QT_fp16:
        break; // no training necessary
    }
}

void ScalarQuantizer::compute_codes (const float * x, uint8_t * codes,
                                     size_t n) const
{
    std::unique_ptr<Quantizer> squant (select_quantizer (*this));
    // the 4-bit encoders OR nibbles in: the buffer must start zeroed
    memset (codes, 0, code_size * n);
#pragma omp parallel for
    for (size_t i = 0; i < n; i++)
        squant->encode_vector (x + i * d, codes + i * code_size);
}

void ScalarQuantizer::decode (const uint8_t *codes, float *x, size_t n) const
{
    std::unique_ptr<Quantizer> squant (select_quantizer (*this));
#pragma omp parallel for
    for (size_t i = 0; i < n; i++)
        squant->decode_vector (codes + i * code_size, x + i * d);
}

DistanceComputer *ScalarQuantizer::get_distance_computer
    (MetricType metric) const
{
    if (d % 8 == 0) {
        if (metric == METRIC_L2) {
            return select_distance_computer<SimilarityL2<8>, 8>
                (qtype, d, trained);
        } else {
            return select_distance_computer<SimilarityIP<8>, 8>
                (qtype, d, trained);
        }
    } else {
        if (metric == METRIC_L2) {
            return select_distance_computer<SimilarityL2<1>, 1>
                (qtype, d, trained);
        } else {
            return select_distance_computer<SimilarityIP<1>, 1>
                (qtype, d, trained);
        }
    }
}
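// Typical standalone usage (illustrative sketch):
//
//   ScalarQuantizer sq (d, ScalarQuantizer::QT_8bit);
//   sq.train (nt, xt);                       // learn per-dim ranges
//   std::vector<uint8_t> codes (n * sq.code_size);
//   sq.compute_codes (x, codes.data(), n);   // d floats -> d bytes each
//   sq.decode (codes.data(), x_decoded, n);  // lossy reconstruction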
/*******************************************************************
 * IndexScalarQuantizer implementation
 */

IndexScalarQuantizer::IndexScalarQuantizer
    (int d, ScalarQuantizer::QuantizerType qtype, MetricType metric):
    Index(d, metric), sq(d, qtype)
{
    is_trained = false;
    code_size = sq.code_size;
}

IndexScalarQuantizer::IndexScalarQuantizer ():
    IndexScalarQuantizer(0, ScalarQuantizer::QT_8bit)
{}

void IndexScalarQuantizer::train (idx_t n, const float* x)
{
    sq.train (n, x);
    is_trained = true;
}

void IndexScalarQuantizer::add (idx_t n, const float* x)
{
    FAISS_THROW_IF_NOT (is_trained);
    codes.resize ((n + ntotal) * code_size);
    sq.compute_codes (x, &codes[ntotal * code_size], n);
    ntotal += n;
}
namespace {

template<class C>
void search_flat_scalar_quantizer(
        const IndexScalarQuantizer & index,
        idx_t n, const float* x, idx_t k,
        float* distances, idx_t* labels)
{
    size_t code_size = index.code_size;
    size_t d = index.d;

#pragma omp parallel
    {
        // one DistanceComputer per thread
        std::unique_ptr<DistanceComputer> dc (
            index.sq.get_distance_computer (index.metric_type));

#pragma omp for
        for (size_t i = 0; i < n; i++) {
            idx_t *idxi = labels + i * k;
            float *simi = distances + i * k;
            heap_heapify<C> (k, simi, idxi);

            const float *xi = x + i * d;
            const uint8_t *ci = index.codes.data ();

            for (size_t j = 0; j < index.ntotal; j++) {
                float accu = dc->compute_distance (xi, ci);
                if (C::cmp (simi[0], accu)) {
                    heap_pop<C> (k, simi, idxi);
                    heap_push<C> (k, simi, idxi, accu, j);
                }
                ci += code_size;
            }
            heap_reorder<C> (k, simi, idxi);
        }
    }
}

} // anonymous namespace
void IndexScalarQuantizer::search(
        idx_t n, const float* x, idx_t k,
        float* distances, idx_t* labels) const
{
    FAISS_THROW_IF_NOT (is_trained);
    if (metric_type == METRIC_L2) {
        // L2: smaller is better, collect with a max-heap
        search_flat_scalar_quantizer<CMax<float, idx_t> > (
            *this, n, x, k, distances, labels);
    } else {
        // inner product: larger is better, collect with a min-heap
        search_flat_scalar_quantizer<CMin<float, idx_t> > (
            *this, n, x, k, distances, labels);
    }
}
void IndexScalarQuantizer::reconstruct_n(
        idx_t i0, idx_t ni, float* recons) const
{
    std::unique_ptr<Quantizer> squant (select_quantizer (sq));
    for (size_t i = 0; i < ni; i++) {
        squant->decode_vector (&codes[(i + i0) * code_size], recons + i * d);
    }
}

void IndexScalarQuantizer::reconstruct(idx_t key, float* recons) const
{
    reconstruct_n (key, 1, recons);
}
/*******************************************************************
 * IndexIVFScalarQuantizer implementation
 */

IndexIVFScalarQuantizer::IndexIVFScalarQuantizer
    (Index *quantizer, size_t d, size_t nlist,
     ScalarQuantizer::QuantizerType qtype, MetricType metric):
    IndexIVF (quantizer, d, nlist, 0, metric),
    sq (d, qtype)
{
    code_size = sq.code_size;
    // was not known at construction time
    invlists->code_size = code_size;
    is_trained = false;
}

IndexIVFScalarQuantizer::IndexIVFScalarQuantizer ():
    IndexIVF ()
{}
void IndexIVFScalarQuantizer::train_residual (idx_t n, const float *x)
{
    const float * x_in = x;

    // subsample the training set if it is too large
    x = fvecs_maybe_subsample (
        d, (size_t*)&n, 100000,
        x, verbose, 1234);
    std::unique_ptr<const float []> del_x (x_in == x ? nullptr : x);

    std::unique_ptr<long []> idx (new long [n]);
    quantizer->assign (n, x, idx.get());

    std::unique_ptr<float []> residuals (new float [n * d]);

#pragma omp parallel for
    for (idx_t i = 0; i < n; i++) {
        quantizer->compute_residual (
            x + i * d, residuals.get() + i * d, idx[i]);
    }

    sq.train (n, residuals.get());
}
void IndexIVFScalarQuantizer::encode_vectors(
        idx_t n, const float* x,
        const idx_t *list_nos,
        uint8_t * codes) const
{
    std::unique_ptr<Quantizer> squant (select_quantizer (sq));
    memset (codes, 0, code_size * n);

#pragma omp parallel
    {
        std::vector<float> residual (d);

#pragma omp for
        for (size_t i = 0; i < n; i++) {
            long list_no = list_nos[i];
            if (list_no >= 0) {
                quantizer->compute_residual (
                    x + i * d, residual.data(), list_no);
                squant->encode_vector (residual.data(),
                                       codes + i * code_size);
            } else {
                // will not be added; leave the code zeroed out
                memset (codes + i * code_size, 0, code_size);
            }
        }
    }
}
void IndexIVFScalarQuantizer::add_with_ids
    (idx_t n, const float * x, const long *xids)
{
    FAISS_THROW_IF_NOT (is_trained);
    std::unique_ptr<long []> idx (new long [n]);
    quantizer->assign (n, x, idx.get());
    size_t nadd = 0;
    std::unique_ptr<Quantizer> squant (select_quantizer (sq));

#pragma omp parallel reduction(+: nadd)
    {
        std::vector<float> residual (d);
        std::vector<uint8_t> one_code (code_size);
        int nt = omp_get_num_threads();
        int rank = omp_get_thread_num();

        // each thread takes care of a subset of inverted lists
        for (size_t i = 0; i < n; i++) {
            long list_no = idx[i];
            if (list_no >= 0 && list_no % nt == rank) {
                long id = xids ? xids[i] : ntotal + i;

                quantizer->compute_residual (
                    x + i * d, residual.data(), list_no);

                memset (one_code.data(), 0, code_size);
                squant->encode_vector (residual.data(), one_code.data());

                invlists->add_entry (list_no, id, one_code.data());

                nadd++;
            }
        }
    }
    ntotal += n;
}
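// Partitioning the work by (list_no % nt == rank) means each thread
// scans all n assignments but only ever writes to its own subset of
// inverted lists, so add_entry needs no locking.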
/*******************************************************************
 * InvertedListScanners for the IVF index
 */

template<bool store_pairs, class Quantizer, int SIMDWIDTH>
struct IVFSQScannerIP: InvertedListScanner {

    DCTemplate<Quantizer, SimilarityIP<SIMDWIDTH>, SIMDWIDTH> dc;

    size_t code_size;

    idx_t list_no;    // current inverted list
    float accu0;      // added to all distances
    const float *x;   // current query

    IVFSQScannerIP(int d, const std::vector<float> & trained,
                   size_t code_size):
        dc(d, trained), code_size(code_size)
    {}

    void set_query (const float *query) override {
        this->x = query;
    }

    void set_list (idx_t list_no, float coarse_dis) override {
        this->list_no = list_no;
        accu0 = coarse_dis;
    }

    float distance_to_code (const uint8_t *code) const override {
        return accu0 + dc.compute_distance (x, code);
    }

    size_t scan_codes (size_t list_size,
                       const uint8_t *codes,
                       const idx_t *ids,
                       float *simi, idx_t *idxi,
                       size_t k) const override
    {
        size_t nup = 0;
        for (size_t j = 0; j < list_size; j++) {
            float accu = accu0 + dc.compute_distance (x, codes);
            if (accu > simi[0]) {
                minheap_pop (k, simi, idxi);
                long id = store_pairs ? (list_no << 32 | j) : ids[j];
                minheap_push (k, simi, idxi, accu, id);
                nup++;
            }
            codes += code_size;
        }
        return nup;
    }
};
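// With store_pairs, the reported id packs (list_no << 32 | j): the
// upper bits identify the inverted list and the lower bits the offset
// within it, sparing a lookup of the stored ids when the caller only
// needs to locate the code.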
template<bool store_pairs, class Quantizer, int SIMDWIDTH>
struct IVFSQScannerL2: InvertedListScanner {

    DCTemplate<Quantizer, SimilarityL2<SIMDWIDTH>, SIMDWIDTH> dc;

    size_t code_size;
    const Index *quantizer;
    idx_t list_no;           // current inverted list
    const float *x;          // current query
    std::vector<float> tmp;  // residual of x wrt the list centroid

    IVFSQScannerL2(int d, const std::vector<float> & trained,
                   size_t code_size,
                   const Index *quantizer):
        dc(d, trained), code_size(code_size), quantizer(quantizer),
        tmp (d)
    {}

    void set_query (const float *query) override {
        x = query;
    }

    void set_list (idx_t list_no, float /* coarse_dis */) override {
        this->list_no = list_no;
        // shift of the query wrt the list centroid
        quantizer->compute_residual (x, tmp.data(), list_no);
    }

    float distance_to_code (const uint8_t *code) const override {
        return dc.compute_distance (tmp.data(), code);
    }

    size_t scan_codes (size_t list_size,
                       const uint8_t *codes,
                       const idx_t *ids,
                       float *simi, idx_t *idxi,
                       size_t k) const override
    {
        size_t nup = 0;
        for (size_t j = 0; j < list_size; j++) {
            float dis = dc.compute_distance (tmp.data(), codes);
            if (dis < simi[0]) {
                maxheap_pop (k, simi, idxi);
                long id = store_pairs ? (list_no << 32 | j) : ids[j];
                maxheap_push (k, simi, idxi, dis, id);
                nup++;
            }
            codes += code_size;
        }
        return nup;
    }
};
template<class Quantizer, int SIMDWIDTH>
InvertedListScanner* sel2_InvertedListScanner
    (const IndexIVFScalarQuantizer *ivf, bool store_pairs)
{
    if (ivf->metric_type == METRIC_L2) {
        if (store_pairs) {
            return new IVFSQScannerL2<true, Quantizer, SIMDWIDTH>
                (ivf->d, ivf->sq.trained, ivf->code_size, ivf->quantizer);
        } else {
            return new IVFSQScannerL2<false, Quantizer, SIMDWIDTH>
                (ivf->d, ivf->sq.trained, ivf->code_size, ivf->quantizer);
        }
    } else {
        if (store_pairs) {
            return new IVFSQScannerIP<true, Quantizer, SIMDWIDTH>
                (ivf->d, ivf->sq.trained, ivf->code_size);
        } else {
            return new IVFSQScannerIP<false, Quantizer, SIMDWIDTH>
                (ivf->d, ivf->sq.trained, ivf->code_size);
        }
    }
}
template<int SIMDWIDTH>
InvertedListScanner* select_InvertedListScanner
    (const IndexIVFScalarQuantizer *ivf, bool store_pairs)
{
    switch(ivf->sq.qtype) {
    case ScalarQuantizer::QT_8bit_uniform:
        return sel2_InvertedListScanner
            <QuantizerTemplate<Codec8bit, true, SIMDWIDTH>,
             SIMDWIDTH>(ivf, store_pairs);
    case ScalarQuantizer::QT_4bit_uniform:
        return sel2_InvertedListScanner
            <QuantizerTemplate<Codec4bit, true, SIMDWIDTH>,
             SIMDWIDTH>(ivf, store_pairs);
    case ScalarQuantizer::QT_8bit:
        return sel2_InvertedListScanner
            <QuantizerTemplate<Codec8bit, false, SIMDWIDTH>,
             SIMDWIDTH>(ivf, store_pairs);
    case ScalarQuantizer::QT_4bit:
        return sel2_InvertedListScanner
            <QuantizerTemplate<Codec4bit, false, SIMDWIDTH>,
             SIMDWIDTH>(ivf, store_pairs);
    case ScalarQuantizer::QT_fp16:
        return sel2_InvertedListScanner
            <QuantizerFP16<SIMDWIDTH>,
             SIMDWIDTH>(ivf, store_pairs);
    }
    FAISS_THROW_MSG ("unknown qtype");
    return nullptr;
}
InvertedListScanner* IndexIVFScalarQuantizer::get_InvertedListScanner
    (bool store_pairs) const
{
    if (d % 8 == 0) {
        return select_InvertedListScanner<8> (this, store_pairs);
    } else {
        return select_InvertedListScanner<1> (this, store_pairs);
    }
}
void IndexIVFScalarQuantizer::reconstruct_from_offset (
        long list_no, long offset, float* recons) const
{
    std::vector<float> centroid(d);
    quantizer->reconstruct (list_no, centroid.data());

    const uint8_t* code = invlists->get_single_code (list_no, offset);
    sq.decode (code, recons, 1);
    // add back the centroid of the inverted list
    for (int i = 0; i < d; ++i) {
        recons[i] += centroid[i];
    }
}

} // namespace faiss