20 #include "FaissAssert.h"
23 #include "IndexFlat.h"
24 #include "VectorTransform.h"
28 #include "IndexIVFPQ.h"
29 #include "IndexIVFFlat.h"
30 #include "MetaIndexes.h"
31 #include "IndexScalarQuantizer.h"
32 #include "IndexHNSW.h"
33 #include "IndexBinaryFlat.h"
34 #include "IndexBinaryHNSW.h"
35 #include "IndexBinaryIVF.h"
40 AutoTuneCriterion::AutoTuneCriterion (idx_t nq, idx_t nnn):
41 nq (nq), nnn (nnn), gt_nnn (0)
46 int gt_nnn,
const float *gt_D_in,
const idx_t *gt_I_in)
50 gt_D.resize (nq * gt_nnn);
51 memcpy (
gt_D.data(), gt_D_in,
sizeof (
gt_D[0]) * nq * gt_nnn);
53 gt_I.resize (nq * gt_nnn);
54 memcpy (
gt_I.data(), gt_I_in,
sizeof (
gt_I[0]) * nq * gt_nnn);
59 OneRecallAtRCriterion::OneRecallAtRCriterion (idx_t nq, idx_t R):
65 FAISS_THROW_IF_NOT_MSG(
67 "ground truth not initialized");
69 for (idx_t q = 0; q <
nq; q++) {
71 const idx_t* I_line = I + q *
nnn;
72 for (
int i = 0; i < R; i++) {
73 if (I_line[i] == gt_nn) {
79 return n_ok / double(nq);
83 IntersectionCriterion::IntersectionCriterion (idx_t nq, idx_t R):
89 FAISS_THROW_IF_NOT_MSG(
91 "ground truth not initialized");
93 #pragma omp parallel for reduction(+: n_ok)
94 for (idx_t q = 0; q <
nq; q++) {
96 R, &
gt_I [q * gt_nnn],
99 return n_ok / double (nq * R);
106 OperatingPoints::OperatingPoints ()
130 if (perf > a.back().perf) {
133 }
else if (perf == a.back().perf) {
134 if (t < a.back ().t) {
142 for (i = 0; i < a.size(); i++) {
143 if (a[i].perf >= perf)
break;
145 assert (i < a.size());
147 if (a[i].perf == perf) {
150 a.insert (a.begin() + i, op);
157 int i = a.size() - 1;
159 if (a[i].t < a[i - 1].t)
160 a.erase (a.begin() + (i - 1));
169 const std::string & prefix)
172 for (
int i = 0; i < other.
all_pts.size(); i++) {
185 const std::vector<OperatingPoint> & a =
optimal_pts;
186 if (perf > a.back().perf)
return 1e50;
187 int i0 = -1, i1 = a.size() - 1;
188 while (i0 + 1 < i1) {
189 int imed = (i0 + i1 + 1) / 2;
190 if (a[imed].perf < perf) i0 = imed;
199 FILE *f = fopen(fname,
"w");
201 fprintf (stderr,
"cannot open %s", fname);
205 for (
int i = 0; i <
all_pts.size(); i++) {
207 fprintf (f,
"%g %g %s\n", op.
perf, op.
t, op.
key.c_str());
212 void OperatingPoints::optimal_to_gnuplot (
const char *fname)
const
214 FILE *f = fopen(fname,
"w");
216 fprintf (stderr,
"cannot open %s", fname);
220 double prev_perf = 0.0;
223 fprintf (f,
"%g %g\n", prev_perf, op.t);
224 fprintf (f,
"%g %g %s\n", op.perf, op.t, op.key.c_str());
232 const std::vector<OperatingPoint> &pts =
234 printf(
"Tested %ld operating points, %ld ones are optimal:\n",
237 for (
int i = 0; i < pts.size(); i++) {
239 const char *star =
"";
248 printf (
"cno=%ld key=%s perf=%.4f t=%.3f %s\n",
258 ParameterSpace::ParameterSpace ():
259 verbose (1), n_experiments (500),
260 batchsize (1<<30), thread_over_batches (false),
261 min_test_duration (0)
270 ParameterSpace::ParameterSpace (Index *index):
271 verbose (1), n_experiments (500),
272 batchsize (1<<30), thread_over_batches (false)
289 char buf[1000], *wp = buf;
293 size_t j = cno % pr.values.size();
294 cno /= pr.values.size();
296 wp, buf + 1000 - wp,
"%s%s=%g", i == 0 ?
"" :
",",
297 pr.name.c_str(), pr.values[j]);
299 return std::string (buf);
307 size_t j1 = c1 % nval;
308 size_t j2 = c2 % nval;
309 if (!(j1 >= j2))
return false;
318 #define DC(classname) \
319 const classname *ix = dynamic_cast<const classname *>(index)
327 for (
int i = 2; i <= pq.
code_size * 8 / 2; i+= 2)
328 pr.values.push_back(i);
336 if (pr.name == name) {
341 parameter_ranges.back ().name = name;
342 return parameter_ranges.back ();
354 for (
int i = 0; i <= 6; i++) {
355 pr.values.push_back (1 << i);
357 index = ix->base_index;
366 for (
int i = 0; i < 13; i++) {
367 size_t nprobe = 1 << i;
368 if (nprobe >= ix->nlist)
break;
369 pr.values.push_back (nprobe);
372 if (dynamic_cast<const IndexHNSW*>(ix->quantizer)) {
374 for (
int i = 2; i <= 9; i++) {
375 pr.values.push_back (1 << i);
381 init_pq_ParameterRange (ix->pq, pr);
385 init_pq_ParameterRange (ix->pq, pr);
393 for (
int i = 8; i < 20; i++) {
394 pr_max_codes.values.push_back (1 << i);
396 pr_max_codes.values.push_back (
397 std::numeric_limits<double>::infinity()
403 for (
int i = 0; i <= 6; i++) {
404 pr.values.push_back (1 << i);
407 if (dynamic_cast<const IndexHNSW*>(index)) {
409 for (
int i = 2; i <= 9; i++) {
410 pr.values.push_back (1 << i);
418 #define DC(classname) classname *ix = dynamic_cast<classname *>(index)
427 size_t j = cno % pr.values.size();
428 cno /= pr.values.size();
429 double val = pr.values [j];
436 Index *index,
const char *description_in)
const
438 char description[strlen(description_in) + 1];
440 memcpy (description, description_in, strlen(description_in) + 1);
442 for (
char *tok = strtok_r (description,
" ,", &ptr);
444 tok = strtok_r (
nullptr,
" ,", &ptr)) {
447 int ret = sscanf (tok,
"%100[^=]=%lf", name, &val);
448 FAISS_THROW_IF_NOT_FMT (
449 ret == 2,
"could not interpret parameters %s", tok);
456 Index * index,
const std::string & name,
double val)
const
459 printf(
" set %s=%g\n", name.c_str(), val);
461 if (name ==
"verbose") {
472 [
this, name, val](int,
Index* subIndex) {
482 [
this, name, val](int,
Index* subIndex) {
490 if (name ==
"k_factor_rf") {
491 ix->k_factor = int(val);
499 if (name ==
"verbose") {
504 if (name ==
"nprobe") {
509 ix->nprobe = int(val);
516 if (val >= ix->pq.code_size * 8) {
520 ix->polysemous_ht = int(val);
524 if (val >= ix->pq.code_size * 8) {
525 ix->polysemous_ht = 0;
527 ix->polysemous_ht = int(val);
533 if (name ==
"k_factor") {
539 if (name ==
"max_codes") {
541 ix->max_codes = std::isfinite(val) ? size_t(val) : 0;
546 if (name ==
"efSearch") {
548 ix->hnsw.efSearch = int(val);
553 dynamic_cast<IndexHNSW *>(ix->quantizer)) {
554 cq->hnsw.efSearch = int(val);
560 FAISS_THROW_FMT (
"ParameterSpace::set_index_parameter:"
561 "could not set parameter %s",
567 printf (
"ParameterSpace, %ld parameters, %ld combinations:\n",
571 printf (
" %s: ", pr.name.c_str ());
573 for (
int j = 0; j < pr.values.size(); j++) {
574 printf (
"%c %g", sep, pr.values [j]);
584 double *upper_bound_perf,
585 double *lower_bound_t)
const
588 if (op.
t > *lower_bound_t) *lower_bound_t = op.
t;
591 if (op.
perf < *upper_bound_perf) *upper_bound_perf = op.
perf;
598 size_t nq,
const float *xq,
602 FAISS_THROW_IF_NOT_MSG (nq == crit.
nq,
603 "criterion does not have the same nb of queries");
609 for (
size_t cno = 0; cno < n_comb; cno++) {
611 std::vector<Index::idx_t> I(nq * crit.
nnn);
612 std::vector<float> D(nq * crit.
nnn);
615 index->
search (nq, xq, crit.
nnn, D.data(), I.data());
618 double perf = crit.
evaluate (D.data(), I.data());
623 printf(
" %ld/%ld: %s perf=%.3f t=%.3f s %s\n", cno, n_comb,
632 if (n_exp > n_comb) n_exp = n_comb;
633 FAISS_THROW_IF_NOT (n_comb == 1 || n_exp > 2);
634 std::vector<int> perm (n_comb);
638 perm[1] = n_comb - 1;
639 rand_perm (&perm[2], n_comb - 2, 1234);
640 for (
int i = 2; i < perm.size(); i++) perm[i] ++;
643 for (
size_t xp = 0; xp < n_exp; xp++) {
644 size_t cno = perm[xp];
647 printf(
" %ld/%d: cno=%ld %s ", xp, n_exp, cno,
651 double lower_bound_t = 0.0;
652 double upper_bound_perf = 1.0;
653 for (
int i = 0; i < ops->
all_pts.size(); i++) {
655 &upper_bound_perf, &lower_bound_t);
657 double best_t = ops->
t_for_perf (upper_bound_perf);
659 printf (
"bounds [perf<=%.3f t>=%.3f] %s",
660 upper_bound_perf, lower_bound_t,
661 best_t <= lower_bound_t ?
"skip\n" :
"");
662 if (best_t <= lower_bound_t)
continue;
666 std::vector<Index::idx_t> I(nq * crit.
nnn);
667 std::vector<float> D(nq * crit.
nnn);
677 #pragma omp parallel for
678 for (
size_t q0 = 0; q0 < nq; q0 +=
batchsize) {
680 if (q1 > nq) q1 = nq;
681 index->
search (q1 - q0, xq + q0 * index->
d,
683 D.data() + q0 * crit.
nnn,
684 I.data() + q0 * crit.
nnn);
687 for (
size_t q0 = 0; q0 < nq; q0 +=
batchsize) {
689 if (q1 > nq) q1 = nq;
690 index->
search (q1 - q0, xq + q0 * index->
d,
692 D.data() + q0 * crit.
nnn,
693 I.data() + q0 * crit.
nnn);
703 double perf = crit.
evaluate (D.data(), I.data());
708 printf(
" perf %.3f t %.3f (%d runs) %s\n",
709 perf, t_search, nrun,
721 std::vector<VectorTransform *> chain;
723 for (
int i = 0; i < chain.size(); i++) {
731 char get_trains_alone(
const Index *coarse_quantizer) {
733 dynamic_cast<const MultiIndexQuantizer*
>(coarse_quantizer) ? 1 :
734 dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer) ? 2 :
744 Index *coarse_quantizer =
nullptr;
745 Index *index =
nullptr;
746 bool add_idmap =
false;
747 bool make_IndexRefineFlat =
false;
751 char description[strlen(description_in) + 1];
753 memcpy (description, description_in, strlen(description_in) + 1);
757 for (
char *tok = strtok_r (description,
" ,", &ptr);
759 tok = strtok_r (
nullptr,
" ,", &ptr)) {
760 int d_out, opq_M, nbit, M, M2, pq_m, ncent;
761 std::string stok(tok);
767 Index *coarse_quantizer_1 =
nullptr;
768 Index *index_1 =
nullptr;
771 if (sscanf (tok,
"PCA%d", &d_out) == 1) {
774 }
else if (sscanf (tok,
"PCAR%d", &d_out) == 1) {
775 vt_1 =
new PCAMatrix (d, d_out, 0,
true);
777 }
else if (sscanf (tok,
"RR%d", &d_out) == 1) {
780 }
else if (sscanf (tok,
"PCAW%d", &d_out) == 1) {
781 vt_1 =
new PCAMatrix (d, d_out, -0.5,
false);
783 }
else if (sscanf (tok,
"PCAWR%d", &d_out) == 1) {
784 vt_1 =
new PCAMatrix (d, d_out, -0.5,
true);
786 }
else if (sscanf (tok,
"OPQ%d_%d", &opq_M, &d_out) == 2) {
789 }
else if (sscanf (tok,
"OPQ%d", &opq_M) == 1) {
791 }
else if (stok ==
"L2norm") {
795 }
else if (!coarse_quantizer &&
796 sscanf (tok,
"IVF%d_HNSW%d", &ncentroids, &M) == 2) {
797 FAISS_THROW_IF_NOT (metric == METRIC_L2);
800 }
else if (!coarse_quantizer &&
801 sscanf (tok,
"IVF%d", &ncentroids) == 1) {
802 if (metric == METRIC_L2) {
807 }
else if (!coarse_quantizer && sscanf (tok,
"IMI2x%d", &nbit) == 1) {
808 FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2,
809 "MultiIndex not implemented for inner prod search");
811 ncentroids = 1 << (2 * nbit);
812 }
else if (stok ==
"IDMap") {
816 }
else if (!index && (stok ==
"Flat" || stok ==
"FlatDedup")) {
817 if (coarse_quantizer) {
819 IndexIVF *index_ivf = stok ==
"Flat" ?
821 coarse_quantizer, d, ncentroids, metric) :
823 coarse_quantizer, d, ncentroids, metric);
825 get_trains_alone (coarse_quantizer);
826 index_ivf->
cp.
spherical = metric == METRIC_INNER_PRODUCT;
827 del_coarse_quantizer.release ();
831 FAISS_THROW_IF_NOT_MSG (stok !=
"FlatDedup",
832 "dedup supported only for IVFFlat");
835 }
else if (!index && (stok ==
"SQ8" || stok ==
"SQ4" ||
840 stok ==
"SQfp16" ? ScalarQuantizer::QT_fp16 :
842 if (coarse_quantizer) {
845 coarse_quantizer, d, ncentroids, qt, metric);
847 get_trains_alone (coarse_quantizer);
848 del_coarse_quantizer.release ();
854 }
else if (!index && sscanf (tok,
"PQ%d+%d", &M, &M2) == 2) {
855 FAISS_THROW_IF_NOT_MSG(coarse_quantizer,
856 "PQ with + works only with an IVF");
857 FAISS_THROW_IF_NOT_MSG(metric == METRIC_L2,
858 "IVFPQR not implemented for inner product search");
860 coarse_quantizer, d, ncentroids, M, 8, M2, 8);
862 get_trains_alone (coarse_quantizer);
863 del_coarse_quantizer.release ();
866 }
else if (!index && (sscanf (tok,
"PQ%d", &M) == 1 ||
867 sscanf (tok,
"PQ%dnp", &M) == 1)) {
868 bool do_polysemous_training = stok.find(
"np") == std::string::npos;
869 if (coarse_quantizer) {
871 coarse_quantizer, d, ncentroids, M, 8);
873 get_trains_alone (coarse_quantizer);
875 index_ivf->
cp.
spherical = metric == METRIC_INNER_PRODUCT;
876 del_coarse_quantizer.release ();
886 sscanf (tok,
"HNSW%d_%d+PQ%d", &M, &ncent, &pq_m) == 3) {
893 sscanf (tok,
"HNSW%d_2x%d+PQ%d", &M, &nbit, &pq_m) == 3) {
902 sscanf (tok,
"HNSW%d_PQ%d", &M, &pq_m) == 2) {
905 sscanf (tok,
"HNSW%d", &M) == 1) {
908 sscanf (tok,
"HNSW%d_SQ%d", &M, &pq_m) == 2 &&
911 }
else if (stok ==
"RFlat") {
912 make_IndexRefineFlat =
true;
914 FAISS_THROW_FMT(
"could not parse token \"%s\" in %s\n",
915 tok, description_in);
918 if (index_1 && add_idmap) {
920 del_index.set (idmap);
927 vts.chain.push_back (vt_1);
930 if (coarse_quantizer_1) {
931 coarse_quantizer = coarse_quantizer_1;
932 del_coarse_quantizer.set (coarse_quantizer);
937 del_index.set (index);
941 FAISS_THROW_IF_NOT_FMT(index,
"descrption %s did not generate an index",
945 del_index.release ();
946 del_coarse_quantizer.release ();
949 fprintf(stderr,
"index_factory: WARNING: "
950 "IDMap option not used\n");
953 if (vts.chain.size() > 0) {
957 while (vts.chain.size() > 0) {
958 index_pt->prepend_transform (vts.chain.back ());
959 vts.chain.pop_back ();
964 if (make_IndexRefineFlat) {
973 IndexBinary *index_binary_factory(
int d,
const char *description)
975 IndexBinary *index =
nullptr;
980 if (sscanf(description,
"BIVF%d_HNSW%d", &ncentroids, &M) == 2) {
981 IndexBinaryIVF *index_ivf =
new IndexBinaryIVF(
982 new IndexBinaryHNSW(d, M), d, ncentroids
984 index_ivf->own_fields =
true;
987 }
else if (sscanf(description,
"BIVF%d", &ncentroids) == 1) {
988 IndexBinaryIVF *index_ivf =
new IndexBinaryIVF(
989 new IndexBinaryFlat(d), d, ncentroids
991 index_ivf->own_fields =
true;
994 }
else if (sscanf(description,
"BHNSW%d", &M) == 1) {
995 IndexBinaryHNSW *index_hnsw =
new IndexBinaryHNSW(d, M);
998 }
else if (std::string(description) ==
"BFlat") {
999 index =
new IndexBinaryFlat(d);
1002 FAISS_THROW_IF_NOT_FMT(index,
"descrption %s did not generate an index",
1013 MatrixStats::PerDimStats::PerDimStats():
1014 n(0), n_nan(0), n_inf(0), n0(0),
1015 min(HUGE_VALF), max(-HUGE_VALF),
1017 mean(NAN), stddev(NAN)
1021 void MatrixStats::PerDimStats::add (
float x)
1024 if (std::isnan(x)) {
1028 if (!std::isfinite(x)) {
1033 if (x < min) min = x;
1034 if (x > max) max = x;
1036 sum2 += (double)x * (
double)x;
1039 void MatrixStats::PerDimStats::compute_mean_std ()
1041 n_valid = n - n_nan - n_inf;
1042 mean = sum / n_valid;
1043 double var = sum2 / n_valid - mean * mean;
1044 if (var < 0) var = 0;
1049 void MatrixStats::do_comment (
const char *fmt, ...)
1055 size_t size = vsnprintf(buf, nbuf, fmt, ap);
1064 MatrixStats::MatrixStats (
size_t n,
size_t d,
const float *x):
1066 n_collision(0), n_valid(0), n0(0),
1067 min_norm2(HUGE_VAL), max_norm2(0)
1069 std::vector<char> comment_buf (10000);
1070 buf = comment_buf.data ();
1071 nbuf = comment_buf.size();
1073 do_comment (
"analyzing %ld vectors of size %ld\n", n, d);
1077 "indexing this many dimensions is hard, "
1078 "please consider dimensionality reducution (with PCAMatrix)\n");
1081 size_t nbytes =
sizeof (x[0]) * d;
1082 per_dim_stats.resize (d);
1084 for (
size_t i = 0; i < n; i++) {
1085 const float *xi = x + d * i;
1087 for (
size_t j = 0; j < d; j++) {
1088 per_dim_stats[j].add (xi[j]);
1089 sum2 += xi[j] * (double)xi[j];
1092 if (std::isfinite (sum2)) {
1097 if (sum2 < min_norm2) min_norm2 = sum2;
1098 if (sum2 > max_norm2) max_norm2 = sum2;
1103 uint64_t hash =
hash_bytes((
const uint8_t*)xi, nbytes);
1104 auto elt = occurrences.find (hash);
1105 if (elt == occurrences.end()) {
1106 Occurrence occ = {i, 1};
1107 occurrences[hash] = occ;
1109 if (!memcmp (xi, x + elt->second.first * d, nbytes)) {
1110 elt->second.count ++;
1121 do_comment (
"no NaN or Infs in data\n");
1123 do_comment (
"%ld vectors contain NaN or Inf "
1124 "(or have too large components), "
1125 "expect bad results with indexing!\n", n - n_valid);
1129 if (occurrences.size() == n) {
1130 do_comment (
"all vectors are distinct\n");
1132 do_comment (
"%ld vectors are distinct (%.2f%%)\n",
1134 occurrences.size() * 100.0 / n);
1136 if (n_collision > 0) {
1137 do_comment (
"%ld collisions in hash table, "
1138 "counts may be invalid\n", n_collision);
1141 Occurrence max = {0, 0};
1142 for (
auto it = occurrences.begin();
1143 it != occurrences.end(); ++it) {
1144 if (it->second.count > max.count) {
1148 do_comment (
"vector %ld has %ld copies\n", max.first, max.count);
1152 min_norm2 = sqrt (min_norm2);
1153 max_norm2 = sqrt (max_norm2);
1154 do_comment (
"range of L2 norms=[%g, %g] (%ld null vectors)\n",
1155 min_norm2, max_norm2, n0);
1157 if (max_norm2 < min_norm2 * 1.0001) {
1158 do_comment (
"vectors are normalized, inner product and "
1159 "L2 search are equivalent\n");
1162 if (max_norm2 > min_norm2 * 100) {
1163 do_comment (
"vectors have very large differences in norms, "
1164 "is this normal?\n");
1170 double max_std = 0, min_std = HUGE_VAL;
1172 size_t n_dangerous_range = 0, n_0_range = 0, n0 = 0;
1174 for (
size_t j = 0; j < d; j++) {
1175 PerDimStats &st = per_dim_stats[j];
1176 st.compute_mean_std ();
1179 if (st.max == st.min) {
1181 }
else if (st.max < 1.001 * st.min) {
1182 n_dangerous_range ++;
1185 if (st.stddev > max_std) max_std = st.stddev;
1186 if (st.stddev < min_std) min_std = st.stddev;
1192 do_comment (
"matrix contains no 0s\n");
1194 do_comment (
"matrix contains %.2f %% 0 entries\n",
1195 n0 * 100.0 / (n * d));
1198 if (n_0_range == 0) {
1199 do_comment (
"no constant dimensions\n");
1201 do_comment (
"%ld dimensions are constant: they can be removed\n",
1205 if (n_dangerous_range == 0) {
1206 do_comment (
"no dimension has a too large mean\n");
1208 do_comment (
"%ld dimensions are too large "
1209 "wrt. their variance, may loose precision "
1210 "in IndexFlatL2 (use CenteringTransform)\n",
1214 do_comment (
"stddevs per dimension are in [%g %g]\n", min_std, max_std);
1216 size_t n_small_var = 0;
1218 for (
size_t j = 0; j < d; j++) {
1219 const PerDimStats &st = per_dim_stats[j];
1220 if (st.stddev < max_std * 1e-4) {
1225 if (n_small_var > 0) {
1226 do_comment (
"%ld dimensions have negligible stddev wrt. "
1227 "the largest dimension, they could be ignored",
1232 comments = comment_buf.data ();
void explore(Index *index, size_t nq, const float *xq, const AutoTuneCriterion &crit, OperatingPoints *ops) const
std::vector< ParameterRange > parameter_ranges
all tunable parameters
std::string key
key that identifies this op pt
Randomly rotate a set of vectors.
long cno
integer identifier
bool do_polysemous_training
false = standard PQ
double evaluate(const float *D, const idx_t *I) const override
void display(bool only_optimal=true) const
easy-to-read output
double perf
performance measure (output of a Criterion)
double t_for_perf(double perf) const
get time required to obtain a given performance measure
idx_t nnn
nb of NNs that the query should request
double evaluate(const float *D, const idx_t *I) const override
bool add(double perf, double t, const std::string &key, size_t cno=0)
add a performance measure. Return whether it is an optimal point
bool do_polysemous_training
reorder PQ centroids after training?
size_t batchsize
maximum number of queries to submit at a time.
Level1Quantizer q1
first level quantizer
virtual double evaluate(const float *D, const idx_t *I) const =0
idx_t nq
nb of queries this criterion is evaluated on
std::vector< OperatingPoint > optimal_pts
optimal operating points, sorted by perf
void set_groundtruth(int gt_nnn, const float *gt_D_in, const idx_t *gt_I_in)
ParameterRange & add_range(const char *name)
add a new parameter (or return it if it exists)
idx_t gt_nnn
nb of GT NNs required to evaluate criterion
void all_to_gnuplot(const char *fname) const
output to a format easy to digest by gnuplot
bool own_fields
should the base index be deallocated?
size_t code_size
byte per indexed vector
char quantizer_trains_alone
uint64_t hash_bytes(const uint8_t *bytes, long n)
std::vector< OperatingPoint > all_pts
all operating points
size_t ranklist_intersection_size(size_t k1, const long *v1, size_t k2, const long *v2_in)
ClusteringParameters cp
to override default clustering params
bool verbose
verbosity level
double getmillisecs()
ms elapsed since some arbitrary epoch
std::vector< float > gt_D
Ground-truth distances (size nq * gt_nnn)
std::string combination_name(size_t cno) const
get string representation of the combination
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void update_bounds(size_t cno, const OperatingPoint &op, double *upper_bound_perf, double *lower_bound_t) const
virtual void initialize(const Index *index)
initialize with reasonable parameters for the index
int verbose
verbosity during exploration
int merge_with(const OperatingPoints &other, const std::string &prefix="")
add operating points from other to this, with a prefix to the keys
virtual void set_index_parameter(Index *index, const std::string &name, double val) const
set one of the parameters
size_t n_combinations() const
nb of combinations, = product of values sizes
MetricType metric_type
type of metric this index uses for search
void set_index_parameters(Index *index, size_t cno) const
set a combination of parameters on an index
asymmetric product quantizer (default)
void display() const
print a description on stdout
HE filter (using ht) + PQ combination.
bool combination_ge(size_t c1, size_t c2) const
returns whether combinations c1 >= c2 in the tuple sense
bool spherical
do we want normalized centroids?
bool own_fields
whether object owns the quantizer
possible values of a parameter, sorted from least to most expensive/accurate
Index * index_factory(int d, const char *description_in, MetricType metric)
int n_experiments
nb of experiments during optimization (0 = try all combinations)
std::vector< idx_t > gt_I
Ground-truth indexes (size nq * gt_nnn)
double t
corresponding execution time (ms)
MetricType
Some algorithms support both an inner product version and a L2 search version.
bool own_fields
! the sub-index