Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/AutoTune.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 /* Copyright 2004-present Facebook. All Rights Reserved.
11  implementation of Hyper-parameter auto-tuning
12 */
13 
#include "AutoTune.h"

#include <cmath>
#include <limits>

#include "FaissAssert.h"
#include "utils.h"

#include "IndexFlat.h"
#include "VectorTransform.h"
#include "IndexLSH.h"
#include "IndexPQ.h"
#include "IndexIVF.h"
#include "IndexIVFPQ.h"
#include "MetaIndexes.h"
26 
27 
28 
29 namespace faiss {
30 
31 
32 AutoTuneCriterion::AutoTuneCriterion (idx_t nq, idx_t nnn):
33  nq (nq), nnn (nnn), gt_nnn (0)
34 {}
35 
36 
38  int gt_nnn, const float *gt_D_in, const idx_t *gt_I_in)
39 {
40  this->gt_nnn = gt_nnn;
41  if (gt_D_in) { // allow null for this, as it is often not used
42  gt_D.resize (nq * gt_nnn);
43  memcpy (gt_D.data(), gt_D_in, sizeof (gt_D[0]) * nq * gt_nnn);
44  }
45  gt_I.resize (nq * gt_nnn);
46  memcpy (gt_I.data(), gt_I_in, sizeof (gt_I[0]) * nq * gt_nnn);
47 }
48 
49 
50 
51 OneRecallAtRCriterion::OneRecallAtRCriterion (idx_t nq, idx_t R):
52  AutoTuneCriterion(nq, R), R(R)
53 {}
54 
55 double OneRecallAtRCriterion::evaluate (const float *D, const idx_t *I) const
56 {
57  FAISS_ASSERT ((gt_I.size() == gt_nnn * nq && gt_nnn >= 1 && nnn >= R) ||
58  !"gound truth not initialized");
59  idx_t n_ok = 0;
60  for (idx_t q = 0; q < nq; q++) {
61  idx_t gt_nn = gt_I [q * gt_nnn];
62  const idx_t *I_line = I + q * nnn;
63  for (int i = 0; i < R; i++) {
64  if (I_line[i] == gt_nn) {
65  n_ok++;
66  break;
67  }
68  }
69  }
70  return n_ok / double (nq);
71 }
72 
73 
74 IntersectionCriterion::IntersectionCriterion (idx_t nq, idx_t R):
75  AutoTuneCriterion(nq, R), R(R)
76 {}
77 
78 double IntersectionCriterion::evaluate (const float *D, const idx_t *I) const
79 {
80  FAISS_ASSERT ((gt_I.size() == gt_nnn * nq && gt_nnn >= R && nnn >= R) ||
81  !"gound truth not initialized");
82  long n_ok = 0;
83 #pragma omp parallel for reduction(+: n_ok)
84  for (idx_t q = 0; q < nq; q++) {
86  R, &gt_I [q * gt_nnn],
87  R, I + q * nnn);
88  }
89  return n_ok / double (nq * R);
90 }
91 
92 /***************************************************************
93  * OperatingPoints
94  ***************************************************************/
95 
96 OperatingPoints::OperatingPoints ()
97 {
98  clear();
99 }
100 
102 {
103  all_pts.clear();
104  optimal_pts.clear();
105  /// default point: doing nothing gives 0 performance and takes 0 time
106  OperatingPoint op = {0, 0, "", -1};
107  optimal_pts.push_back(op);
108 }
109 
110 /// add a performance measure
111 bool OperatingPoints::add (double perf, double t, const std::string & key,
112  size_t cno)
113 {
114  OperatingPoint op = {perf, t, key, long(cno)};
115  all_pts.push_back (op);
116  if (perf == 0) {
117  return false; // no method for 0 accuracy is faster than doing nothing
118  }
119  std::vector<OperatingPoint> & a = optimal_pts;
120  if (perf > a.back().perf) {
121  // keep unconditionally
122  a.push_back (op);
123  } else if (perf == a.back().perf) {
124  if (t < a.back ().t) {
125  a.back() = op;
126  } else {
127  return false;
128  }
129  } else {
130  int i;
131  // stricto sensu this should be a bissection
132  for (i = 0; i < a.size(); i++) {
133  if (a[i].perf >= perf) break;
134  }
135  assert (i < a.size());
136  if (t < a[i].t) {
137  if (a[i].perf == perf) {
138  a[i] = op;
139  } else {
140  a.insert (a.begin() + i, op);
141  }
142  } else {
143  return false;
144  }
145  }
146  { // remove non-optimal points from array
147  int i = a.size() - 1;
148  while (i > 0) {
149  if (a[i].t < a[i - 1].t)
150  a.erase (a.begin() + (i - 1));
151  i--;
152  }
153  }
154  return true;
155 }
156 
157 
159  const std::string & prefix)
160 {
161  int n_add = 0;
162  for (int i = 0; i < other.all_pts.size(); i++) {
163  const OperatingPoint & op = other.all_pts[i];
164  if (add (op.perf, op.t, prefix + op.key, op.cno))
165  n_add++;
166  }
167  return n_add;
168 }
169 
170 
171 
172 /// get time required to obtain a given performance measure
173 double OperatingPoints::t_for_perf (double perf) const
174 {
175  const std::vector<OperatingPoint> & a = optimal_pts;
176  if (perf > a.back().perf) return 1e50;
177  int i0 = -1, i1 = a.size() - 1;
178  while (i0 + 1 < i1) {
179  int imed = (i0 + i1 + 1) / 2;
180  if (a[imed].perf < perf) i0 = imed;
181  else i1 = imed;
182  }
183  return a[i1].t;
184 }
185 
186 
187 void OperatingPoints::all_to_gnuplot (const char *fname) const
188 {
189  FILE *f = fopen(fname, "w");
190  if (!f) {
191  fprintf (stderr, "cannot open %s", fname);
192  perror("");
193  abort();
194  }
195  for (int i = 0; i < all_pts.size(); i++) {
196  const OperatingPoint & op = all_pts[i];
197  fprintf (f, "%g %g %s\n", op.perf, op.t, op.key.c_str());
198  }
199  fclose(f);
200 }
201 
202 void OperatingPoints::optimal_to_gnuplot (const char *fname) const
203 {
204  FILE *f = fopen(fname, "w");
205  if (!f) {
206  fprintf (stderr, "cannot open %s", fname);
207  perror("");
208  abort();
209  }
210  double prev_perf = 0.0;
211  for (int i = 0; i < optimal_pts.size(); i++) {
212  const OperatingPoint & op = optimal_pts[i];
213  fprintf (f, "%g %g\n", prev_perf, op.t);
214  fprintf (f, "%g %g %s\n", op.perf, op.t, op.key.c_str());
215  prev_perf = op.perf;
216  }
217  fclose(f);
218 }
219 
220 void OperatingPoints::display (bool only_optimal) const
221 {
222  const std::vector<OperatingPoint> &pts =
223  only_optimal ? optimal_pts : all_pts;
224  printf("Tested %ld operating points, %ld ones are optimal:\n",
225  all_pts.size(), optimal_pts.size());
226 
227  for (int i = 0; i < pts.size(); i++) {
228  const OperatingPoint & op = pts[i];
229  const char *star = "";
230  if (!only_optimal) {
231  for (int j = 0; j < optimal_pts.size(); j++) {
232  if (op.cno == optimal_pts[j].cno) {
233  star = "*";
234  break;
235  }
236  }
237  }
238  printf ("cno=%ld key=%s perf=%.4f t=%.3f %s\n",
239  op.cno, op.key.c_str(), op.perf, op.t, star);
240  }
241 
242 }
243 
244 /***************************************************************
245  * ParameterSpace
246  ***************************************************************/
247 
248 ParameterSpace::ParameterSpace ():
249  verbose (1), n_experiments (500),
250  batchsize (1<<30), thread_over_batches (false)
251 {
252 }
253 
/* This constructor is deliberately compiled out: it is not kept because
   inheritors will call the parent initialize().
 */

#if 0
ParameterSpace::ParameterSpace (Index *index)
    : verbose (1),
      n_experiments (500),
      batchsize (1<<30),
      thread_over_batches (false)
{
    initialize (index);
}
#endif
266 
268 {
269  size_t n = 1;
270  for (int i = 0; i < parameter_ranges.size(); i++)
271  n *= parameter_ranges[i].values.size();
272  return n;
273 }
274 
275 /// get string representation of the combination
276 std::string ParameterSpace::combination_name (size_t cno) const {
277  char buf[1000], *wp = buf;
278  *wp = 0;
279  for (int i = 0; i < parameter_ranges.size(); i++) {
280  const ParameterRange & pr = parameter_ranges[i];
281  size_t j = cno % pr.values.size();
282  cno /= pr.values.size();
283  wp += snprintf (
284  wp, buf + 1000 - wp, "%s%s=%g", i == 0 ? "" : ",",
285  pr.name.c_str(), pr.values[j]);
286  }
287  return std::string (buf);
288 }
289 
290 
291 bool ParameterSpace::combination_ge (size_t c1, size_t c2) const
292 {
293  for (int i = 0; i < parameter_ranges.size(); i++) {
294  int nval = parameter_ranges[i].values.size();
295  size_t j1 = c1 % nval;
296  size_t j2 = c2 % nval;
297  if (!(j1 >= j2)) return false;
298  c1 /= nval;
299  c2 /= nval;
300  }
301  return true;
302 }
303 
304 
305 
/// Const variant: declares a local `ix` holding `index` downcast to
/// `const classname *` (null when the dynamic type does not match), so it
/// can be used directly as an `if` condition.
#define DC(classname)                                               \
    const classname *ix = dynamic_cast<const classname *>(index)
308 
309 static void init_pq_ParameterRange (const ProductQuantizer & pq,
310  ParameterRange & pr)
311 {
312  if (pq.code_size % 4 == 0) {
313  // Polysemous not supported for code sizes that are not a
314  // multiple of 4
315  for (int i = 2; i <= pq.code_size * 8 / 2; i+= 2)
316  pr.values.push_back(i);
317  }
318  pr.values.push_back (pq.code_size * 8);
319 }
320 
322 {
323  parameter_ranges.push_back (ParameterRange ());
324  parameter_ranges.back ().name = name;
325  return parameter_ranges.back ();
326 }
327 
328 
329 /// initialize with reasonable parameters for the index
330 void ParameterSpace::initialize (const Index * index)
331 {
332  if (DC (IndexPreTransform)) {
333  index = ix->index;
334  }
335  if (DC (IndexRefineFlat)) {
336  ParameterRange & pr = add_range("k_factor_rf");
337  for (int i = 0; i <= 6; i++) {
338  pr.values.push_back (1 << i);
339  }
340  index = ix->base_index;
341  }
342  if (DC (IndexPreTransform)) {
343  index = ix->index;
344  }
345 
346  if (DC (IndexIVF)) {
347  ParameterRange & pr = add_range("nprobe");
348  for (int i = 0; i < 13; i++) {
349  size_t nprobe = 1 << i;
350  if (nprobe >= ix->nlist) break;
351  pr.values.push_back (nprobe);
352  }
353  }
354  if (DC (IndexPQ)) {
355  ParameterRange & pr = add_range("ht");
356  init_pq_ParameterRange (ix->pq, pr);
357  }
358  if (DC (IndexIVFPQ)) {
359  ParameterRange & pr = add_range("ht");
360  init_pq_ParameterRange (ix->pq, pr);
361 
362  const MultiIndexQuantizer *miq =
363  dynamic_cast<const MultiIndexQuantizer *> (ix->quantizer);
364  if (miq) {
365  ParameterRange & pr_max_codes = add_range("max_codes");
366  for (int i = 8; i < 20; i++) {
367  pr_max_codes.values.push_back (1 << i);
368  }
369  pr_max_codes.values.push_back (1.0 / 0.0);
370  }
371  }
372  if (DC (IndexIVFPQR)) {
373  assert (ix);
374  ParameterRange & pr = add_range("k_factor");
375  for (int i = 0; i <= 6; i++) {
376  pr.values.push_back (1 << i);
377  }
378  }
379 }
380 
#undef DC

/// Non-const variant: declares a local `ix` holding `index` downcast to
/// `classname *` (null when the dynamic type does not match).
#define DC(classname)                                   \
    classname *ix = dynamic_cast<classname *>(index)
385 
386 
387 /// set a combination of parameters on an index
388 void ParameterSpace::set_index_parameters (Index *index, size_t cno) const
389 {
390 
391  for (int i = 0; i < parameter_ranges.size(); i++) {
392  const ParameterRange & pr = parameter_ranges[i];
393  size_t j = cno % pr.values.size();
394  cno /= pr.values.size();
395  double val = pr.values [j];
396  set_index_parameter (index, pr.name, val);
397  }
398 }
399 
400 /// set a combination of parameters on an index
402  Index *index, const char *description_in) const
403 {
404  char description[strlen(description_in) + 1];
405  char *ptr;
406  memcpy (description, description_in, strlen(description_in) + 1);
407 
408  for (char *tok = strtok_r (description, " ,", &ptr);
409  tok;
410  tok = strtok_r (nullptr, " ,", &ptr)) {
411  char name[100];
412  double val;
413  FAISS_ASSERT (sscanf (tok, "%100[^=]=%lf", name, &val) == 2);
414  set_index_parameter (index, name, val);
415  }
416 
417 }
418 
420  Index * index, const std::string & name, double val) const
421 {
422  if (verbose > 1)
423  printf(" set %s=%g\n", name.c_str(), val);
424 
425  if (name == "verbose") {
426  index->verbose = int(val);
427  }
428  if (DC (IndexPreTransform)) {
429  index = ix->index;
430  }
431  if (name == "verbose") {
432  index->verbose = int(val);
433  }
434  if (DC (IndexRefineFlat)) {
435  if (name == "k_factor_rf") {
436  ix->k_factor = int(val);
437  return;
438  }
439  index = ix->base_index;
440  }
441  if (DC (IndexPreTransform)) {
442  index = ix->index;
443  }
444  if (name == "verbose") {
445  index->verbose = int(val);
446  return; // last verbose that we could find
447  }
448  if (name == "nprobe") {
449  DC(IndexIVF);
450  ix->nprobe = int(val);
451  } else if (name == "ht") {
452  if (DC (IndexPQ)) {
453  if (val >= ix->pq.code_size * 8) {
454  ix->search_type = IndexPQ::ST_PQ;
455  } else {
456  ix->search_type = IndexPQ::ST_polysemous;
457  ix->polysemous_ht = int(val);
458  }
459  } else if (DC (IndexIVFPQ)) {
460  if (val >= ix->pq.code_size * 8) {
461  ix->polysemous_ht = 0;
462  } else {
463  ix->polysemous_ht = int(val);
464  }
465  }
466  } else if (name == "k_factor") {
467  DC (IndexIVFPQR);
468  ix->k_factor = val;
469  } else if (name == "max_codes") {
470  DC (IndexIVFPQ);
471  ix->max_codes = finite(val) ? size_t(val) : 0;
472  } else {
473  fprintf(stderr,
474  "ParameterSpace::set_index_parameter:"
475  "could not set parameter %s\n",
476  name.c_str());
477  }
478 }
479 
481 {
482  printf ("ParameterSpace, %ld parameters, %ld combinations:\n",
483  parameter_ranges.size (), n_combinations ());
484  for (int i = 0; i < parameter_ranges.size(); i++) {
485  const ParameterRange & pr = parameter_ranges[i];
486  printf (" %s: ", pr.name.c_str ());
487  char sep = '[';
488  for (int j = 0; j < pr.values.size(); j++) {
489  printf ("%c %g", sep, pr.values [j]);
490  sep = ',';
491  }
492  printf ("]\n");
493  }
494 }
495 
496 
497 
498 void ParameterSpace::update_bounds (size_t cno, const OperatingPoint & op,
499  double *upper_bound_perf,
500  double *lower_bound_t) const
501 {
502  if (combination_ge (cno, op.cno)) {
503  if (op.t > *lower_bound_t) *lower_bound_t = op.t;
504  }
505  if (combination_ge (op.cno, cno)) {
506  if (op.perf < *upper_bound_perf) *upper_bound_perf = op.perf;
507  }
508 }
509 
510 
511 
513  size_t nq, const float *xq,
514  const AutoTuneCriterion & crit,
515  OperatingPoints * ops) const
516 {
517  FAISS_ASSERT (nq == crit.nq ||
518  !"criterion does not have the same nb of queries");
519 
520  size_t n_comb = n_combinations ();
521 
522  if (n_experiments == 0) {
523 
524  for (size_t cno = 0; cno < n_comb; cno++) {
525  set_index_parameters (index, cno);
526  std::vector<Index::idx_t> I(nq * crit.nnn);
527  std::vector<float> D(nq * crit.nnn);
528 
529  double t0 = getmillisecs ();
530  index->search (nq, xq, crit.nnn, D.data(), I.data());
531  double t_search = (getmillisecs() - t0) / 1e3;
532 
533  double perf = crit.evaluate (D.data(), I.data());
534 
535  bool keep = ops->add (perf, t_search, combination_name (cno), cno);
536 
537  if (verbose)
538  printf(" %ld/%ld: %s perf=%.3f t=%.3f s %s\n", cno, n_comb,
539  combination_name (cno).c_str(), perf, t_search,
540  keep ? "*" : "");
541  }
542  return;
543  }
544 
545  int n_exp = n_experiments;
546 
547  if (n_exp > n_comb) n_exp = n_comb;
548  FAISS_ASSERT (n_comb == 1 || n_exp > 2);
549  std::vector<int> perm (n_comb);
550  // make sure the slowest and fastest experiment are run
551  perm[0] = 0;
552  if (n_comb > 1) {
553  perm[1] = n_comb - 1;
554  rand_perm (&perm[2], n_comb - 2, 1234);
555  for (int i = 2; i < perm.size(); i++) perm[i] ++;
556  }
557 
558  for (size_t xp = 0; xp < n_exp; xp++) {
559  size_t cno = perm[xp];
560 
561  if (verbose)
562  printf(" %ld/%d: cno=%ld %s ", xp, n_exp, cno,
563  combination_name (cno).c_str());
564 
565  {
566  double lower_bound_t = 0.0;
567  double upper_bound_perf = 1.0;
568  for (int i = 0; i < ops->all_pts.size(); i++) {
569  update_bounds (cno, ops->all_pts[i],
570  &upper_bound_perf, &lower_bound_t);
571  }
572  double best_t = ops->t_for_perf (upper_bound_perf);
573  if (verbose)
574  printf ("bounds [perf<=%.3f t>=%.3f] %s",
575  upper_bound_perf, lower_bound_t,
576  best_t <= lower_bound_t ? "skip\n" : "");
577  if (best_t <= lower_bound_t) continue;
578  }
579 
580  set_index_parameters (index, cno);
581  std::vector<Index::idx_t> I(nq * crit.nnn);
582  std::vector<float> D(nq * crit.nnn);
583 
584  double t0 = getmillisecs ();
585 
586  if (thread_over_batches) {
587 #pragma omp parallel for
588  for (size_t q0 = 0; q0 < nq; q0 += batchsize) {
589  size_t q1 = q0 + batchsize;
590  if (q1 > nq) q1 = nq;
591  index->search (q1 - q0, xq + q0 * index->d,
592  crit.nnn,
593  D.data() + q0 * crit.nnn,
594  I.data() + q0 * crit.nnn);
595  }
596  } else {
597  for (size_t q0 = 0; q0 < nq; q0 += batchsize) {
598  size_t q1 = q0 + batchsize;
599  if (q1 > nq) q1 = nq;
600  index->search (q1 - q0, xq + q0 * index->d,
601  crit.nnn,
602  D.data() + q0 * crit.nnn,
603  I.data() + q0 * crit.nnn);
604  }
605  }
606 
607  double t_search = (getmillisecs() - t0) / 1e3;
608 
609  double perf = crit.evaluate (D.data(), I.data());
610 
611  bool keep = ops->add (perf, t_search, combination_name (cno), cno);
612 
613  if (verbose)
614  printf(" perf %.3f t %.3f %s\n", perf, t_search,
615  keep ? "*" : "");
616  }
617 }
618 
619 /***************************************************************
620  * index_factory
621  ***************************************************************/
622 
623 Index *index_factory (int d, const char *description_in, MetricType metric)
624 {
625  VectorTransform *vt = nullptr;
626  Index *coarse_quantizer = nullptr;
627  Index *index = nullptr;
628  bool add_idmap = false;
629  bool make_IndexRefineFlat = false;
630 
631  char description[strlen(description_in) + 1];
632  char *ptr;
633  memcpy (description, description_in, strlen(description_in) + 1);
634 
635  int ncentroids = -1;
636 
637  for (char *tok = strtok_r (description, " ,", &ptr);
638  tok;
639  tok = strtok_r (nullptr, " ,", &ptr)) {
640  int d_out, opq_M, nbit, M, M2;
641  VectorTransform *vt_1 = nullptr;
642  Index *coarse_quantizer_1 = nullptr;
643  Index *index_1 = nullptr;
644 
645  // VectorTransforms
646  if (sscanf (tok, "PCA%d", &d_out) == 1) {
647  vt_1 = new PCAMatrix (d, d_out);
648  d = d_out;
649  } else if (sscanf (tok, "PCAR%d", &d_out) == 1) {
650  vt_1 = new PCAMatrix (d, d_out, 0, true);
651  d = d_out;
652  } else if (sscanf (tok, "OPQ%d_%d", &opq_M, &d_out) == 2) {
653  vt_1 = new OPQMatrix (d, opq_M, d_out);
654  d = d_out;
655  } else if (sscanf (tok, "OPQ%d", &opq_M) == 1) {
656  vt_1 = new OPQMatrix (d, opq_M);
657  // coarse quantizers
658  } else if (sscanf (tok, "IVF%d", &ncentroids) == 1) {
659  if (metric == METRIC_L2) {
660  coarse_quantizer_1 = new IndexFlatL2 (d);
661  } else { // if (metric == METRIC_IP)
662  coarse_quantizer_1 = new IndexFlatIP (d);
663  }
664  } else if (sscanf (tok, "IMI2x%d", &nbit) == 1) {
665  FAISS_ASSERT(metric == METRIC_L2 ||
666  !"MultiIndex not implemented for inner prod search");
667  coarse_quantizer_1 = new MultiIndexQuantizer (d, 2, nbit);
668  ncentroids = 1 << (2 * nbit);
669  } else if (strcmp(tok, "IDMap") == 0) {
670  add_idmap = true;
671 
672  // IVFs
673  } else if (strcmp (tok, "Flat") == 0) {
674  if (coarse_quantizer) {
675  // if there was an IVF in front, then it is an IVFFlat
676  IndexIVF *index_ivf = new IndexIVFFlat (
677  coarse_quantizer, d, ncentroids, metric);
678  index_ivf->quantizer_trains_alone =
679  dynamic_cast<MultiIndexQuantizer*>(coarse_quantizer)
680  != nullptr;
681  index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
682  index_ivf->own_fields = true;
683  index_1 = index_ivf;
684  } else {
685  index_1 = new IndexFlat (d, metric);
686  if (add_idmap) {
687  IndexIDMap *idmap = new IndexIDMap(index_1);
688  idmap->own_fields = true;
689  index_1 = idmap;
690  add_idmap = false;
691  }
692  }
693  } else if (sscanf (tok, "PQ%d+%d", &M, &M2) == 2) {
694  FAISS_ASSERT(coarse_quantizer ||
695  !"PQ with + works only with an IVF");
696  FAISS_ASSERT(metric == METRIC_L2 ||
697  !"IVFPQR not implemented for inner product search");
698  IndexIVFPQR *index_ivf = new IndexIVFPQR (
699  coarse_quantizer, d, ncentroids, M, 8, M2, 8);
700  index_ivf->quantizer_trains_alone =
701  dynamic_cast<MultiIndexQuantizer*>(coarse_quantizer)
702  != nullptr;
703  index_ivf->own_fields = true;
704  index_1 = index_ivf;
705  } else if (sscanf (tok, "PQ%d", &M) == 1) {
706  if (coarse_quantizer) {
707  IndexIVFPQ *index_ivf = new IndexIVFPQ (
708  coarse_quantizer, d, ncentroids, M, 8);
709  index_ivf->quantizer_trains_alone =
710  dynamic_cast<MultiIndexQuantizer*>(coarse_quantizer)
711  != nullptr;
712  index_ivf->metric_type = metric;
713  index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
714  index_ivf->own_fields = true;
715  index_ivf->do_polysemous_training = true;
716  index_1 = index_ivf;
717  } else {
718  IndexPQ *index_pq = new IndexPQ (d, M, 8, metric);
719  index_pq->do_polysemous_training = true;
720  index_1 = index_pq;
721  if (add_idmap) {
722  IndexIDMap *idmap = new IndexIDMap(index_1);
723  idmap->own_fields = true;
724  index_1 = idmap;
725  add_idmap = false;
726  }
727  }
728  } else if (strcmp (tok, "RFlat") == 0) {
729  make_IndexRefineFlat = true;
730  } else {
731  fprintf (stderr, "could not parse token \"%s\" in %s\n",
732  tok, description_in);
733  FAISS_ASSERT (!"parse error");
734  }
735 
736  if (vt_1) {
737  FAISS_ASSERT (!vt || !"cannot apply two VectorTransforms");
738  vt = vt_1;
739  }
740 
741  if (coarse_quantizer_1) {
742  FAISS_ASSERT (!coarse_quantizer ||
743  !"cannot have 2 coarse quantizers");
744  coarse_quantizer = coarse_quantizer_1;
745  }
746 
747  if (index_1) {
748  FAISS_ASSERT (!index || !"cannot have 2 indexes");
749  index = index_1;
750  }
751  }
752 
753  if (add_idmap) {
754  fprintf(stderr, "index_factory: WARNING: "
755  "IDMap option not used\n");
756  }
757 
758  if (vt) {
759  IndexPreTransform *index_pt = new IndexPreTransform (vt, index);
760  index_pt->own_fields = true;
761  index = index_pt;
762  }
763 
764  if (make_IndexRefineFlat) {
765  IndexRefineFlat *index_rf = new IndexRefineFlat (index);
766  index_rf->own_fields = true;
767  index = index_rf;
768  }
769 
770  return index;
771 }
772 
773 
774 
775 
776 }; // namespace faiss
void explore(Index *index, size_t nq, const float *xq, const AutoTuneCriterion &crit, OperatingPoints *ops) const
Definition: AutoTune.cpp:512
std::vector< ParameterRange > parameter_ranges
all tunable parameters
Definition: AutoTune.h:135
std::string key
key that identifies this op pt
Definition: AutoTune.h:90
long cno
integer identifier
Definition: AutoTune.h:91
bool do_polysemous_training
false = standard PQ
Definition: IndexPQ.h:71
void display(bool only_optimal=true) const
easy-to-read output
Definition: AutoTune.cpp:220
double perf
performance measure (output of a Criterion)
Definition: AutoTune.h:88
double t_for_perf(double perf) const
get time required to obtain a given performance measure
Definition: AutoTune.cpp:173
idx_t nnn
nb of NNs that the query should request
Definition: AutoTune.h:30
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:51
virtual double evaluate(const float *D, const idx_t *I) const override
Definition: AutoTune.cpp:78
bool add(double perf, double t, const std::string &key, size_t cno=0)
add a performance measure. Return whether it is an optimal point
Definition: AutoTune.cpp:111
bool do_polysemous_training
reorder PQ centroids after training?
Definition: IndexIVFPQ.h:36
size_t batchsize
maximum number of queries to submit at a time.
Definition: AutoTune.h:146
virtual double evaluate(const float *D, const idx_t *I) const =0
idx_t nq
nb of queries this criterion is evaluated on
Definition: AutoTune.h:29
std::vector< OperatingPoint > optimal_pts
optimal operating points, sorted by perf
Definition: AutoTune.h:99
void set_groundtruth(int gt_nnn, const float *gt_D_in, const idx_t *gt_I_in)
Definition: AutoTune.cpp:37
ParameterRange & add_range(const char *name)
add a new parameter
Definition: AutoTune.cpp:321
idx_t gt_nnn
nb of GT NNs required to evaluate criterion
Definition: AutoTune.h:31
void all_to_gnuplot(const char *fname) const
output to a format easy to digest by gnuplot
Definition: AutoTune.cpp:187
bool own_fields
should the base index be deallocated?
Definition: IndexFlat.h:112
int d
vector dimension
Definition: Index.h:66
size_t code_size
byte per indexed vector
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:54
bool thread_over_batches
use multithreading over batches (useful to benchmark independent single-searches) ...
Definition: AutoTune.h:149
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:52
std::vector< OperatingPoint > all_pts
all operating points
Definition: AutoTune.h:96
size_t ranklist_intersection_size(size_t k1, const long *v1, size_t k2, const long *v2_in)
Definition: utils.cpp:1393
bool verbose
verbosity level
Definition: Index.h:68
double getmillisecs()
ms elapsed since some arbitrary epoch
Definition: utils.cpp:72
std::vector< float > gt_D
Ground-truth distances (size nq * gt_nnn)
Definition: AutoTune.h:33
std::string combination_name(size_t cno) const
get string representation of the combination
Definition: AutoTune.cpp:276
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void update_bounds(size_t cno, const OperatingPoint &op, double *upper_bound_perf, double *lower_bound_t) const
Definition: AutoTune.cpp:498
bool own_fields
! the sub-index
virtual void initialize(const Index *index)
initialize with reasonable parameters for the index
Definition: AutoTune.cpp:330
int verbose
verbosity during exploration
Definition: AutoTune.h:140
int merge_with(const OperatingPoints &other, const std::string &prefix="")
add operating points from other to this, with a prefix to the keys
Definition: AutoTune.cpp:158
virtual void set_index_parameter(Index *index, const std::string &name, double val) const
set one of the parameters
Definition: AutoTune.cpp:419
size_t n_combinations() const
nb of combinations, = product of values sizes
Definition: AutoTune.cpp:267
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
void set_index_parameters(Index *index, size_t cno) const
set a combination of parameters on an index
Definition: AutoTune.cpp:388
asymmetric product quantizer (default)
Definition: IndexPQ.h:78
void display() const
print a description on stdout
Definition: AutoTune.cpp:480
HE filter (using ht) + PQ combination.
Definition: IndexPQ.h:82
bool combination_ge(size_t c1, size_t c2) const
returns whether combinations c1 >= c2 in the tuple sense
Definition: AutoTune.cpp:291
bool spherical
do we want normalized centroids?
Definition: Clustering.h:31
possible values of a parameter, sorted from least to most expensive/accurate
Definition: AutoTune.h:126
Index * index_factory(int d, const char *description_in, MetricType metric)
Definition: AutoTune.cpp:623
int n_experiments
nb of experiments during optimization (0 = try all combinations)
Definition: AutoTune.h:143
std::vector< idx_t > gt_I
Ground-truth indexes (size nq * gt_nnn)
Definition: AutoTune.h:34
double t
corresponding execution time (ms)
Definition: AutoTune.h:89
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:44
bool own_fields
! the sub-index
Definition: MetaIndexes.h:28