Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/AutoTune.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 // -*- c++ -*-
12 
13 #ifndef FAISS_AUTO_TUNE_H
14 #define FAISS_AUTO_TUNE_H
15 
16 #include <vector>
17 
18 #include "Index.h"
19 
20 namespace faiss {
21 
22 
23 /**
24  * Evaluation criterion. Returns a performance measure in [0,1],
25  * higher is better.
26  */
28  typedef Index::idx_t idx_t;
29  idx_t nq; ///< nb of queries this criterion is evaluated on
30  idx_t nnn; ///< nb of NNs that the query should request
31  idx_t gt_nnn; ///< nb of GT NNs required to evaluate criterion
32 
33  std::vector<float> gt_D; ///< Ground-truth distances (size nq * gt_nnn)
34  std::vector<idx_t> gt_I; ///< Ground-truth indexes (size nq * gt_nnn)
35 
36  AutoTuneCriterion (idx_t nq, idx_t nnn);
37 
38  /** Initializes the gt_D and gt_I vectors. Must be called before evaluating
39  *
40  * @param gt_D_in size nq * gt_nnn
41  * @param gt_I_in size nq * gt_nnn
42  */
43  void set_groundtruth (int gt_nnn, const float *gt_D_in,
44  const idx_t *gt_I_in);
45 
46  /** Evaluate the criterion.
47  *
48  * @param D size nq * nnn
49  * @param I size nq * nnn
50  * @return the criterion, between 0 and 1. Larger is better.
51  */
52  virtual double evaluate (const float *D, const idx_t *I) const = 0;
53 
54  virtual ~AutoTuneCriterion () {}
55 
56 };
57 
59 
60  idx_t R;
61 
62  OneRecallAtRCriterion (idx_t nq, idx_t R);
63 
64  virtual double evaluate (const float *D, const idx_t *I) const override;
65 
66  virtual ~OneRecallAtRCriterion () {}
67 };
68 
69 
71 
72  idx_t R;
73 
74  IntersectionCriterion (idx_t nq, idx_t R);
75 
76  virtual double evaluate (const float *D, const idx_t *I) const override;
77 
78  virtual ~IntersectionCriterion () {}
79 };
80 
81 /**
82  * Maintains a list of experimental results. Each operating point is a
83  * (perf, t, key) triplet, where higher perf and lower t is
84  * better. The key field is an arbitrary identifier for the operating point
85  */
86 
88  double perf; ///< performance measure (output of a Criterion)
89  double t; ///< corresponding execution time (ms)
90  std::string key; ///< key that identifies this op pt
91  long cno; ///< integer identifier
92 };
93 
95  /// all operating points
96  std::vector<OperatingPoint> all_pts;
97 
98  /// optimal operating points, sorted by perf
99  std::vector<OperatingPoint> optimal_pts;
100 
101  // begins with a single operating point: t=0, perf=0
102  OperatingPoints ();
103 
104  /// add operating points from other to this, with a prefix to the keys
105  int merge_with (const OperatingPoints &other,
106  const std::string & prefix = "");
107 
108  void clear ();
109 
110  /// add a performance measure. Return whether it is an optimal point
111  bool add (double perf, double t, const std::string & key, size_t cno = 0);
112 
113  /// get time required to obtain a given performance measure
114  double t_for_perf (double perf) const;
115 
116  /// easy-to-read output
117  void display (bool only_optimal = true) const;
118 
119  /// output to a format easy to digest by gnuplot
120  void all_to_gnuplot (const char *fname) const;
121  void optimal_to_gnuplot (const char *fname) const;
122 
123 };
124 
125 /// possible values of a parameter, sorted from least to most expensive/accurate
127  std::string name;
128  std::vector<double> values;
129 };
130 
131 /** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters.
132  */
134  /// all tunable parameters
135  std::vector<ParameterRange> parameter_ranges;
136 
137  // exploration parameters
138 
139  /// verbosity during exploration
140  int verbose;
141 
142  /// nb of experiments during optimization (0 = try all combinations)
144 
145  /// maximum number of queries to submit at a time.
146  size_t batchsize;
147 
148  /// use multithreading over batches (useful to benchmark independent single-searches)
150 
151  ParameterSpace ();
152 
153  /// nb of combinations, = product of values sizes
154  size_t n_combinations () const;
155 
156  /// returns whether combinations c1 >= c2 in the tuple sense
157  bool combination_ge (size_t c1, size_t c2) const;
158 
159  /// get string representation of the combination
160  std::string combination_name (size_t cno) const;
161 
162  /// print a description on stdout
163  void display () const;
164 
165  /// add a new parameter
166  ParameterRange &add_range(const char * name);
167 
168  /// initialize with reasonable parameters for the index
169  virtual void initialize (const Index * index);
170 
171  /// set a combination of parameters on an index
172  void set_index_parameters (Index *index, size_t cno) const;
173 
174  /// set a combination of parameters described by a string
175  void set_index_parameters (Index *index, const char *param_string) const;
176 
177  /// set one of the parameters
178  virtual void set_index_parameter (
179  Index * index, const std::string & name, double val) const;
180 
181  /** find an upper bound on the performance and a lower bound on t
182  * for configuration cno given another operating point op */
183  void update_bounds (size_t cno, const OperatingPoint & op,
184  double *upper_bound_perf,
185  double *lower_bound_t) const;
186 
187  /** explore operating points
188  * @param index index to run on
189  * @param xq query vectors (size nq * index.d)
190  * @param crit selection criterion
191  * @param ops resulting operating points
192  */
193  void explore (Index *index,
194  size_t nq, const float *xq,
195  const AutoTuneCriterion & crit,
196  OperatingPoints * ops) const;
197 
198  virtual ~ParameterSpace () {}
199 };
200 
201 /** Build an index with the sequence of processing steps described in
202  * the string. */
203 Index *index_factory (int d, const char *description,
204  MetricType metric = METRIC_L2);
205 
206 
207 
208 } // namespace faiss
209 
210 
211 
212 #endif
void explore(Index *index, size_t nq, const float *xq, const AutoTuneCriterion &crit, OperatingPoints *ops) const
Definition: AutoTune.cpp:512
std::vector< ParameterRange > parameter_ranges
all tunable parameters
Definition: AutoTune.h:135
std::string key
key that identifies this op pt
Definition: AutoTune.h:90
long cno
integer identifier
Definition: AutoTune.h:91
void display(bool only_optimal=true) const
easy-to-read output
Definition: AutoTune.cpp:220
double perf
performance measure (output of a Criterion)
Definition: AutoTune.h:88
double t_for_perf(double perf) const
get time required to obtain a given performance measure
Definition: AutoTune.cpp:173
idx_t nnn
nb of NNs that the query should request
Definition: AutoTune.h:30
virtual double evaluate(const float *D, const idx_t *I) const override
Definition: AutoTune.cpp:78
bool add(double perf, double t, const std::string &key, size_t cno=0)
add a performance measure. Return whether it is an optimal point
Definition: AutoTune.cpp:111
size_t batchsize
maximum number of queries to submit at a time.
Definition: AutoTune.h:146
virtual double evaluate(const float *D, const idx_t *I) const =0
idx_t nq
nb of queries this criterion is evaluated on
Definition: AutoTune.h:29
std::vector< OperatingPoint > optimal_pts
optimal operating points, sorted by perf
Definition: AutoTune.h:99
void set_groundtruth(int gt_nnn, const float *gt_D_in, const idx_t *gt_I_in)
Definition: AutoTune.cpp:37
ParameterRange & add_range(const char *name)
add a new parameter
Definition: AutoTune.cpp:321
idx_t gt_nnn
nb of GT NNs required to evaluate criterion
Definition: AutoTune.h:31
void all_to_gnuplot(const char *fname) const
output to a format easy to digest by gnuplot
Definition: AutoTune.cpp:187
bool thread_over_batches
use multithreading over batches (useful to benchmark independent single-searches) ...
Definition: AutoTune.h:149
std::vector< OperatingPoint > all_pts
all operating points
Definition: AutoTune.h:96
long idx_t
all indices are this type
Definition: Index.h:64
std::vector< float > gt_D
Ground-truth distances (size nq * gt_nnn)
Definition: AutoTune.h:33
std::string combination_name(size_t cno) const
get string representation of the combination
Definition: AutoTune.cpp:276
void update_bounds(size_t cno, const OperatingPoint &op, double *upper_bound_perf, double *lower_bound_t) const
Definition: AutoTune.cpp:498
virtual void initialize(const Index *index)
initialize with reasonable parameters for the index
Definition: AutoTune.cpp:330
int verbose
verbosity during exploration
Definition: AutoTune.h:140
int merge_with(const OperatingPoints &other, const std::string &prefix="")
add operating points from other to this, with a prefix to the keys
Definition: AutoTune.cpp:158
virtual void set_index_parameter(Index *index, const std::string &name, double val) const
set one of the parameters
Definition: AutoTune.cpp:419
size_t n_combinations() const
nb of combinations, = product of values sizes
Definition: AutoTune.cpp:267
void set_index_parameters(Index *index, size_t cno) const
set a combination of parameters on an index
Definition: AutoTune.cpp:388
void display() const
print a description on stdout
Definition: AutoTune.cpp:480
bool combination_ge(size_t c1, size_t c2) const
returns whether combinations c1 &gt;= c2 in the tuple sense
Definition: AutoTune.cpp:291
possible values of a parameter, sorted from least to most expensive/accurate
Definition: AutoTune.h:126
Index * index_factory(int d, const char *description_in, MetricType metric)
Definition: AutoTune.cpp:623
int n_experiments
nb of experiments during optimization (0 = try all combinations)
Definition: AutoTune.h:143
std::vector< idx_t > gt_I
Ground-truth indexes (size nq * gt_nnn)
Definition: AutoTune.h:34
double t
corresponding execution time (ms)
Definition: AutoTune.h:89
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:44