Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/AutoTune.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #ifndef FAISS_AUTO_TUNE_H
11 #define FAISS_AUTO_TUNE_H
12 
13 #include <vector>
14 #include <unordered_map>
15 
16 #include "Index.h"
17 #include "IndexBinary.h"
18 
19 namespace faiss {
20 
21 
22 /**
23  * Evaluation criterion. Returns a performance measure in [0,1],
24  * higher is better.
25  */
27  typedef Index::idx_t idx_t;
28  idx_t nq; ///< nb of queries this criterion is evaluated on
29  idx_t nnn; ///< nb of NNs that the query should request
30  idx_t gt_nnn; ///< nb of GT NNs required to evaluate criterion
31 
32  std::vector<float> gt_D; ///< Ground-truth distances (size nq * gt_nnn)
33  std::vector<idx_t> gt_I; ///< Ground-truth indexes (size nq * gt_nnn)
34 
35  AutoTuneCriterion (idx_t nq, idx_t nnn);
36 
37  /** Initializes the gt_D and gt_I vectors. Must be called before evaluating
38  *
39  * @param gt_D_in size nq * gt_nnn
40  * @param gt_I_in size nq * gt_nnn
41  */
42  void set_groundtruth (int gt_nnn, const float *gt_D_in,
43  const idx_t *gt_I_in);
44 
45  /** Evaluate the criterion.
46  *
47  * @param D size nq * nnn
48  * @param I size nq * nnn
49  * @return the criterion, between 0 and 1. Larger is better.
50  */
51  virtual double evaluate (const float *D, const idx_t *I) const = 0;
52 
53  virtual ~AutoTuneCriterion () {}
54 
55 };
56 
58 
59  idx_t R;
60 
61  OneRecallAtRCriterion (idx_t nq, idx_t R);
62 
63  double evaluate(const float* D, const idx_t* I) const override;
64 
65  ~OneRecallAtRCriterion() override {}
66 };
67 
68 
70 
71  idx_t R;
72 
73  IntersectionCriterion (idx_t nq, idx_t R);
74 
75  double evaluate(const float* D, const idx_t* I) const override;
76 
77  ~IntersectionCriterion() override {}
78 };
79 
80 /**
81  * Maintains a list of experimental results. Each operating point is a
82  * (perf, t, key) triplet, where higher perf and lower t is
83  * better. The key field is an arbitrary identifier for the operating point
84  */
85 
87  double perf; ///< performance measure (output of a Criterion)
88  double t; ///< corresponding execution time (ms)
89  std::string key; ///< key that identifies this op pt
90  long cno; ///< integer identifier
91 };
92 
94  /// all operating points
95  std::vector<OperatingPoint> all_pts;
96 
97  /// optimal operating points, sorted by perf
98  std::vector<OperatingPoint> optimal_pts;
99 
100  // begins with a single operating point: t=0, perf=0
101  OperatingPoints ();
102 
103  /// add operating points from other to this, with a prefix to the keys
104  int merge_with (const OperatingPoints &other,
105  const std::string & prefix = "");
106 
107  void clear ();
108 
109  /// add a performance measure. Return whether it is an optimal point
110  bool add (double perf, double t, const std::string & key, size_t cno = 0);
111 
112  /// get time required to obtain a given performance measure
113  double t_for_perf (double perf) const;
114 
115  /// easy-to-read output
116  void display (bool only_optimal = true) const;
117 
118  /// output to a format easy to digest by gnuplot
119  void all_to_gnuplot (const char *fname) const;
120  void optimal_to_gnuplot (const char *fname) const;
121 
122 };
123 
124 /// possible values of a parameter, sorted from least to most expensive/accurate
126  std::string name;
127  std::vector<double> values;
128 };
129 
130 /** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters.
131  */
133  /// all tunable parameters
134  std::vector<ParameterRange> parameter_ranges;
135 
136  // exploration parameters
137 
138  /// verbosity during exploration
139  int verbose;
140 
141  /// nb of experiments during optimization (0 = try all combinations)
143 
144  /// maximum number of queries to submit at a time.
145  size_t batchsize;
146 
147  /// use multithreading over batches (useful to benchmark
148  /// independent single-searches)
150 
151  /// run tests several times until they reach at least this
152  /// duration (to avoid jittering in MT mode)
154 
155  ParameterSpace ();
156 
157  /// nb of combinations, = product of values sizes
158  size_t n_combinations () const;
159 
160  /// returns whether combinations c1 >= c2 in the tuple sense
161  bool combination_ge (size_t c1, size_t c2) const;
162 
163  /// get string representation of the combination
164  std::string combination_name (size_t cno) const;
165 
166  /// print a description on stdout
167  void display () const;
168 
169  /// add a new parameter (or return it if it exists)
170  ParameterRange &add_range(const char * name);
171 
172  /// initialize with reasonable parameters for the index
173  virtual void initialize (const Index * index);
174 
175  /// set a combination of parameters on an index
176  void set_index_parameters (Index *index, size_t cno) const;
177 
178  /// set a combination of parameters described by a string
179  void set_index_parameters (Index *index, const char *param_string) const;
180 
181  /// set one of the parameters
182  virtual void set_index_parameter (
183  Index * index, const std::string & name, double val) const;
184 
185  /** find an upper bound on the performance and a lower bound on t
186  * for configuration cno given another operating point op */
187  void update_bounds (size_t cno, const OperatingPoint & op,
188  double *upper_bound_perf,
189  double *lower_bound_t) const;
190 
191  /** explore operating points
192  * @param index index to run on
193  * @param xq query vectors (size nq * index.d)
194  * @param crit selection criterion
195  * @param ops resulting operating points
196  */
197  void explore (Index *index,
198  size_t nq, const float *xq,
199  const AutoTuneCriterion & crit,
200  OperatingPoints * ops) const;
201 
202  virtual ~ParameterSpace () {}
203 };
204 
205 /** Build an index with the sequence of processing steps described in
206  * the string. */
207 Index *index_factory (int d, const char *description,
208  MetricType metric = METRIC_L2);
209 
210 IndexBinary *index_binary_factory (int d, const char *description); ///< same idea for IndexBinary; presumably d is the vector dimension -- TODO confirm in AutoTune.cpp
211 
212 
213 /** Reports some statistics on a dataset and comments on them.
214  *
215  * It is a class rather than a function so that all stats can also be
216  * accessed from code */
217 
218 struct MatrixStats {
219  MatrixStats (size_t n, size_t d, const float *x); ///< presumably x holds n vectors of dimension d (n * d floats) -- TODO confirm in AutoTune.cpp
220  std::string comments; ///< textual report; presumably filled through do_comment -- TODO confirm
221 
222  // raw statistics
223  size_t n, d; ///< presumably the nb of vectors and dimension given to the constructor -- TODO confirm
224  size_t n_collision, n_valid, n0; ///< counters; exact meaning not visible here (by name: collisions / valid vectors / zero vectors?) -- TODO confirm
225  double min_norm2, max_norm2; ///< presumably extreme squared norms over the vectors -- TODO confirm
226 
227  struct PerDimStats { ///< statistics record, stored per entry of per_dim_stats
228  size_t n, n_nan, n_inf, n0; ///< counters; by name: values seen / NaNs / infinities / zeros -- TODO confirm
229 
230  float min, max; ///< presumably smallest / largest value passed to add() -- TODO confirm
231  double sum, sum2; ///< presumably running sum and sum of squares -- TODO confirm
232 
233  size_t n_valid; ///< presumably nb of values that are neither NaN nor inf -- TODO confirm
234  double mean, stddev; ///< presumably set by compute_mean_std() -- TODO confirm
235 
236  PerDimStats();
237  void add (float x); ///< accumulate one value (implementation not visible here)
238  void compute_mean_std (); ///< by name: derives mean and stddev from the accumulated values -- TODO confirm
239  };
240 
241  std::vector<PerDimStats> per_dim_stats; ///< presumably one entry per dimension (size d) -- TODO confirm
242  struct Occurrence { ///< value type of the occurrences map
243  size_t first; ///< presumably index of the first vector with this key -- TODO confirm
244  size_t count; ///< presumably nb of vectors sharing this key -- TODO confirm
245  };
246  std::unordered_map<uint64_t, Occurrence> occurrences; ///< keyed by a 64-bit value, presumably a hash of the vector -- TODO confirm
247 
248  char *buf; ///< raw buffer; ownership and lifetime are not visible in this header -- NOTE(review): check who allocates / frees it
249  size_t nbuf; ///< size associated with buf (capacity or remaining space?) -- TODO confirm
250  void do_comment (const char *fmt, ...); ///< printf-style variadic helper; presumably appends formatted text to the report -- TODO confirm
251 
252 };
253 
254 
255 
256 } // namespace faiss
257 
258 
259 
260 #endif
void explore(Index *index, size_t nq, const float *xq, const AutoTuneCriterion &crit, OperatingPoints *ops) const
Definition: AutoTune.cpp:597
std::vector< ParameterRange > parameter_ranges
all tunable parameters
Definition: AutoTune.h:134
std::string key
key that identifies this op pt
Definition: AutoTune.h:89
long cno
integer identifier
Definition: AutoTune.h:90
double min_test_duration
Definition: AutoTune.h:153
double evaluate(const float *D, const idx_t *I) const override
Definition: AutoTune.cpp:63
void display(bool only_optimal=true) const
easy-to-read output
Definition: AutoTune.cpp:230
double perf
performance measure (output of a Criterion)
Definition: AutoTune.h:87
double t_for_perf(double perf) const
get time required to obtain a given performance measure
Definition: AutoTune.cpp:183
idx_t nnn
nb of NNs that the query should request
Definition: AutoTune.h:29
double evaluate(const float *D, const idx_t *I) const override
Definition: AutoTune.cpp:87
bool add(double perf, double t, const std::string &key, size_t cno=0)
add a performance measure. Return whether it is an optimal point
Definition: AutoTune.cpp:121
size_t batchsize
maximum number of queries to submit at a time.
Definition: AutoTune.h:145
virtual double evaluate(const float *D, const idx_t *I) const =0
idx_t nq
nb of queries this criterion is evaluated on
Definition: AutoTune.h:28
std::vector< OperatingPoint > optimal_pts
optimal operating points, sorted by perf
Definition: AutoTune.h:98
void set_groundtruth(int gt_nnn, const float *gt_D_in, const idx_t *gt_I_in)
Definition: AutoTune.cpp:45
ParameterRange & add_range(const char *name)
add a new parameter (or return it if it exists)
Definition: AutoTune.cpp:333
idx_t gt_nnn
nb of GT NNs required to evaluate criterion
Definition: AutoTune.h:30
void all_to_gnuplot(const char *fname) const
output to a format easy to digest by gnuplot
Definition: AutoTune.cpp:197
long idx_t
all indices are this type
Definition: Index.h:62
std::vector< OperatingPoint > all_pts
all operating points
Definition: AutoTune.h:95
std::vector< float > gt_D
Ground-truth distances (size nq * gt_nnn)
Definition: AutoTune.h:32
std::string combination_name(size_t cno) const
get string representation of the combination
Definition: AutoTune.cpp:288
void update_bounds(size_t cno, const OperatingPoint &op, double *upper_bound_perf, double *lower_bound_t) const
Definition: AutoTune.cpp:583
virtual void initialize(const Index *index)
initialize with reasonable parameters for the index
Definition: AutoTune.cpp:347
int verbose
verbosity during exploration
Definition: AutoTune.h:139
int merge_with(const OperatingPoints &other, const std::string &prefix="")
add operating points from other to this, with a prefix to the keys
Definition: AutoTune.cpp:168
virtual void set_index_parameter(Index *index, const std::string &name, double val) const
set one of the parameters
Definition: AutoTune.cpp:455
size_t n_combinations() const
nb of combinations, = product of values sizes
Definition: AutoTune.cpp:279
void set_index_parameters(Index *index, size_t cno) const
set a combination of parameters on an index
Definition: AutoTune.cpp:422
void display() const
print a description on stdout
Definition: AutoTune.cpp:565
bool combination_ge(size_t c1, size_t c2) const
returns whether combinations c1 >= c2 in the tuple sense
Definition: AutoTune.cpp:303
possible values of a parameter, sorted from least to most expensive/accurate
Definition: AutoTune.h:125
Index * index_factory(int d, const char *description_in, MetricType metric)
Definition: AutoTune.cpp:741
int n_experiments
nb of experiments during optimization (0 = try all combinations)
Definition: AutoTune.h:142
std::vector< idx_t > gt_I
Ground-truth indexes (size nq * gt_nnn)
Definition: AutoTune.h:33
double t
corresponding execution time (ms)
Definition: AutoTune.h:88
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:44