Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/AutoTune.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 // -*- c++ -*-
11 
12 #ifndef FAISS_AUTO_TUNE_H
13 #define FAISS_AUTO_TUNE_H
14 
15 #include <vector>
16 
17 #include "Index.h"
18 
19 namespace faiss {
20 
21 
22 /**
23  * Evaluation criterion. Returns a performance measure in [0,1],
24  * higher is better.
25  */
27  typedef Index::idx_t idx_t;
28  idx_t nq; ///< nb of queries this criterion is evaluated on
29  idx_t nnn; ///< nb of NNs that the query should request
30  idx_t gt_nnn; ///< nb of GT NNs required to evaluate criterion
31 
32  std::vector<float> gt_D; ///< Ground-truth distances (size nq * gt_nnn)
33  std::vector<idx_t> gt_I; ///< Ground-truth indexes (size nq * gt_nnn)
34 
35  AutoTuneCriterion (idx_t nq, idx_t nnn);
36 
37  /** Initializes the gt_D and gt_I vectors. Must be called before evaluating
38  *
39  * @param gt_D_in size nq * gt_nnn
40  * @param gt_I_in size nq * gt_nnn
41  */
42  void set_groundtruth (int gt_nnn, const float *gt_D_in,
43  const idx_t *gt_I_in);
44 
45  /** Evaluate the criterion.
46  *
47  * @param D size nq * nnn
48  * @param I size nq * nnn
49  * @return the criterion, between 0 and 1. Larger is better.
50  */
51  virtual double evaluate (const float *D, const idx_t *I) const = 0;
52 
53  virtual ~AutoTuneCriterion () {}
54 
55 };
56 
58 
59  idx_t R;
60 
61  OneRecallAtRCriterion (idx_t nq, idx_t R);
62 
63  double evaluate(const float* D, const idx_t* I) const override;
64 
65  ~OneRecallAtRCriterion() override {}
66 };
67 
68 
70 
71  idx_t R;
72 
73  IntersectionCriterion (idx_t nq, idx_t R);
74 
75  double evaluate(const float* D, const idx_t* I) const override;
76 
77  ~IntersectionCriterion() override {}
78 };
79 
80 /**
81  * Maintains a list of experimental results. Each operating point is a
82  * (perf, t, key) triplet, where higher perf and lower t is
83  * better. The key field is an arbitrary identifier for the operating point
84  */
85 
87  double perf; ///< performance measure (output of a Criterion)
88  double t; ///< corresponding execution time (ms)
89  std::string key; ///< key that identifies this op pt
90  long cno; ///< integer identifier
91 };
92 
94  /// all operating points
95  std::vector<OperatingPoint> all_pts;
96 
97  /// optimal operating points, sorted by perf
98  std::vector<OperatingPoint> optimal_pts;
99 
100  // begins with a single operating point: t=0, perf=0
101  OperatingPoints ();
102 
103  /// add operating points from other to this, with a prefix to the keys
104  int merge_with (const OperatingPoints &other,
105  const std::string & prefix = "");
106 
107  void clear ();
108 
109  /// add a performance measure. Return whether it is an optimal point
110  bool add (double perf, double t, const std::string & key, size_t cno = 0);
111 
112  /// get time required to obtain a given performance measure
113  double t_for_perf (double perf) const;
114 
115  /// easy-to-read output
116  void display (bool only_optimal = true) const;
117 
118  /// output to a format easy to digest by gnuplot
119  void all_to_gnuplot (const char *fname) const;
120  void optimal_to_gnuplot (const char *fname) const;
121 
122 };
123 
124 /// possible values of a parameter, sorted from least to most expensive/accurate
126  std::string name;
127  std::vector<double> values;
128 };
129 
130 /** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters.
131  */
133  /// all tunable parameters
134  std::vector<ParameterRange> parameter_ranges;
135 
136  // exploration parameters
137 
138  /// verbosity during exploration
139  int verbose;
140 
141  /// nb of experiments during optimization (0 = try all combinations)
143 
144  /// maximum number of queries to submit at a time.
145  size_t batchsize;
146 
147  /// use multithreading over batches (useful to benchmark
148  /// independent single-searches)
150 
151  ParameterSpace ();
152 
153  /// nb of combinations, = product of values sizes
154  size_t n_combinations () const;
155 
156  /// returns whether combinations c1 >= c2 in the tuple sense
157  bool combination_ge (size_t c1, size_t c2) const;
158 
159  /// get string representation of the combination
160  std::string combination_name (size_t cno) const;
161 
162  /// print a description on stdout
163  void display () const;
164 
165  /// add a new parameter
166  ParameterRange &add_range(const char * name);
167 
168  /// initialize with reasonable parameters for the index
169  virtual void initialize (const Index * index);
170 
171  /// set a combination of parameters on an index
172  void set_index_parameters (Index *index, size_t cno) const;
173 
174  /// set a combination of parameters described by a string
175  void set_index_parameters (Index *index, const char *param_string) const;
176 
177  /// set one of the parameters
178  virtual void set_index_parameter (
179  Index * index, const std::string & name, double val) const;
180 
181  /** find an upper bound on the performance and a lower bound on t
182  * for configuration cno given another operating point op */
183  void update_bounds (size_t cno, const OperatingPoint & op,
184  double *upper_bound_perf,
185  double *lower_bound_t) const;
186 
187  /** explore operating points
188  * @param index index to run on
189  * @param xq query vectors (size nq * index.d)
190  * @param crit selection criterion
191  * @param ops resulting operating points
192  */
193  void explore (Index *index,
194  size_t nq, const float *xq,
195  const AutoTuneCriterion & crit,
196  OperatingPoints * ops) const;
197 
198  virtual ~ParameterSpace () {}
199 };
200 
201 /** Build an index with the sequence of processing steps described in
202  * the string. */
203 Index *index_factory (int d, const char *description,
204  MetricType metric = METRIC_L2);
205 
206 
207 
208 } // namespace faiss
209 
210 
211 
212 #endif
void explore(Index *index, size_t nq, const float *xq, const AutoTuneCriterion &crit, OperatingPoints *ops) const
Definition: AutoTune.cpp:515
std::vector< ParameterRange > parameter_ranges
all tunable parameters
Definition: AutoTune.h:134
std::string key
key that identifies this op pt
Definition: AutoTune.h:89
long cno
integer identifier
Definition: AutoTune.h:90
void display(bool only_optimal=true) const
easy-to-read output
Definition: AutoTune.cpp:221
double perf
performance measure (output of a Criterion)
Definition: AutoTune.h:87
double t_for_perf(double perf) const
get time required to obtain a given performance measure
Definition: AutoTune.cpp:174
idx_t nnn
nb of NNs that the query should request
Definition: AutoTune.h:29
bool add(double perf, double t, const std::string &key, size_t cno=0)
add a performance measure. Return whether it is an optimal point
Definition: AutoTune.cpp:112
size_t batchsize
maximum number of queries to submit at a time.
Definition: AutoTune.h:145
virtual double evaluate(const float *D, const idx_t *I) const =0
idx_t nq
nb of queries this criterion is evaluated on
Definition: AutoTune.h:28
std::vector< OperatingPoint > optimal_pts
optimal operating points, sorted by perf
Definition: AutoTune.h:98
void set_groundtruth(int gt_nnn, const float *gt_D_in, const idx_t *gt_I_in)
Definition: AutoTune.cpp:36
ParameterRange & add_range(const char *name)
add a new parameter
Definition: AutoTune.cpp:322
idx_t gt_nnn
nb of GT NNs required to evaluate criterion
Definition: AutoTune.h:30
void all_to_gnuplot(const char *fname) const
output to a format easy to digest by gnuplot
Definition: AutoTune.cpp:188
std::vector< OperatingPoint > all_pts
all operating points
Definition: AutoTune.h:95
long idx_t
all indices are this type
Definition: Index.h:62
std::vector< float > gt_D
Ground-truth distances (size nq * gt_nnn)
Definition: AutoTune.h:32
std::string combination_name(size_t cno) const
get string representation of the combination
Definition: AutoTune.cpp:277
void update_bounds(size_t cno, const OperatingPoint &op, double *upper_bound_perf, double *lower_bound_t) const
Definition: AutoTune.cpp:501
virtual void initialize(const Index *index)
initialize with reasonable parameters for the index
Definition: AutoTune.cpp:331
int verbose
verbosity during exploration
Definition: AutoTune.h:139
int merge_with(const OperatingPoints &other, const std::string &prefix="")
add operating points from other to this, with a prefix to the keys
Definition: AutoTune.cpp:159
virtual void set_index_parameter(Index *index, const std::string &name, double val) const
set one of the parameters
Definition: AutoTune.cpp:422
size_t n_combinations() const
nb of combinations, = product of values sizes
Definition: AutoTune.cpp:268
void set_index_parameters(Index *index, size_t cno) const
set a combination of parameters on an index
Definition: AutoTune.cpp:389
void display() const
print a description on stdout
Definition: AutoTune.cpp:483
bool combination_ge(size_t c1, size_t c2) const
returns whether combinations c1 >= c2 in the tuple sense
Definition: AutoTune.cpp:292
possible values of a parameter, sorted from least to most expensive/accurate
Definition: AutoTune.h:125
Index * index_factory(int d, const char *description_in, MetricType metric)
Definition: AutoTune.cpp:639
int n_experiments
nb of experiments during optimization (0 = try all combinations)
Definition: AutoTune.h:142
std::vector< idx_t > gt_I
Ground-truth indexes (size nq * gt_nnn)
Definition: AutoTune.h:33
double t
corresponding execution time (ms)
Definition: AutoTune.h:88
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:43