/** * Copyright (c) Facebook, Inc. and its affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ // -*- c++ -*- #ifndef FAISS_AUTO_TUNE_H #define FAISS_AUTO_TUNE_H #include #include #include #include #include namespace faiss { /** * Evaluation criterion. Returns a performance measure in [0,1], * higher is better. */ struct AutoTuneCriterion { typedef Index::idx_t idx_t; idx_t nq; ///< nb of queries this criterion is evaluated on idx_t nnn; ///< nb of NNs that the query should request idx_t gt_nnn; ///< nb of GT NNs required to evaluate crterion std::vector gt_D; ///< Ground-truth distances (size nq * gt_nnn) std::vector gt_I; ///< Ground-truth indexes (size nq * gt_nnn) AutoTuneCriterion (idx_t nq, idx_t nnn); /** Intitializes the gt_D and gt_I vectors. Must be called before evaluating * * @param gt_D_in size nq * gt_nnn * @param gt_I_in size nq * gt_nnn */ void set_groundtruth (int gt_nnn, const float *gt_D_in, const idx_t *gt_I_in); /** Evaluate the criterion. * * @param D size nq * nnn * @param I size nq * nnn * @return the criterion, between 0 and 1. Larger is better. */ virtual double evaluate (const float *D, const idx_t *I) const = 0; virtual ~AutoTuneCriterion () {} }; struct OneRecallAtRCriterion: AutoTuneCriterion { idx_t R; OneRecallAtRCriterion (idx_t nq, idx_t R); double evaluate(const float* D, const idx_t* I) const override; ~OneRecallAtRCriterion() override {} }; struct IntersectionCriterion: AutoTuneCriterion { idx_t R; IntersectionCriterion (idx_t nq, idx_t R); double evaluate(const float* D, const idx_t* I) const override; ~IntersectionCriterion() override {} }; /** * Maintains a list of experimental results. Each operating point is a * (perf, t, key) triplet, where higher perf and lower t is * better. The key field is an arbitrary identifier for the operating point */ struct OperatingPoint { double perf; ///< performance measure (output of a Criterion) double t; ///< corresponding execution time (ms) std::string key; ///< key that identifies this op pt int64_t cno; ///< integer identifer }; struct OperatingPoints { /// all operating points std::vector all_pts; /// optimal operating points, sorted by perf std::vector optimal_pts; // begins with a single operating point: t=0, perf=0 OperatingPoints (); /// add operating points from other to this, with a prefix to the keys int merge_with (const OperatingPoints &other, const std::string & prefix = ""); void clear (); /// add a performance measure. Return whether it is an optimal point bool add (double perf, double t, const std::string & key, size_t cno = 0); /// get time required to obtain a given performance measure double t_for_perf (double perf) const; /// easy-to-read output void display (bool only_optimal = true) const; /// output to a format easy to digest by gnuplot void all_to_gnuplot (const char *fname) const; void optimal_to_gnuplot (const char *fname) const; }; /// possible values of a parameter, sorted from least to most expensive/accurate struct ParameterRange { std::string name; std::vector values; }; /** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters. */ struct ParameterSpace { /// all tunable parameters std::vector parameter_ranges; // exploration parameters /// verbosity during exploration int verbose; /// nb of experiments during optimization (0 = try all combinations) int n_experiments; /// maximum number of queries to submit at a time. size_t batchsize; /// use multithreading over batches (useful to benchmark /// independent single-searches) bool thread_over_batches; /// run tests several times until they reach at least this /// duration (to avoid jittering in MT mode) double min_test_duration; ParameterSpace (); /// nb of combinations, = product of values sizes size_t n_combinations () const; /// returns whether combinations c1 >= c2 in the tuple sense bool combination_ge (size_t c1, size_t c2) const; /// get string representation of the combination std::string combination_name (size_t cno) const; /// print a description on stdout void display () const; /// add a new parameter (or return it if it exists) ParameterRange &add_range(const char * name); /// initialize with reasonable parameters for the index virtual void initialize (const Index * index); /// set a combination of parameters on an index void set_index_parameters (Index *index, size_t cno) const; /// set a combination of parameters described by a string void set_index_parameters (Index *index, const char *param_string) const; /// set one of the parameters virtual void set_index_parameter ( Index * index, const std::string & name, double val) const; /** find an upper bound on the performance and a lower bound on t * for configuration cno given another operating point op */ void update_bounds (size_t cno, const OperatingPoint & op, double *upper_bound_perf, double *lower_bound_t) const; /** explore operating points * @param index index to run on * @param xq query vectors (size nq * index.d) * @param crit selection criterion * @param ops resulting operating points */ void explore (Index *index, size_t nq, const float *xq, const AutoTuneCriterion & crit, OperatingPoints * ops) const; virtual ~ParameterSpace () {} }; } // namespace faiss #endif