Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/PolysemousTraining.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #ifndef FAISS_POLYSEMOUS_TRAINING_INCLUDED
12 #define FAISS_POLYSEMOUS_TRAINING_INCLUDED
13 
14 
15 #include "ProductQuantizer.h"
16 
17 
18 namespace faiss {
19 
20 
21 /// parameters used for the simulated annealing method
23 
24  // optimization parameters
25  double init_temperature; // init probability of accepting a bad swap
26  double temperature_decay; // at each iteration the temp is multiplied by this
27  int n_iter; // nb of iterations
28  int n_redo; // nb of runs of the simulation
29  int seed; // random seed
30  int verbose;
31  bool only_bit_flips; // restrict permutation changes to bit flips
32  bool init_random; // initialize with a random permutation (not identity)
33 
34  // set reasonable defaults
36 
37 };
38 
39 
40 /// abstract class for the loss function
42 
43  int n;
44 
45  virtual double compute_cost (const int *perm) const = 0;
46 
47  // what would the cost update be if iw and jw were swapped?
48  // default implementation just computes both and computes the difference
49  virtual double cost_update (const int *perm, int iw, int jw) const;
50 
51  virtual ~PermutationObjective () {}
52 };
53 
54 
56 
57  double dis_weight_factor;
58 
59  static double sqr (double x) { return x * x; }
60 
61  // weighting of distances: it is more important to reproduce small
62  // distances well
63  double dis_weight (double x) const;
64 
65  std::vector<double> source_dis; ///< "real" corrected distances (size n^2)
66  const double * target_dis; ///< wanted distances (size n^2)
67  std::vector<double> weights; ///< weights for each distance (size n^2)
68 
69  double get_source_dis (int i, int j) const;
70 
71  // cost = quadratic difference between actual distance and Hamming distance
72  double compute_cost(const int* perm) const override;
73 
74  // what would the cost update be if iw and jw were swapped?
75  // computed in O(n) instead of O(n^2) for the full re-computation
76  double cost_update(const int* perm, int iw, int jw) const override;
77 
79  int n,
80  const double *source_dis_in,
81  const double *target_dis_in,
82  double dis_weight_factor);
83 
84  static void compute_mean_stdev (const double *tab, size_t n2,
85  double *mean_out, double *stddev_out);
86 
87  void set_affine_target_dis (const double *source_dis_in);
88 
89  ~ReproduceDistancesObjective() override {}
90 };
91 
92 struct RandomGenerator;
93 
94 /// Simulated annealing optimization algorithm for permutations.
96 
98  int n; ///< size of the permutation
99  FILE *logfile; ///< logs values of the cost function
100 
103  RandomGenerator *rnd;
104 
105  /// remember initial cost of optimization
106  double init_cost;
107 
108  // main entry point. Perform the optimization loop, starting from
109  // and modifying permutation in-place
110  double optimize (int *perm);
111 
112  // run the optimization and return the best result in best_perm
113  double run_optimization (int * best_perm);
114 
115  virtual ~SimulatedAnnealingOptimizer ();
116 };
117 
118 
119 
120 
121 /// optimizes the order of indices in a ProductQuantizer
123 
125  OT_None,
127  OT_Ranking_weighted_diff ///< same as _2, but use rank of y+ - rank of y-
128  };
129  Optimization_type_t optimization_type;
130 
131  // use 1/4 of the training points for the optimization, with
132  // max. ntrain_permutation. If ntrain_permutation == 0: train on
133  // centroids
134  int ntrain_permutation;
135  double dis_weight_factor; // decay of exp that weights distance loss
136 
137  // filename pattern for the logging of iterations
138  std::string log_pattern;
139 
140  // sets default values
142 
143  /// reorder the centroids so that the Hamming distance becomes a
144  /// good approximation of the SDC distance (called by train)
146  size_t n, const float *x) const;
147 
148  /// called by optimize_pq_for_hamming
149  void optimize_ranking (ProductQuantizer &pq, size_t n, const float *x) const;
150  /// called by optimize_pq_for_hamming
152 
153 };
154 
155 
156 } // namespace faiss
157 
158 
159 #endif
random generator that can be used in multithreaded contexts
Definition: utils.h:48
same as _2, but use rank of y+ - rank of y-
SimulatedAnnealingOptimizer(PermutationObjective *obj, const SimulatedAnnealingParameters &p)
logs values of the cost function
int n
size of the permutation
const double * target_dis
wanted distances (size n^2)
double init_cost
remember initial cost of optimization
void optimize_ranking(ProductQuantizer &pq, size_t n, const float *x) const
called by optimize_pq_for_hamming
optimizes the order of indices in a ProductQuantizer
std::vector< double > weights
weights for each distance (size n^2)
parameters used for the simulated annealing method
abstract class for the loss function
std::vector< double > source_dis
"real" corrected distances (size n^2)
void optimize_reproduce_distances(ProductQuantizer &pq) const
called by optimize_pq_for_hamming
void optimize_pq_for_hamming(ProductQuantizer &pq, size_t n, const float *x) const
Simulated annealing optimization algorithm for permutations.