Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/PolysemousTraining.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #ifndef FAISS_POLYSEMOUS_TRAINING_INCLUDED
11 #define FAISS_POLYSEMOUS_TRAINING_INCLUDED
12 
13 
14 #include "ProductQuantizer.h"
15 
16 
17 namespace faiss {
18 
19 
20 /// parameters used for the simulated annealing method
22 
23  // optimization parameters
24  double init_temperature; // init probability of accepting a bad swap
25  double temperature_decay; // at each iteration the temp is multiplied by this
26  int n_iter; // nb of iterations
27  int n_redo; // nb of runs of the simulation
28  int seed; // random seed
29  int verbose;
30  bool only_bit_flips; // restrict permutation changes to bit flips
31  bool init_random; // initialize with a random permutation (not identity)
32 
33  // set reasonable defaults
35 
36 };
37 
38 
39 /// abstract class for the loss function
41 
42  int n;
43 
44  virtual double compute_cost (const int *perm) const = 0;
45 
46  // what would the cost update be if iw and jw were swapped?
47  // default implementation just computes both and computes the difference
48  virtual double cost_update (const int *perm, int iw, int jw) const;
49 
50  virtual ~PermutationObjective () {}
51 };
52 
53 
55 
56  double dis_weight_factor;
57 
58  static double sqr (double x) { return x * x; }
59 
60  // weighting of distances: it is more important to reproduce small
61  // distances well
62  double dis_weight (double x) const;
63 
64  std::vector<double> source_dis; ///< "real" corrected distances (size n^2)
65  const double * target_dis; ///< wanted distances (size n^2)
66  std::vector<double> weights; ///< weights for each distance (size n^2)
67 
68  double get_source_dis (int i, int j) const;
69 
70  // cost = quadratic difference between actual distance and Hamming distance
71  double compute_cost(const int* perm) const override;
72 
73  // what would the cost update be if iw and jw were swapped?
74  // computed in O(n) instead of O(n^2) for the full re-computation
75  double cost_update(const int* perm, int iw, int jw) const override;
76 
78  int n,
79  const double *source_dis_in,
80  const double *target_dis_in,
81  double dis_weight_factor);
82 
83  static void compute_mean_stdev (const double *tab, size_t n2,
84  double *mean_out, double *stddev_out);
85 
86  void set_affine_target_dis (const double *source_dis_in);
87 
88  ~ReproduceDistancesObjective() override {}
89 };
90 
91 struct RandomGenerator;
92 
93 /// Simulated annealing optimization algorithm for permutations.
95 
97  int n; ///< size of the permutation
98  FILE *logfile; ///< logs values of the cost function
99 
102  RandomGenerator *rnd;
103 
104  /// remember initial cost of optimization
105  double init_cost;
106 
107  // main entry point. Perform the optimization loop, starting from
108  // and modifying permutation in-place
109  double optimize (int *perm);
110 
111  // run the optimization and return the best result in best_perm
112  double run_optimization (int * best_perm);
113 
114  virtual ~SimulatedAnnealingOptimizer ();
115 };
116 
117 
118 
119 
120 /// optimizes the order of indices in a ProductQuantizer
122 
124  OT_None,
126  OT_Ranking_weighted_diff /// same as _2, but use rank of y+ - rank of y-
127  };
128  Optimization_type_t optimization_type;
129 
130  // use 1/4 of the training points for the optimization, with
131  // max. ntrain_permutation. If ntrain_permutation == 0: train on
132  // centroids
133  int ntrain_permutation;
134  double dis_weight_factor; // decay of exp that weights distance loss
135 
136  // filename pattern for the logging of iterations
137  std::string log_pattern;
138 
139  // sets default values
141 
142  /// reorder the centroids so that the Hamming distance becomes a
143  /// good approximation of the SDC distance (called by train)
145  size_t n, const float *x) const;
146 
147  /// called by optimize_pq_for_hamming
148  void optimize_ranking (ProductQuantizer &pq, size_t n, const float *x) const;
149  /// called by optimize_pq_for_hamming
151 
152 };
153 
154 
155 } // namespace faiss
156 
157 
158 #endif
random generator that can be used in multithreaded contexts
Definition: utils.h:47
same as _2, but use rank of y+ - rank of y-
SimulatedAnnealingOptimizer(PermutationObjective *obj, const SimulatedAnnealingParameters &p)
logs values of the cost function
int n
size of the permutation
const double * target_dis
wanted distances (size n^2)
double init_cost
remember initial cost of optimization
void optimize_ranking(ProductQuantizer &pq, size_t n, const float *x) const
called by optimize_pq_for_hamming
optimizes the order of indices in a ProductQuantizer
std::vector< double > weights
weights for each distance (size n^2)
parameters used for the simulated annealing method
abstract class for the loss function
std::vector< double > source_dis
"real" corrected distances (size n^2)
void optimize_reproduce_distances(ProductQuantizer &pq) const
called by optimize_pq_for_hamming
void optimize_pq_for_hamming(ProductQuantizer &pq, size_t n, const float *x) const
Simulated annealing optimization algorithm for permutations.