Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/PolysemousTraining.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 // -*- c++ -*-
12 
13 #ifndef FAISS_POLYSEMOUS_TRAINING_INCLUDED
14 #define FAISS_POLYSEMOUS_TRAINING_INCLUDED
15 
16 
17 #include "ProductQuantizer.h"
18 
19 
20 namespace faiss {
21 
22 
23 
24 
25 /// parameters used for the simulated annealing method
27 
28  // optimization parameters
29  double init_temperature; // init probability of accepting a bad swap
30  double temperature_decay; // at each iteration the temp is multiplied by this
31  int n_iter; // nb of iterations
32  int n_redo; // nb of runs of the simulation
33  int seed; // random seed
34  int verbose;
35  bool only_bit_flips; // restrict permutation changes to bit flips
36  bool init_random; // initialize with a random permutation (not identity)
37 
38  // set reasonable defaults
40 
41 };
42 
43 
44 /// abstract class for the loss function
46 
47  int n;
48 
49  virtual double compute_cost (const int *perm) const = 0;
50 
51  // what would the cost update be if iw and jw were swapped?
52  // default implementation just computes both and computes the difference
53  virtual double cost_update (const int *perm, int iw, int jw) const;
54 
55  virtual ~PermutationObjective () {}
56 };
57 
58 
60 
61  double dis_weight_factor;
62 
63  static double sqr (double x) { return x * x; }
64 
65  // weighting of distances: it is more important to reproduce small
66  // distances well
67  double dis_weight (double x) const;
68 
69  std::vector<double> source_dis; ///< "real" corrected distances (size n^2)
70  const double * target_dis; ///< wanted distances (size n^2)
71  std::vector<double> weights; ///< weights for each distance (size n^2)
72 
73  double get_source_dis (int i, int j) const;
74 
75  // cost = quadratic difference between actual distance and Hamming distance
76  virtual double compute_cost (const int *perm) const override;
77 
78  // what would the cost update be if iw and jw were swapped?
79  // computed in O(n) instead of O(n^2) for the full re-computation
80  virtual double cost_update (const int *perm, int iw, int jw) const override;
81 
83  int n,
84  const double *source_dis_in,
85  const double *target_dis_in,
86  double dis_weight_factor);
87 
88  static void compute_mean_stdev (const double *tab, size_t n2,
89  double *mean_out, double *stddev_out);
90 
91  void set_affine_target_dis (const double *source_dis_in);
92 
93  virtual ~ReproduceDistancesObjective () {}
94 
95 };
96 
97 struct RandomGenerator;
98 
99 /// Simulated annealing optimization algorithm for permutations.
101 
103  int n; ///< size of the permutation
104  FILE *logfile; ///< logs values of the cost function
105 
108  RandomGenerator *rnd;
109 
110  /// remember initial cost of optimization
111  double init_cost;
112 
113  // main entry point. Perform the optimization loop, starting from
114  // and modifying permutation in-place
115  double optimize (int *perm);
116 
117  // run the optimization and return the best result in best_perm
118  double run_optimization (int * best_perm);
119 
120  virtual ~SimulatedAnnealingOptimizer ();
121 };
122 
123 
124 
125 
126 /// optimizes the order of indices in a ProductQuantizer
128 
130  OT_None,
132  OT_Ranking_weighted_diff ///< same as _2, but use rank of y+ - rank of y-
133  };
134  Optimization_type_t optimization_type;
135 
136  // use 1/4 of the training points for the optimization, with
137  // max. ntrain_permutation. If ntrain_permutation == 0: train on
138  // centroids
139  int ntrain_permutation;
140  double dis_weight_factor; // decay of exp that weights distance loss
141 
142  // filename pattern for the logging of iterations
143  std::string log_pattern;
144 
145  // sets default values
147 
148  /// reorder the centroids so that the Hamming distance becomes a
149  /// good approximation of the SDC distance (called by train)
151  size_t n, const float *x) const;
152 
153  /// called by optimize_pq_for_hamming
154  void optimize_ranking (ProductQuantizer &pq, size_t n, const float *x) const;
155  /// called by optimize_pq_for_hamming
157 
158 };
159 
160 
161 
162 } // namespace faiss
163 
164 
165 #endif
random generator that can be used in multithreaded contexts
Definition: utils.h:49
same as _2, but use rank of y+ - rank of y-
SimulatedAnnealingOptimizer(PermutationObjective *obj, const SimulatedAnnealingParameters &p)
logs values of the cost function
int n
size of the permutation
const double * target_dis
wanted distances (size n^2)
double init_cost
remember initial cost of optimization
void optimize_ranking(ProductQuantizer &pq, size_t n, const float *x) const
called by optimize_pq_for_hamming
optimizes the order of indices in a ProductQuantizer
std::vector< double > weights
weights for each distance (size n^2)
parameters used for the simulated annealing method
abstract class for the loss function
std::vector< double > source_dis
"real" corrected distances (size n^2)
void optimize_reproduce_distances(ProductQuantizer &pq) const
called by optimize_pq_for_hamming
void optimize_pq_for_hamming(ProductQuantizer &pq, size_t n, const float *x) const
Simulated annealing optimization algorithm for permutations.