Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/VectorTransform.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #ifndef FAISS_VECTOR_TRANSFORM_H
11 #define FAISS_VECTOR_TRANSFORM_H
12 
13 /** Defines a few objects that apply transformations to a set of
14  * vectors. Often these are pre-processing steps.
15  */
16 
17 #include <vector>
18 
19 #include "Index.h"
20 
21 
22 namespace faiss {
23 
24 
25 /** Any transformation applied on a set of vectors */
27 
28  typedef Index::idx_t idx_t;
29 
30  int d_in; ///< input dimension
31  int d_out; ///< output dimension
32 
33  explicit VectorTransform (int d_in = 0, int d_out = 0):
34  d_in(d_in), d_out(d_out), is_trained(true)
35  {}
36 
37 
38  /// set if the VectorTransform does not require training, or if
39  /// training is done already
40  bool is_trained;
41 
42 
43  /** Perform training on a representative set of vectors. Does
44  * nothing by default.
45  *
46  * @param n nb of training vectors
47  * @param x training vectors, size n * d
48  */
49  virtual void train (idx_t n, const float *x);
50 
51  /** apply the transformation, return a newly allocated matrix
52  * @param x size n * d_in
53  * @return size n * d_out
54  */
55  float *apply (idx_t n, const float * x) const;
56 
57  /// same as apply, but result is pre-allocated
58  virtual void apply_noalloc (idx_t n, const float * x,
59  float *xt) const = 0;
60 
61  /// reverse transformation. May not be implemented or may return
62  /// approximate result
63  virtual void reverse_transform (idx_t n, const float * xt,
64  float *x) const;
65 
66  virtual ~VectorTransform () {}
67 
68 };
69 
70 
71 
72 /** Generic linear transformation, with bias term applied on output
73  * y = A * x + b
74  */
76 
77  bool have_bias; ///< whether to use the bias term
78 
79  /// check if matrix A is orthonormal (enables reverse_transform)
81 
82  /// Transformation matrix, size d_out * d_in
83  std::vector<float> A;
84 
85  /// bias vector, size d_out
86  std::vector<float> b;
87 
88  /// both d_in > d_out and d_in < d_out are supported
89  explicit LinearTransform (int d_in = 0, int d_out = 0,
90  bool have_bias = false);
91 
92  /// same as apply, but result is pre-allocated
93  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
94 
95  /// compute x = A^T * (y - b)
96  /// is reverse transform if A has orthonormal lines
97  void transform_transpose (idx_t n, const float * y,
98  float *x) const;
99 
100  /// works only if is_orthonormal
101  void reverse_transform (idx_t n, const float * xt,
102  float *x) const override;
103 
104  /// compute A^T * A to set the is_orthonormal flag
105  void set_is_orthonormal ();
106 
107  bool verbose;
108 
109  ~LinearTransform() override {}
110 };
111 
112 
113 
114 /// Randomly rotate a set of vectors
116 
117  /// both d_in > d_out and d_in < d_out are supported
118  RandomRotationMatrix (int d_in, int d_out):
119  LinearTransform(d_in, d_out, false) {}
120 
121  /// must be called before the transform is used
122  void init(int seed);
123 
124  // initializes with an arbitrary seed
125  void train(Index::idx_t n, const float* x) override;
126 
128 };
129 
130 
131 /** Applies a principal component analysis on a set of vectors,
132  * with optionally whitening and random rotation. */
134 
135  /** after transformation the components are multiplied by
136  * eigenvalues^eigen_power
137  *
138  * =0: no whitening
139  * =-0.5: full whitening
140  */
141  float eigen_power;
142 
143  /// random rotation after PCA
145 
146  /// ratio between # training vectors and dimension
148 
149  /// try to distribute output eigenvectors in this many bins
151 
152  /// Mean, size d_in
153  std::vector<float> mean;
154 
155  /// eigenvalues of covariance matrix (= squared singular values)
156  std::vector<float> eigenvalues;
157 
158  /// PCA matrix, size d_in * d_in
159  std::vector<float> PCAMat;
160 
161  // the final matrix is computed after random rotation and/or whitening
162  explicit PCAMatrix (int d_in = 0, int d_out = 0,
163  float eigen_power = 0, bool random_rotation = false);
164 
165  /// train on n vectors. If n < d_in then the eigenvector matrix
166  /// will be completed with 0s
167  void train(Index::idx_t n, const float* x) override;
168 
169  /// copy pre-trained PCA matrix
170  void copy_from (const PCAMatrix & other);
171 
172  /// called after mean, PCAMat and eigenvalues are computed
173  void prepare_Ab();
174 
175 };
176 
177 
178 struct ProductQuantizer;
179 
180 /** Applies a rotation to align the dimensions with a PQ to minimize
181  * the reconstruction error. Can be used before an IndexPQ or an
182  * IndexIVFPQ. The method is the non-parametric version described in:
183  *
184  * "Optimized Product Quantization for Approximate Nearest Neighbor Search"
185  * Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
186  *
187  */
189 
190  int M; ///< nb of subquantizers
191  int niter; ///< Number of outer training iterations
192  int niter_pq; ///< Number of training iterations for the PQ
193  int niter_pq_0; ///< same, for the first outer iteration
194 
195  /// if there are too many training points, resample
197  bool verbose;
198 
199  /// if non-NULL, use this product quantizer for training
200  /// should be constructed with (d_out, M, _)
202 
203  /// if d2 != -1, output vectors of this dimension
204  explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
205 
206  void train(Index::idx_t n, const float* x) override;
207 };
208 
209 
210 /** remap dimensions for input vectors, possibly inserting 0s
211  * strictly speaking this is also a linear transform but we don't want
212  * to compute it with matrix multiplies */
214 
215  /// map from output dimension to input, size d_out
216  /// -1 -> set output to 0
217  std::vector<int> map;
218 
219  RemapDimensionsTransform (int d_in, int d_out, const int *map);
220 
221  /// remap input to output, skipping or inserting dimensions as needed
222  /// if uniform: distribute dimensions uniformly
223  /// otherwise just take the d_out first ones.
224  RemapDimensionsTransform (int d_in, int d_out, bool uniform = true);
225 
226  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
227 
228  /// reverse transform correct only when the mapping is a permutation
229  void reverse_transform(idx_t n, const float* xt, float* x) const override;
230 
232 };
233 
234 
235 /** per-vector normalization */
237  float norm;
238 
239  explicit NormalizationTransform (int d, float norm = 2.0);
241 
242  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
243 
244  /// Identity transform since normalization is not invertible
245  void reverse_transform(idx_t n, const float* xt, float* x) const override;
246 };
247 
248 /** Subtract the mean of each component from the vectors. */
250 
251  /// Mean, size d_in = d_out
252  std::vector<float> mean;
253 
254  explicit CenteringTransform (int d = 0);
255 
256  /// train on n vectors.
257  void train(Index::idx_t n, const float* x) override;
258 
259  /// subtract the mean
260  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
261 
262  /// add the mean
263  void reverse_transform (idx_t n, const float * xt,
264  float *x) const override;
265 
266 };
267 
268 
269 /** Index that applies a chain of VectorTransforms on vectors before
270  * handing them over to a sub-index */
272 
273  std::vector<VectorTransform *> chain; ///< chain of transforms
274  Index * index; ///< the sub-index
275 
276  bool own_fields; ///< whether pointers are deleted in destructor
277 
278  explicit IndexPreTransform (Index *index);
279 
281 
282  /// ltrans is the last transform before the index
284 
285  void prepend_transform (VectorTransform * ltrans);
286 
287  void train(idx_t n, const float* x) override;
288 
289  void add(idx_t n, const float* x) override;
290 
291  void add_with_ids(idx_t n, const float* x, const long* xids) override;
292 
293  void reset() override;
294 
295  /** removes IDs from the index. Not supported by all indexes.
296  */
297  long remove_ids(const IDSelector& sel) override;
298 
299  void search(
300  idx_t n,
301  const float* x,
302  idx_t k,
303  float* distances,
304  idx_t* labels) const override;
305 
306 
307  /* range search, no attempt is done to change the radius */
308  void range_search (idx_t n, const float* x, float radius,
309  RangeSearchResult* result) const override;
310 
311 
312  void reconstruct (idx_t key, float * recons) const override;
313 
314  void reconstruct_n (idx_t i0, idx_t ni, float *recons)
315  const override;
316 
317  void search_and_reconstruct (idx_t n, const float *x, idx_t k,
318  float *distances, idx_t *labels,
319  float *recons) const override;
320 
321  /// apply the transforms in the chain. The returned float * may be
322  /// equal to x, otherwise it should be deallocated.
323  const float * apply_chain (idx_t n, const float *x) const;
324 
325  /// Reverse the transforms in the chain. May not be implemented for
326  /// all transforms in the chain or may return approximate results.
327  void reverse_chain (idx_t n, const float* xt, float* x) const;
328 
329  ~IndexPreTransform() override;
330 };
331 
332 
333 } // namespace faiss
334 
335 
336 #endif
void transform_transpose(idx_t n, const float *y, float *x) const
Index * index
the sub-index
Randomly rotate a set of vectors.
int niter
Number of outer training iterations.
void train(Index::idx_t n, const float *x) override
train on n vectors.
RandomRotationMatrix(int d_in, int d_out)
both d_in > d_out and d_in < d_out are supported
void init(int seed)
must be called before the transform is used
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
void reset() override
removes all elements from the database.
int niter_pq
Number of training iterations for the PQ.
std::vector< float > A
Transformation matrix, size d_out * d_in.
std::vector< float > mean
Mean, size d_in = d_out.
LinearTransform(int d_in=0, int d_out=0, bool have_bias=false)
both d_in > d_out and d_in < d_out are supported
VectorTransform(int d_in=0, int d_out=0)
sets d_in and d_out from the arguments; is_trained is initialized to true
void set_is_orthonormal()
compute A^T * A to set the is_orthonormal flag
ProductQuantizer * pq
void train(Index::idx_t n, const float *x) override
std::vector< float > mean
Mean, size d_in.
const float * apply_chain(idx_t n, const float *x) const
std::vector< float > PCAMat
PCA matrix, size d_in * d_in.
void train(idx_t n, const float *x) override
long idx_t
all indices are this type
Definition: Index.h:62
std::vector< float > b
bias vector, size d_out
void reverse_transform(idx_t n, const float *xt, float *x) const override
works only if is_orthonormal
void reverse_transform(idx_t n, const float *xt, float *x) const override
Identity transform since normalization is not invertible.
void train(Index::idx_t n, const float *x) override
int balanced_bins
try to distribute output eigenvectors in this many bins
void train(Index::idx_t n, const float *x) override
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
void reverse_transform(idx_t n, const float *xt, float *x) const override
add the mean
void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated
bool own_fields
whether pointers are deleted in destructor
int niter_pq_0
same, for the first outer iteration
virtual void train(idx_t n, const float *x)
virtual void reverse_transform(idx_t n, const float *xt, float *x) const
void search_and_reconstruct(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, float *recons) const override
void reverse_transform(idx_t n, const float *xt, float *x) const override
reverse transform correct only when the mapping is a permutation
size_t max_train_points
if there are too many training points, resample
void copy_from(const PCAMatrix &other)
copy pre-trained PCA matrix
int d_out
output dimension
OPQMatrix(int d=0, int M=1, int d2=-1)
if d2 != -1, output vectors of this dimension
void prepare_Ab()
called after mean, PCAMat and eigenvalues are computed
void add(idx_t n, const float *x) override
void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated
void reverse_chain(idx_t n, const float *xt, float *x) const
bool is_orthonormal
check if matrix A is orthonormal (enables reverse_transform)
std::vector< float > eigenvalues
eigenvalues of covariance matrix (= squared singular values)
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
bool random_rotation
random rotation after PCA
size_t max_points_per_d
ratio between # training vectors and dimension
float * apply(idx_t n, const float *x) const
long remove_ids(const IDSelector &sel) override
virtual void apply_noalloc(idx_t n, const float *x, float *xt) const =0
same as apply, but result is pre-allocated
void reconstruct(idx_t key, float *recons) const override
void apply_noalloc(idx_t n, const float *x, float *xt) const override
subtract the mean
int M
nb of subquantizers
void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated