Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/VectorTransform.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #ifndef FAISS_VECTOR_TRANSFORM_H
12 #define FAISS_VECTOR_TRANSFORM_H
13 
14 /** Defines a few objects that apply transformations to a set of
15  * vectors. Often these are pre-processing steps.
16  */
17 
18 #include <vector>
19 
20 #include "Index.h"
21 
22 
23 namespace faiss {
24 
25 
26 /** Any transformation applied on a set of vectors */
28 
29  typedef Index::idx_t idx_t;
30 
31  int d_in; ///< input dimension
32  int d_out; ///< output dimension
33 
34  explicit VectorTransform (int d_in = 0, int d_out = 0):
35  d_in(d_in), d_out(d_out), is_trained(true)
36  {}
37 
38 
39  /// set if the VectorTransform does not require training, or if
40  /// training is done already
41  bool is_trained;
42 
43 
44  /** Perform training on a representative set of vectors. Does
45  * nothing by default.
46  *
47  * @param n nb of training vectors
48  * @param x training vectors, size n * d
49  */
50  virtual void train (idx_t n, const float *x);
51 
52  /** apply the transformation, return a newly allocated matrix
53  * @param x size n * d_in
54  * @return size n * d_out
55  */
56  float *apply (idx_t n, const float * x) const;
57 
58  /// same as apply, but result is pre-allocated
59  virtual void apply_noalloc (idx_t n, const float * x,
60  float *xt) const = 0;
61 
62  /// reverse transformation. May not be implemented or may return
63  /// approximate result
64  virtual void reverse_transform (idx_t n, const float * xt,
65  float *x) const;
66 
67  virtual ~VectorTransform () {}
68 
69 };
70 
71 
72 
73 /** Generic linear transformation, with bias term applied on output
74  * y = A * x + b
75  */
77 
78  bool have_bias; ///< whether to use the bias term
79 
80  /// check if matrix A is orthonormal (enables reverse_transform)
82 
83  /// Transformation matrix, size d_out * d_in
84  std::vector<float> A;
85 
86  /// bias vector, size d_out
87  std::vector<float> b;
88 
89  /// both d_in > d_out and d_in < d_out are supported
90  explicit LinearTransform (int d_in = 0, int d_out = 0,
91  bool have_bias = false);
92 
93  /// same as apply, but result is pre-allocated
94  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
95 
96  /// compute x = A^T * (y - b)
97  /// is reverse transform if A has orthonormal lines
98  void transform_transpose (idx_t n, const float * y,
99  float *x) const;
100 
101  /// works only if is_orthonormal
102  void reverse_transform (idx_t n, const float * xt,
103  float *x) const override;
104 
105  /// compute A^T * A to set the is_orthonormal flag
106  void set_is_orthonormal ();
107 
108  bool verbose;
109 
110  ~LinearTransform() override {}
111 };
112 
113 
114 
115 /// Randomly rotate a set of vectors
117 
118  /// both d_in > d_out and d_in < d_out are supported
119  RandomRotationMatrix (int d_in, int d_out):
120  LinearTransform(d_in, d_out, false) {}
121 
122  /// must be called before the transform is used
123  void init(int seed);
124 
125  // initializes with an arbitrary seed
126  void train(Index::idx_t n, const float* x) override;
127 
129 };
130 
131 
132 /** Applies a principal component analysis on a set of vectors,
133  * with optional whitening and random rotation. */
135 
136  /** after transformation the components are multiplied by
137  * eigenvalues^eigen_power
138  *
139  * =0: no whitening
140  * =-0.5: full whitening
141  */
142  float eigen_power;
143 
144  /// random rotation after PCA
146 
147  /// ratio between # training vectors and dimension
149 
150  /// try to distribute output eigenvectors in this many bins
152 
153  /// Mean, size d_in
154  std::vector<float> mean;
155 
156  /// eigenvalues of covariance matrix (= squared singular values)
157  std::vector<float> eigenvalues;
158 
159  /// PCA matrix, size d_in * d_in
160  std::vector<float> PCAMat;
161 
162  // the final matrix is computed after random rotation and/or whitening
163  explicit PCAMatrix (int d_in = 0, int d_out = 0,
164  float eigen_power = 0, bool random_rotation = false);
165 
166  /// train on n vectors. If n < d_in then the eigenvector matrix
167  /// will be completed with 0s
168  void train(Index::idx_t n, const float* x) override;
169 
170  /// copy pre-trained PCA matrix
171  void copy_from (const PCAMatrix & other);
172 
173  /// called after mean, PCAMat and eigenvalues are computed
174  void prepare_Ab();
175 
176 };
177 
178 
179 struct ProductQuantizer;
180 
181 /** Applies a rotation to align the dimensions with a PQ to minimize
182  * the reconstruction error. Can be used before an IndexPQ or an
183  * IndexIVFPQ. The method is the non-parametric version described in:
184  *
185  * "Optimized Product Quantization for Approximate Nearest Neighbor Search"
186  * Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
187  *
188  */
190 
191  int M; ///< nb of subquantizers
192  int niter; ///< Number of outer training iterations
193  int niter_pq; ///< Number of training iterations for the PQ
194  int niter_pq_0; ///< same, for the first outer iteration
195 
196  /// if there are too many training points, resample
198  bool verbose;
199 
200  /// if non-NULL, use this product quantizer for training
201  /// should be constructed with (d_out, M, _)
203 
204  /// if d2 != -1, output vectors of this dimension
205  explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
206 
207  void train(Index::idx_t n, const float* x) override;
208 };
209 
210 
211 /** remap dimensions for input vectors, possibly inserting 0s
212  * strictly speaking this is also a linear transform but we don't want
213  * to compute it with matrix multiplies */
215 
216  /// map from output dimension to input, size d_out
217  /// -1 -> set output to 0
218  std::vector<int> map;
219 
220  RemapDimensionsTransform (int d_in, int d_out, const int *map);
221 
222  /// remap input to output, skipping or inserting dimensions as needed
223  /// if uniform: distribute dimensions uniformly
224  /// otherwise just take the d_out first ones.
225  RemapDimensionsTransform (int d_in, int d_out, bool uniform = true);
226 
227  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
228 
229  /// reverse transform correct only when the mapping is a permutation
230  void reverse_transform(idx_t n, const float* xt, float* x) const override;
231 
233 };
234 
235 
236 /** per-vector normalization */
238  float norm;
239 
240  explicit NormalizationTransform (int d, float norm = 2.0);
242 
243  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
244 
245  /// Identity transform since norm is not revertible
246  void reverse_transform(idx_t n, const float* xt, float* x) const override;
247 };
248 
249 
250 
251 /** Index that applies a LinearTransform transform on vectors before
252  * handing them over to a sub-index */
254 
255  std::vector<VectorTransform *> chain; ///< chain of transforms
256  Index * index; ///< the sub-index
257 
258  bool own_fields; ///< whether pointers are deleted in destructor
259 
260  explicit IndexPreTransform (Index *index);
261 
263 
264  /// ltrans is the last transform before the index
266 
267  void prepend_transform (VectorTransform * ltrans);
268 
269  void train(idx_t n, const float* x) override;
270 
271  void add(idx_t n, const float* x) override;
272 
273  void add_with_ids(idx_t n, const float* x, const long* xids) override;
274 
275  void reset() override;
276 
277  /** removes IDs from the index. Not supported by all indexes.
278  */
279  long remove_ids(const IDSelector& sel) override;
280 
281  void search(
282  idx_t n,
283  const float* x,
284  idx_t k,
285  float* distances,
286  idx_t* labels) const override;
287 
288  void reconstruct (idx_t key, float * recons) const override;
289 
290  void reconstruct_n (idx_t i0, idx_t ni, float *recons)
291  const override;
292 
293  void search_and_reconstruct (idx_t n, const float *x, idx_t k,
294  float *distances, idx_t *labels,
295  float *recons) const override;
296 
297  /// apply the transforms in the chain. The returned float * may be
298  /// equal to x, otherwise it should be deallocated.
299  const float * apply_chain (idx_t n, const float *x) const;
300 
301  /// Reverse the transforms in the chain. May not be implemented for
302  /// all transforms in the chain or may return approximate results.
303  void reverse_chain (idx_t n, const float* xt, float* x) const;
304 
305  ~IndexPreTransform() override;
306 };
307 
308 
309 } // namespace faiss
310 
311 
312 #endif
void transform_transpose(idx_t n, const float *y, float *x) const
Index * index
the sub-index
Randomly rotate a set of vectors.
int niter
Number of outer training iterations.
RandomRotationMatrix(int d_in, int d_out)
both d_in > d_out and d_in < d_out are supported
void init(int seed)
must be called before the transform is used
void reset() override
removes all elements from the database.
int niter_pq
Number of training iterations for the PQ.
std::vector< float > A
Transformation matrix, size d_out * d_in.
LinearTransform(int d_in=0, int d_out=0, bool have_bias=false)
both d_in > d_out and d_in < d_out are supported
VectorTransform(int d_in=0, int d_out=0)
! output dimension
void set_is_orthonormal()
compute A^T * A to set the is_orthonormal flag
ProductQuantizer * pq
void train(Index::idx_t n, const float *x) override
std::vector< float > mean
Mean, size d_in.
const float * apply_chain(idx_t n, const float *x) const
std::vector< float > PCAMat
PCA matrix, size d_in * d_in.
void train(idx_t n, const float *x) override
std::vector< float > b
bias vector, size d_out
void reverse_transform(idx_t n, const float *xt, float *x) const override
works only if is_orthonormal
void reverse_transform(idx_t n, const float *xt, float *x) const override
Identity transform since norm is not revertible.
void train(Index::idx_t n, const float *x) override
int balanced_bins
try to distribute output eigenvectors in this many bins
long idx_t
all indices are this type
Definition: Index.h:64
void train(Index::idx_t n, const float *x) override
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated
bool own_fields
whether pointers are deleted in destructor
int niter_pq_0
same, for the first outer iteration
virtual void train(idx_t n, const float *x)
virtual void reverse_transform(idx_t n, const float *xt, float *x) const
void search_and_reconstruct(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, float *recons) const override
void reverse_transform(idx_t n, const float *xt, float *x) const override
reverse transform correct only when the mapping is a permutation
size_t max_train_points
if there are too many training points, resample
void copy_from(const PCAMatrix &other)
copy pre-trained PCA matrix
int d_out
output dimension
OPQMatrix(int d=0, int M=1, int d2=-1)
if d2 != -1, output vectors of this dimension
void prepare_Ab()
called after mean, PCAMat and eigenvalues are computed
void add(idx_t n, const float *x) override
void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated
void reverse_chain(idx_t n, const float *xt, float *x) const
bool is_orthonormal
check if matrix A is orthonormal (enables reverse_transform)
std::vector< float > eigenvalues
eigenvalues of covariance matrix (= squared singular values)
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
bool random_rotation
random rotation after PCA
size_t max_points_per_d
ratio between # training vectors and dimension
float * apply(idx_t n, const float *x) const
long remove_ids(const IDSelector &sel) override
virtual void apply_noalloc(idx_t n, const float *x, float *xt) const =0
same as apply, but result is pre-allocated
void reconstruct(idx_t key, float *recons) const override
int M
nb of subquantizers
void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated