Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/VectorTransform.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 // -*- c++ -*-
11 
12 #ifndef FAISS_VECTOR_TRANSFORM_H
13 #define FAISS_VECTOR_TRANSFORM_H
14 
15 /** Defines a few objects that apply transformations to a set of
16  * vectors. Often these are pre-processing steps.
17  */
18 
19 #include <vector>
20 
21 #include "Index.h"
22 
23 
24 namespace faiss {
25 
26 
27 /** Any transformation applied on a set of vectors */
29 
30  typedef Index::idx_t idx_t;
31 
32  int d_in; ///< input dimension
33  int d_out; ///< output dimension
34 
35  explicit VectorTransform (int d_in = 0, int d_out = 0):
36  d_in(d_in), d_out(d_out), is_trained(true)
37  {}
38 
39 
40  /// set if the VectorTransform does not require training, or if
41  /// training is done already
42  bool is_trained;
43 
44 
45  /** Perform training on a representative set of vectors. Does
46  * nothing by default.
47  *
48  * @param n nb of training vectors
49  * @param x training vectors, size n * d
50  */
51  virtual void train (idx_t n, const float *x);
52 
53  /** apply the transformation, return newly allocated matrix
54  * @param x size n * d_in
55  * @return size n * d_out
56  */
57  float *apply (idx_t n, const float * x) const;
58 
59  /// same as apply, but result is pre-allocated
60  virtual void apply_noalloc (idx_t n, const float * x,
61  float *xt) const = 0;
62 
63  /// reverse transformation. May not be implemented or may return
64  /// approximate result
65  virtual void reverse_transform (idx_t n, const float * xt,
66  float *x) const;
67 
68  virtual ~VectorTransform () {}
69 
70 };
71 
72 
73 
74 /** Generic linear transformation, with bias term applied on output
75  * y = A * x + b
76  */
78 
79  bool have_bias; ///< whether to use the bias term
80 
81  /// Transformation matrix, size d_out * d_in
82  std::vector<float> A;
83 
84  /// bias vector, size d_out
85  std::vector<float> b;
86 
87  /// both d_in > d_out and d_in < d_out are supported
88  explicit LinearTransform (int d_in = 0, int d_out = 0,
89  bool have_bias = false);
90 
91  /// same as apply, but result is pre-allocated
92  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
93 
94  /// compute x = A^T * (y - b)
95  /// is reverse transform if A has orthonormal lines
96  void transform_transpose (idx_t n, const float * y,
97  float *x) const;
98 
99  bool verbose;
100 
101  ~LinearTransform() override {}
102 };
103 
104 
105 
106 /// Randomly rotate a set of vectors
108 
109  /// both d_in > d_out and d_in < d_out are supported
110  RandomRotationMatrix (int d_in, int d_out):
111  LinearTransform(d_in, d_out, false) {}
112 
113  /// must be called before the transform is used
114  void init(int seed);
115 
116  void reverse_transform(idx_t n, const float* xt, float* x) const override;
117 
119 };
120 
121 
122 /** Applies a principal component analysis on a set of vectors,
123  * with optional whitening and random rotation. */
125 
126  /** after transformation the components are multiplied by
127  * eigenvalues^eigen_power
128  *
129  * =0: no whitening
130  * =-2: full whitening
131  */
132  float eigen_power;
133 
134  /// random rotation after PCA
136 
137  /// ratio between # training vectors and dimension
139 
140  /// try to distribute output eigenvectors in this many bins
142 
143  /// Mean, size d_in
144  std::vector<float> mean;
145 
146  /// eigenvalues of covariance matrix (= squared singular values)
147  std::vector<float> eigenvalues;
148 
149  /// PCA matrix, size d_in * d_in
150  std::vector<float> PCAMat;
151 
152  // the final matrix is computed after random rotation and/or whitening
153  explicit PCAMatrix (int d_in = 0, int d_out = 0,
154  float eigen_power = 0, bool random_rotation = false);
155 
156  /// train on n vectors. If n < d_in then the eigenvector matrix
157  /// will be completed with 0s
158  void train(Index::idx_t n, const float* x) override;
159 
160  void reverse_transform(idx_t n, const float* xt, float* x) const override;
161 
162  /// copy pre-trained PCA matrix
163  void copy_from (const PCAMatrix & other);
164 
165  /// called after mean, PCAMat and eigenvalues are computed
166  void prepare_Ab();
167 
168 };
169 
170 
171 
172 /** Applies a rotation to align the dimensions with a PQ to minimize
173  * the reconstruction error. Can be used before an IndexPQ or an
174  * IndexIVFPQ. The method is the non-parametric version described in:
175  *
176  * "Optimized Product Quantization for Approximate Nearest Neighbor Search"
177  * Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
178  *
179  */
181 
182  int M; ///< nb of subquantizers
183  int niter; ///< Number of outer training iterations
184  int niter_pq; ///< Number of training iterations for the PQ
185  int niter_pq_0; ///< same, for the first outer iteration
186 
187  /// if there are too many training points, resample
189  bool verbose;
190 
191  /// if d2 != -1, output vectors of this dimension
192  explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
193 
194  void train(Index::idx_t n, const float* x) override;
195 
196  void reverse_transform(idx_t n, const float* xt, float* x) const override;
197 };
198 
199 
200 /** remap dimensions for input vectors, possibly inserting 0s
201  * strictly speaking this is also a linear transform but we don't want
202  * to compute it with matrix multiplies */
204 
205  /// map from output dimension to input, size d_out
206  /// -1 -> set output to 0
207  std::vector<int> map;
208 
209  RemapDimensionsTransform (int d_in, int d_out, const int *map);
210 
211  /// remap input to output, skipping or inserting dimensions as needed
212  /// if uniform: distribute dimensions uniformly
213  /// otherwise just take the d_out first ones.
214  RemapDimensionsTransform (int d_in, int d_out, bool uniform = true);
215 
216  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
217 
218  /// reverse transform correct only when the mapping is a permutation
219  void reverse_transform(idx_t n, const float* xt, float* x) const override;
220 
222 };
223 
224 
225 /** per-vector normalization */
227  float norm;
228 
229  explicit NormalizationTransform (int d, float norm = 2.0);
231 
232  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
233 };
234 
235 
236 
237 /** Index that applies a chain of VectorTransform objects to vectors before
238  * handing them over to a sub-index */
240 
241  std::vector<VectorTransform *> chain; ///< chain of transforms
242  Index * index; ///< the sub-index
243 
244  bool own_fields; ///< whether pointers are deleted in destructor
245 
246  explicit IndexPreTransform (Index *index);
247 
249 
250  /// ltrans is the last transform before the index
252 
253  void prepend_transform (VectorTransform * ltrans);
254 
255  void train(idx_t n, const float* x) override;
256 
257  void add(idx_t n, const float* x) override;
258 
259  void add_with_ids(idx_t n, const float* x, const long* xids) override;
260 
261  void reset() override;
262 
263  /** removes IDs from the index. Not supported by all indexes.
264  */
265  long remove_ids(const IDSelector& sel) override;
266 
267  void search(
268  idx_t n,
269  const float* x,
270  idx_t k,
271  float* distances,
272  idx_t* labels) const override;
273 
274  void reconstruct_n (idx_t i0, idx_t ni, float *recons)
275  const override;
276 
277  /// apply the transforms in the chain. The returned float * may be
278  /// equal to x, otherwise it should be deallocated.
279  const float * apply_chain (idx_t n, const float *x) const;
280 
281  ~IndexPreTransform() override;
282 };
283 
284 
285 
286 } // namespace faiss
287 
288 
289 
290 #endif
void transform_transpose(idx_t n, const float *y, float *x) const
Index * index
the sub-index
Randomly rotate a set of vectors.
int niter
Number of outer training iterations.
RandomRotationMatrix(int d_in, int d_out)
both d_in > d_out and d_in < d_out are supported
void init(int seed)
must be called before the transform is used
void reset() override
removes all elements from the database.
int niter_pq
Number of training iterations for the PQ.
std::vector< float > A
Transformation matrix, size d_out * d_in
LinearTransform(int d_in=0, int d_out=0, bool have_bias=false)
both d_in > d_out and d_in < d_out are supported
VectorTransform(int d_in=0, int d_out=0)
! output dimension
void train(Index::idx_t n, const float *x) override
std::vector< float > mean
Mean, size d_in.
const float * apply_chain(idx_t n, const float *x) const
std::vector< float > PCAMat
PCA matrix, size d_in * d_in.
void train(idx_t n, const float *x) override
std::vector< float > b
bias vector, size d_out
void train(Index::idx_t n, const float *x) override
int balanced_bins
try to distribute output eigenvectors in this many bins
long idx_t
all indices are this type
Definition: Index.h:62
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated
bool own_fields
whether pointers are deleted in destructor
int niter_pq_0
same, for the first outer iteration
void reverse_transform(idx_t n, const float *xt, float *x) const override
virtual void train(idx_t n, const float *x)
void reverse_transform(idx_t n, const float *xt, float *x) const override
virtual void reverse_transform(idx_t n, const float *xt, float *x) const
void reverse_transform(idx_t n, const float *xt, float *x) const override
reverse transform correct only when the mapping is a permutation
void reverse_transform(idx_t n, const float *xt, float *x) const override
size_t max_train_points
if there are too many training points, resample
void copy_from(const PCAMatrix &other)
copy pre-trained PCA matrix
int d_out
output dimension
OPQMatrix(int d=0, int M=1, int d2=-1)
if d2 != -1, output vectors of this dimension
void prepare_Ab()
called after mean, PCAMat and eigenvalues are computed
void add(idx_t n, const float *x) override
void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated
std::vector< float > eigenvalues
eigenvalues of covariance matrix (= squared singular values)
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
bool random_rotation
random rotation after PCA
size_t max_points_per_d
ratio between # training vectors and dimension
float * apply(idx_t n, const float *x) const
long remove_ids(const IDSelector &sel) override
virtual void apply_noalloc(idx_t n, const float *x, float *xt) const =0
same as apply, but result is pre-allocated
int M
nb of subquantizers
void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated