Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/VectorTransform.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 // -*- c++ -*-
12 
13 #ifndef FAISS_VECTOR_TRANSFORM_H
14 #define FAISS_VECTOR_TRANSFORM_H
15 
16 /** Defines a few objects that apply transformations to a set of
17  * vectors. Often these are pre-processing steps.
18  */
19 
20 #include <vector>
21 
22 #include "Index.h"
23 
24 
25 namespace faiss {
26 
27 
28 /** Any transformation applied on a set of vectors */
30 
31  typedef Index::idx_t idx_t;
32 
33  int d_in; ///< input dimension
34  int d_out; ///< output dimension
35 
36  explicit VectorTransform (int d_in = 0, int d_out = 0):
37  d_in(d_in), d_out(d_out), is_trained(true)
38  {}
39 
40 
41  /// set if the VectorTransform does not require training, or if
42  /// training is done already
43  bool is_trained;
44 
45 
46  /** Perform training on a representative set of vectors. Does
47  * nothing by default.
48  *
49  * @param n nb of training vectors
50  * @param x training vectors, size n * d
51  */
52  virtual void train (idx_t n, const float *x);
53 
54  /** apply the transformation, return a newly allocated matrix
55  * @param x size n * d_in
56  * @return size n * d_out
57  */
58  float *apply (idx_t n, const float * x) const;
59 
60  /// same as apply, but result is pre-allocated
61  virtual void apply_noalloc (idx_t n, const float * x,
62  float *xt) const = 0;
63 
64  /// reverse transformation. May not be implemented or may return
65  /// approximate result
66  virtual void reverse_transform (idx_t n, const float * xt,
67  float *x) const;
68 
69  virtual ~VectorTransform () {}
70 
71 };
72 
73 
74 
75 /** Generic linear transformation, with bias term applied on output
76  * y = A * x + b
77  */
79 
80 
81  bool have_bias; ///< whether to use the bias term
82 
83  /// Transformation matrix, size d_out * d_in
84  std::vector<float> A;
85 
86  /// bias vector, size d_out
87  std::vector<float> b;
88 
89 
90  /// both d_in > d_out and d_in < d_out are supported
91  explicit LinearTransform (int d_in = 0, int d_out = 0,
92  bool have_bias = false);
93 
94  /// same as apply, but result is pre-allocated
95  virtual void apply_noalloc (idx_t n, const float * x,
96  float *xt) const;
97 
98  /// compute x = A^T * (y - b)
99  /// is reverse transform if A has orthonormal lines
100  void transform_transpose (idx_t n, const float * y,
101  float *x) const;
102 
103  // ratio between # training vectors and dimension
104  size_t max_points_per_d;
105  bool verbose;
106 
107  // subsamples training set if there are too many vectors
108  const float *maybe_subsample_train_set (Index::idx_t *n, const float *x);
109 
110  virtual ~LinearTransform () {}
111 
112 
113 };
114 
115 
116 
117 /// Randomly rotate a set of vectors
119 
120  /// both d_in > d_out and d_in < d_out are supported
121  RandomRotationMatrix (int d_in, int d_out):
122  LinearTransform(d_in, d_out, false) {}
123 
124  /// must be called before the transform is used
125  void init(int seed);
126 
127  virtual void reverse_transform (idx_t n, const float * xt,
128  float *x) const override;
129 
131 };
132 
133 
134 /** Applies a principal component analysis on a set of vectors,
135  * optionally with whitening and random rotation. */
137 
138  /** after transformation the components are multiplied by
139  * eigenvalues^eigen_power
140  *
141  * =0: no whitening
142  * =-0.5: full whitening
143  */
144  float eigen_power;
145 
146  /// random rotation after PCA
148 
149  /// try to distribute output eigenvectors in this many bins
151 
152  /// Mean, size d_in
153  std::vector<float> mean;
154 
155  /// eigenvalues of covariance matrix (= squared singular values)
156  std::vector<float> eigenvalues;
157 
158  /// PCA matrix, size d_in * d_in
159  std::vector<float> PCAMat;
160 
161  // the final matrix is computed after random rotation and/or whitening
162  explicit PCAMatrix (int d_in = 0, int d_out = 0,
163  float eigen_power = 0, bool random_rotation = false);
164 
165  virtual void train (Index::idx_t n, const float *x) override;
166 
167  virtual void reverse_transform (idx_t n, const float * xt,
168  float *x) const override;
169 
170  /// copy pre-trained PCA matrix
171  void copy_from (const PCAMatrix & other);
172 
173  /// called after mean, PCAMat and eigenvalues are computed
174  void prepare_Ab();
175 
176 };
177 
178 
179 
180 /** Applies a rotation to align the dimensions with a PQ to minimize
181  * the reconstruction error. Can be used before an IndexPQ or an
182  * IndexIVFPQ. The method is the non-parametric version described in:
183  *
184  * "Optimized Product Quantization for Approximate Nearest Neighbor Search"
185  * Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
186  *
187  */
189 
190  int M; ///< number of subquantizers
191  int niter; ///< Number of outer training iterations
192  int niter_pq; ///< Number of training iterations for the PQ
193  int niter_pq_0; ///< Number of PQ training iterations for the first outer iteration
194  /// if there are too many training points, resample
196  bool verbose;
197 
198  /// if d2 != -1, output vectors of this dimension
199  explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
200 
201  virtual void train (Index::idx_t n, const float *x) override;
202 
203  virtual void reverse_transform (idx_t n, const float * xt,
204  float *x) const override;
205 };
206 
207 
208 /** remap dimensions for input vectors, possibly inserting 0s
209  * strictly speaking this is also a linear transform but we don't want
210  * to compute it with matrix multiplies */
212 
213 
214  /// map from output dimension to input, size d_out
215  /// -1 -> set output to 0
216  std::vector<int> map;
217 
218  RemapDimensionsTransform (int d_in, int d_out, const int *map);
219 
220  /// remap input to output, skipping or inserting dimensions as needed
221  /// if uniform: distribute dimensions uniformly
222  /// otherwise just take the d_out first ones.
223  RemapDimensionsTransform (int d_in, int d_out, bool uniform = true);
224 
225  virtual void apply_noalloc (idx_t n, const float * x,
226  float *xt) const override;
227 
228  /// reverse transform correct only when the mapping is a permutation
229  virtual void reverse_transform (idx_t n, const float * xt,
230  float *x) const override;
231 
233 };
234 
235 
236 /** Index that applies a chain of VectorTransforms to vectors before
237  * handing them over to a sub-index */
239 
240  std::vector<VectorTransform *> chain; ///< chain of transforms
241  Index * index; ///< the sub-index
242 
243  bool own_fields; ///< whether pointers are deleted in destructor
244 
245  explicit IndexPreTransform (Index *index);
246 
248 
249  /// ltrans is the last transform before the index
251 
252  void prepend_transform (VectorTransform * ltrans);
253 
254  virtual void set_typename () override;
255 
256  virtual void train (idx_t n, const float *x) override;
257 
258  virtual void add (idx_t n, const float *x) override;
259 
260  virtual void add_with_ids (idx_t n, const float * x, const long *xids)
261  override;
262 
263 
264  virtual void reset () override;
265 
266  /** removes IDs from the index. Not supported by all indexes.
267  */
268  virtual long remove_ids (const IDSelector & sel) override;
269 
270  virtual void search (
271  idx_t n, const float *x, idx_t k,
272  float *distances, idx_t *labels) const override;
273 
274  void reconstruct_n (idx_t i0, idx_t ni, float *recons)
275  const override;
276 
277  /// apply the transforms in the chain. The returned float * may be
278  /// equal to x, otherwise it should be deallocated.
279  const float * apply_chain (idx_t n, const float *x) const;
280 
281  virtual ~IndexPreTransform ();
282 
283 };
284 
285 
286 
287 } // namespace faiss
288 
289 
290 
291 #endif
void transform_transpose(idx_t n, const float *y, float *x) const
Index * index
chain of transforms
Randomly rotate a set of vectors.
int niter
Number of outer training iterations.
RandomRotationMatrix(int d_in, int d_out)
both d_in > d_out and d_in < d_out are supported
void init(int seed)
must be called before the transform is used
virtual void reset() override
removes all elements from the database.
int niter_pq
Number of training iterations for the PQ.
std::vector< float > A
! whether to use the bias term
LinearTransform(int d_in=0, int d_out=0, bool have_bias=false)
both d_in > d_out and d_in < d_out are supported
VectorTransform(int d_in=0, int d_out=0)
! output dimension
virtual void train(Index::idx_t n, const float *x) override
std::vector< float > mean
Mean, size d_in.
const float * apply_chain(idx_t n, const float *x) const
std::vector< float > PCAMat
PCA matrix, size d_in * d_in.
virtual void train(idx_t n, const float *x) override
std::vector< float > b
bias vector, size d_out
int max_points_per_d
if there are too many training points, resample
virtual void train(Index::idx_t n, const float *x) override
int balanced_bins
try to distribute output eigenvectors in this many bins
long idx_t
all indices are this type
Definition: Index.h:64
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
virtual void apply_noalloc(idx_t n, const float *x, float *xt) const
same as apply, but result is pre-allocated
bool own_fields
! the sub-index
virtual void reverse_transform(idx_t n, const float *xt, float *x) const override
virtual void train(idx_t n, const float *x)
virtual void reverse_transform(idx_t n, const float *xt, float *x) const override
virtual void reverse_transform(idx_t n, const float *xt, float *x) const
virtual void reverse_transform(idx_t n, const float *xt, float *x) const override
reverse transform correct only when the mapping is a permutation
virtual void reverse_transform(idx_t n, const float *xt, float *x) const override
void copy_from(const PCAMatrix &other)
copy pre-trained PCA matrix
int d_out
! input dimension
OPQMatrix(int d=0, int M=1, int d2=-1)
if d2 != -1, output vectors of this dimension
void prepare_Ab()
called after mean, PCAMat and eigenvalues are computed
virtual void add(idx_t n, const float *x) override
virtual void apply_noalloc(idx_t n, const float *x, float *xt) const override
same as apply, but result is pre-allocated
std::vector< float > eigenvalues
eigenvalues of covariance matrix (= squared singular values)
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
virtual void add_with_ids(idx_t n, const float *x, const long *xids) override
bool random_rotation
random rotation after PCA
float * apply(idx_t n, const float *x) const
virtual long remove_ids(const IDSelector &sel) override
virtual void apply_noalloc(idx_t n, const float *x, float *xt) const =0
same as apply, but result is pre-allocated
int M
nb of subquantizers