/data/users/matthijs/github_faiss/faiss/VectorTransform.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved
10 // -*- c++ -*-
11 
12 #include "VectorTransform.h"
13 
14 #include <cstdio>
15 #include <cmath>
16 #include <cstring>
17 
18 #include "utils.h"
19 #include "FaissAssert.h"
20 #include "IndexPQ.h"
21 
22 using namespace faiss;
23 
24 
25 extern "C" {
26 
27 // this is to keep the clang syntax checker happy
28 #ifndef FINTEGER
29 #define FINTEGER int
30 #endif
31 
32 
33 /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
34 
35 int sgemm_ (
36  const char *transa, const char *transb, FINTEGER *m, FINTEGER *
37  n, FINTEGER *k, const float *alpha, const float *a,
38  FINTEGER *lda, const float *b,
39  FINTEGER *ldb, float *beta,
40  float *c, FINTEGER *ldc);
41 
42 int ssyrk_ (
43  const char *uplo, const char *trans, FINTEGER *n, FINTEGER *k,
44  float *alpha, float *a, FINTEGER *lda,
45  float *beta, float *c, FINTEGER *ldc);
46 
47 /* Lapack functions from http://www.netlib.org/clapack/old/single/ */
48 
49 int ssyev_ (
50  const char *jobz, const char *uplo, FINTEGER *n, float *a,
51  FINTEGER *lda, float *w, float *work, FINTEGER *lwork,
52  FINTEGER *info);
53 
54 int dsyev_ (
55  const char *jobz, const char *uplo, FINTEGER *n, double *a,
56  FINTEGER *lda, double *w, double *work, FINTEGER *lwork,
57  FINTEGER *info);
58 
59 int sgesvd_(
60  const char *jobu, const char *jobvt, FINTEGER *m, FINTEGER *n,
61  float *a, FINTEGER *lda, float *s, float *u, FINTEGER *ldu, float *vt,
62  FINTEGER *ldvt, float *work, FINTEGER *lwork, FINTEGER *info);
63 
64 }
65 
66 /*********************************************
67  * VectorTransform
68  *********************************************/
69 
70 
71 
72 float * VectorTransform::apply (Index::idx_t n, const float * x) const
73 {
74  float * xt = new float[n * d_out];
75  apply_noalloc (n, x, xt);
76  return xt;
77 }
78 
79 
80 void VectorTransform::train (idx_t, const float *) {
81  // does nothing by default
82 }
83 
84 
85 void VectorTransform::reverse_transform (
86  idx_t , const float *,
87  float *) const
88 {
89  FAISS_THROW_MSG ("reverse transform not implemented");
90 }
91 
92 
93 
94 
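The base class only fixes the calling convention: apply() returns a new[]-allocated buffer of n * d_out floats that the caller must delete[], while apply_noalloc() writes into a caller-provided buffer. A minimal usage sketch, not part of this file (`run_transform` is an illustrative name; it works for any trained concrete transform):

// Sketch: the two ways of running a transform.
void run_transform (const faiss::VectorTransform & vt, size_t n, const float *x)
{
    float *xt = vt.apply (n, x);              // allocates n * vt.d_out floats
    std::vector<float> buf (n * vt.d_out);
    vt.apply_noalloc (n, x, buf.data ());     // caller owns this buffer
    delete [] xt;                             // apply() output must be freed by the caller
}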
95 /*********************************************
96  * LinearTransform
97  *********************************************/
98 /// both d_in > d_out and d_out > d_in are supported
99 LinearTransform::LinearTransform (int d_in, int d_out,
100  bool have_bias):
101  VectorTransform (d_in, d_out), have_bias (have_bias),
102  verbose (false)
103 {}
104 
105 void LinearTransform::apply_noalloc (idx_t n, const float * x,
106  float * xt) const
107 {
108  FAISS_THROW_IF_NOT_MSG(is_trained, "Transformation not trained yet");
109 
110  float c_factor;
111  if (have_bias) {
112  FAISS_THROW_IF_NOT_MSG (b.size() == d_out, "Bias not initialized");
113  float * xi = xt;
114  for (int i = 0; i < n; i++)
115  for(int j = 0; j < d_out; j++)
116  *xi++ = b[j];
117  c_factor = 1.0;
118  } else {
119  c_factor = 0.0;
120  }
121 
122  FAISS_THROW_IF_NOT_MSG (A.size() == d_out * d_in,
123  "Transformation matrix not initialized");
124 
125  float one = 1;
126  FINTEGER nbiti = d_out, ni = n, di = d_in;
127  sgemm_ ("Transposed", "Not transposed",
128  &nbiti, &ni, &di,
129  &one, A.data(), &di, x, &di, &c_factor, xt, &nbiti);
130 
131 }
132 
133 
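The sgemm_ call above exploits the usual row-major/column-major duality: the n-by-d_in row-major array x is what BLAS sees as a d_in-by-n column-major matrix, so multiplying it by op(A) = A^T (with A stored as d_out rows of length d_in) yields the n-by-d_out row-major output xt, on top of the bias already copied into it. A naive reference loop for the same computation, sketch only (`linear_transform_one` is an illustrative helper, not part of faiss):

// Reference: y = A x + b for one vector; A is d_out x d_in, row-major,
// which is what the sgemm_ call above computes for each of the n input rows.
void linear_transform_one (const std::vector<float> & A,
                           const std::vector<float> & b,
                           int d_in, int d_out,
                           const float *x, float *y)
{
    for (int i = 0; i < d_out; i++) {
        float accu = b.empty () ? 0 : b[i];
        for (int j = 0; j < d_in; j++)
            accu += A[i * d_in + j] * x[j];
        y[i] = accu;
    }
}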
134 void LinearTransform::transform_transpose (idx_t n, const float * y,
135  float *x) const
136 {
137  if (have_bias) { // allocate buffer to store bias-corrected data
138  float *y_new = new float [n * d_out];
139  const float *yr = y;
140  float *yw = y_new;
141  for (idx_t i = 0; i < n; i++) {
142  for (int j = 0; j < d_out; j++) {
143  *yw++ = *yr++ - b [j];
144  }
145  }
146  y = y_new;
147  }
148 
149  {
150  FINTEGER dii = d_in, doi = d_out, ni = n;
151  float one = 1.0, zero = 0.0;
152  sgemm_ ("Not", "Not", &dii, &ni, &doi,
153  &one, A.data (), &dii, y, &doi, &zero, x, &dii);
154  }
155 
156  if (have_bias) delete [] y;
157 }
158 
159 
160 /*********************************************
161  * RandomRotationMatrix
162  *********************************************/
163 
164 void RandomRotationMatrix::init (int seed)
165 {
166 
167  if(d_out <= d_in) {
168  A.resize (d_out * d_in);
169  float *q = A.data();
170  float_randn(q, d_out * d_in, seed);
171  matrix_qr(d_in, d_out, q);
172  } else {
173  A.resize (d_out * d_out);
174  float *q = A.data();
175  float_randn(q, d_out * d_out, seed);
176  matrix_qr(d_out, d_out, q);
177  // remove columns
178  int i, j;
179  for (i = 0; i < d_out; i++) {
180  for(j = 0; j < d_in; j++) {
181  q[i * d_in + j] = q[i * d_out + j];
182  }
183  }
184  A.resize(d_in * d_out);
185  }
186 
187 }
188 
189 void RandomRotationMatrix::reverse_transform (idx_t n, const float * xt,
190  float *x) const
191 {
192  transform_transpose (n, xt, x);
193 }
194 
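When d_out == d_in, the matrix produced by init() is a full orthonormal rotation, so reverse_transform() (which applies A^T) is an exact inverse and a round trip should reproduce the input up to float precision. A quick check, as a sketch (`check_random_rotation` is illustrative; float_randn and fvec_L2sqr come from the utils.h included above):

// Sketch: round-trip random vectors through a RandomRotationMatrix.
void check_random_rotation (int d, int n)
{
    faiss::RandomRotationMatrix rrot (d, d);
    rrot.init (123);                            // seed; must be called before use

    std::vector<float> x (n * d), x2 (n * d);
    faiss::float_randn (x.data (), n * d, 456);

    float *xt = rrot.apply (n, x.data ());
    rrot.reverse_transform (n, xt, x2.data ());

    printf ("round-trip squared L2 error: %g\n",
            faiss::fvec_L2sqr (x.data (), x2.data (), n * d));
    delete [] xt;
}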
195 /*********************************************
196  * PCAMatrix
197  *********************************************/
198 
199 PCAMatrix::PCAMatrix (int d_in, int d_out,
200  float eigen_power, bool random_rotation):
201  LinearTransform(d_in, d_out, true),
202  eigen_power(eigen_power), random_rotation(random_rotation)
203 {
204  is_trained = false;
205  max_points_per_d = 1000;
206  balanced_bins = 0;
207 }
208 
209 
210 namespace {
211 
212 /// Compute the eigenvalue decomposition of symmetric matrix cov,
213 /// dimensions d_in-by-d_in. Output eigenvectors in cov.
214 
215 void eig(size_t d_in, double *cov, double *eigenvalues, int verbose)
216 {
217  { // compute eigenvalues and vectors
218  FINTEGER info = 0, lwork = -1, di = d_in;
219  double workq;
220 
221  dsyev_ ("Vectors as well", "Upper",
222  &di, cov, &di, eigenvalues, &workq, &lwork, &info);
223  lwork = FINTEGER(workq);
224  double *work = new double[lwork];
225 
226  dsyev_ ("Vectors as well", "Upper",
227  &di, cov, &di, eigenvalues, work, &lwork, &info);
228 
229  delete [] work;
230 
231  if (info != 0) {
232  fprintf (stderr, "WARN ssyev info returns %d, "
233  "a very bad PCA matrix is learnt\n",
234  int(info));
235  // do not throw exception, as the matrix could still be useful
236  }
237 
238 
239  if(verbose && d_in <= 10) {
240  printf("info=%ld new eigvals=[", long(info));
241  for(int j = 0; j < d_in; j++) printf("%g ", eigenvalues[j]);
242  printf("]\n");
243 
244  double *ci = cov;
245  printf("eigenvecs=\n");
246  for(int i = 0; i < d_in; i++) {
247  for(int j = 0; j < d_in; j++)
248  printf("%10.4g ", *ci++);
249  printf("\n");
250  }
251  }
252 
253  }
254 
255  // revert order of eigenvectors & values
256 
257  for(int i = 0; i < d_in / 2; i++) {
258 
259  std::swap(eigenvalues[i], eigenvalues[d_in - 1 - i]);
260  double *v1 = cov + i * d_in;
261  double *v2 = cov + (d_in - 1 - i) * d_in;
262  for(int j = 0; j < d_in; j++)
263  std::swap(v1[j], v2[j]);
264  }
265 
266 }
267 
268 
269 }
270 
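The two dsyev_ calls in eig() follow the standard LAPACK workspace-query protocol: a first call with lwork = -1 only returns the optimal workspace size, and the second call performs the actual eigendecomposition. The same pattern on a 2x2 example with a known answer (sketch; it reuses the dsyev_ prototype, <cstdio> and std::vector already available in this file):

// Worked example: the eigenvalues of [[2, 1], [1, 2]] are 1 and 3.
void dsyev_example ()
{
    double a[4] = {2, 1, 1, 2};   // symmetric, so row-/column-major coincide
    double w[2];                  // eigenvalues, returned in ascending order
    FINTEGER n = 2, info = 0, lwork = -1;
    double workq;

    dsyev_ ("V", "U", &n, a, &n, w, &workq, &lwork, &info);        // workspace query
    lwork = FINTEGER (workq);
    std::vector<double> work (lwork);
    dsyev_ ("V", "U", &n, a, &n, w, work.data (), &lwork, &info);  // factorization

    printf ("eigenvalues: %g %g (expect 1 and 3), info=%d\n",
            w[0], w[1], int (info));
}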
271 void PCAMatrix::train (Index::idx_t n, const float *x)
272 {
273  const float * x_in = x;
274 
275  x = fvecs_maybe_subsample (d_in, (size_t*)&n,
276  max_points_per_d * d_in, x, verbose);
277 
278  ScopeDeleter<float> del_x (x != x_in ? x : nullptr);
279 
280  // compute mean
281  mean.clear(); mean.resize(d_in, 0.0);
282  if (have_bias) { // we may want to skip the bias
283  const float *xi = x;
284  for (int i = 0; i < n; i++) {
285  for(int j = 0; j < d_in; j++)
286  mean[j] += *xi++;
287  }
288  for(int j = 0; j < d_in; j++)
289  mean[j] /= n;
290  }
291  if(verbose) {
292  printf("mean=[");
293  for(int j = 0; j < d_in; j++) printf("%g ", mean[j]);
294  printf("]\n");
295  }
296 
297  if(n >= d_in) {
298  // compute covariance matrix, store it in PCA matrix
299  PCAMat.resize(d_in * d_in);
300  float * cov = PCAMat.data();
301  { // initialize with mean * mean^T term
302  float *ci = cov;
303  for(int i = 0; i < d_in; i++) {
304  for(int j = 0; j < d_in; j++)
305  *ci++ = - n * mean[i] * mean[j];
306  }
307  }
308  {
309  FINTEGER di = d_in, ni = n;
310  float one = 1.0;
311  ssyrk_ ("Up", "Non transposed",
312  &di, &ni, &one, (float*)x, &di, &one, cov, &di);
313 
314  }
315  if(verbose && d_in <= 10) {
316  float *ci = cov;
317  printf("cov=\n");
318  for(int i = 0; i < d_in; i++) {
319  for(int j = 0; j < d_in; j++)
320  printf("%10g ", *ci++);
321  printf("\n");
322  }
323  }
324 
325  std::vector<double> covd (d_in * d_in);
326  for (size_t i = 0; i < d_in * d_in; i++) covd [i] = cov [i];
327 
328  std::vector<double> eigenvaluesd (d_in);
329 
330  eig (d_in, covd.data (), eigenvaluesd.data (), verbose);
331 
332  for (size_t i = 0; i < d_in * d_in; i++) PCAMat [i] = covd [i];
333  eigenvalues.resize (d_in);
334 
335  for (size_t i = 0; i < d_in; i++)
336  eigenvalues [i] = eigenvaluesd [i];
337 
338 
339  } else {
340 
341  std::vector<float> xc (n * d_in);
342 
343  for (size_t i = 0; i < n; i++)
344  for(size_t j = 0; j < d_in; j++)
345  xc [i * d_in + j] = x [i * d_in + j] - mean[j];
346 
347  // compute Gram matrix
348  std::vector<float> gram (n * n);
349  {
350  FINTEGER di = d_in, ni = n;
351  float one = 1.0, zero = 0.0;
352  ssyrk_ ("Up", "Transposed",
353  &ni, &di, &one, xc.data(), &di, &zero, gram.data(), &ni);
354  }
355 
356  if(verbose && d_in <= 10) {
357  float *ci = gram.data();
358  printf("gram=\n");
359  for(int i = 0; i < n; i++) {
360  for(int j = 0; j < n; j++)
361  printf("%10g ", *ci++);
362  printf("\n");
363  }
364  }
365 
366  std::vector<double> gramd (n * n);
367  for (size_t i = 0; i < n * n; i++)
368  gramd [i] = gram [i];
369 
370  std::vector<double> eigenvaluesd (n);
371 
372  // eig will fill in only the n first eigenvals
373 
374  eig (n, gramd.data (), eigenvaluesd.data (), verbose);
375 
376  PCAMat.resize(d_in * n);
377 
378  for (size_t i = 0; i < n * n; i++)
379  gram [i] = gramd [i];
380 
381  eigenvalues.resize (d_in);
382  // fill in only the n first ones
383  for (size_t i = 0; i < n; i++)
384  eigenvalues [i] = eigenvaluesd [i];
385 
386  { // compute PCAMat = x' * v
387  FINTEGER di = d_in, ni = n;
388  float one = 1.0;
389 
390  sgemm_ ("Non", "Non Trans",
391  &di, &ni, &ni,
392  &one, xc.data(), &di, gram.data(), &ni,
393  &one, PCAMat.data(), &di);
394  }
395 
396  if(verbose && d_in <= 10) {
397  float *ci = PCAMat.data();
398  printf("PCAMat=\n");
399  for(int i = 0; i < n; i++) {
400  for(int j = 0; j < d_in; j++)
401  printf("%10g ", *ci++);
402  printf("\n");
403  }
404  }
405  fvec_renorm_L2 (d_in, n, PCAMat.data());
406 
407  }
408 
409  prepare_Ab();
410  is_trained = true;
411 }
412 
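A typical use of the training routine above: learn a d_in -> d_out reduction on a sample of the data, then apply it to queries; setting eigen_power = -0.5 before train() would additionally whiten the output. Sketch with placeholder random data (`pca_example` is an illustrative name):

// Sketch: train a 64 -> 16 PCA on random vectors and apply it.
void pca_example ()
{
    int d_in = 64, d_out = 16;
    size_t nt = 10000, nq = 100;

    std::vector<float> trainset (nt * d_in), queries (nq * d_in);
    faiss::float_randn (trainset.data (), trainset.size (), 1234);
    faiss::float_randn (queries.data (), queries.size (), 5678);

    faiss::PCAMatrix pca (d_in, d_out);
    pca.train (nt, trainset.data ());

    float *q_red = pca.apply (nq, queries.data ());   // nq * d_out floats
    // ... use q_red ...
    delete [] q_red;
}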
413 void PCAMatrix::copy_from (const PCAMatrix & other)
414 {
415  FAISS_THROW_IF_NOT (other.is_trained);
416  mean = other.mean;
417  eigenvalues = other.eigenvalues;
418  PCAMat = other.PCAMat;
419  prepare_Ab ();
420  is_trained = true;
421 }
422 
423 void PCAMatrix::prepare_Ab ()
424 {
425 
426  if (!random_rotation) {
427  FAISS_THROW_IF_NOT_MSG (
428  d_out * d_in <= PCAMat.size(),
429  "PCA matrix was trained on too few examples "
430  "to output this number of dimensions");
431  A = PCAMat;
432  A.resize(d_out * d_in); // strip off useless dimensions
433 
434  // first scale the components
435  if (eigen_power != 0) {
436  float *ai = A.data();
437  for (int i = 0; i < d_out; i++) {
438  float factor = pow(eigenvalues[i], eigen_power);
439  for(int j = 0; j < d_in; j++)
440  *ai++ *= factor;
441  }
442  }
443 
444  if (balanced_bins != 0) {
445  FAISS_THROW_IF_NOT (d_out % balanced_bins == 0);
446  int dsub = d_out / balanced_bins;
447  std::vector <float> Ain;
448  std::swap(A, Ain);
449  A.resize(d_out * d_in);
450 
451  std::vector <float> accu(balanced_bins);
452  std::vector <int> counter(balanced_bins);
453 
454  // greedy assignment
455  for (int i = 0; i < d_out; i++) {
456  // find best bin
457  int best_j = -1;
458  float min_w = 1e30;
459  for (int j = 0; j < balanced_bins; j++) {
460  if (counter[j] < dsub && accu[j] < min_w) {
461  min_w = accu[j];
462  best_j = j;
463  }
464  }
465  int row_dst = best_j * dsub + counter[best_j];
466  accu[best_j] += eigenvalues[i];
467  counter[best_j] ++;
468  memcpy (&A[row_dst * d_in], &Ain[i * d_in],
469  d_in * sizeof (A[0]));
470  }
471 
472  if (verbose) {
473  printf(" bin accu=[");
474  for (int i = 0; i < balanced_bins; i++)
475  printf("%g ", accu[i]);
476  printf("]\n");
477  }
478  }
479 
480 
481  } else {
482  FAISS_THROW_IF_NOT_MSG (balanced_bins == 0,
483  "both balancing bins and applying a random rotation "
484  "does not make sense");
486 
487  rr.init(5);
488 
489  // apply scaling on the rotation matrix (right multiplication)
490  if (eigen_power != 0) {
491  for (int i = 0; i < d_out; i++) {
492  float factor = pow(eigenvalues[i], eigen_power);
493  for(int j = 0; j < d_out; j++)
494  rr.A[j * d_out + i] *= factor;
495  }
496  }
497 
498  A.resize(d_in * d_out);
499  {
500  FINTEGER dii = d_in, doo = d_out;
501  float one = 1.0, zero = 0.0;
502 
503  sgemm_ ("Not", "Not", &dii, &doo, &doo,
504  &one, PCAMat.data(), &dii, rr.A.data(), &doo, &zero,
505  A.data(), &dii);
506 
507  }
508 
509  }
510 
511  b.clear(); b.resize(d_out);
512 
513  for (int i = 0; i < d_out; i++) {
514  float accu = 0;
515  for (int j = 0; j < d_in; j++)
516  accu -= mean[j] * A[j + i * d_in];
517  b[i] = accu;
518  }
519 
520 }
521 
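The loop above sets b = -A * mean, so the final transform computes y = A x + b = A (x - mean): centering is folded into the bias term. A small consistency check, as a sketch (`check_pca_bias` is illustrative; it assumes a trained PCAMatrix):

// Sketch: compare apply_noalloc() against an explicit A * (x - mean).
void check_pca_bias (const faiss::PCAMatrix & pca, const float *x)
{
    std::vector<float> y1 (pca.d_out), y2 (pca.d_out, 0.0f);
    pca.apply_noalloc (1, x, y1.data ());          // computes A x + b

    for (int i = 0; i < pca.d_out; i++)            // computes A (x - mean)
        for (int j = 0; j < pca.d_in; j++)
            y2[i] += pca.A[i * pca.d_in + j] * (x[j] - pca.mean[j]);

    printf ("squared L2 difference: %g\n",
            faiss::fvec_L2sqr (y1.data (), y2.data (), pca.d_out));
}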
522 void PCAMatrix::reverse_transform (idx_t n, const float * xt,
523  float *x) const
524 {
525  FAISS_THROW_IF_NOT_MSG (eigen_power == 0,
526  "reverse only implemented for orthogonal transforms");
527  transform_transpose (n, xt, x);
528 }
529 
530 /*********************************************
531  * OPQMatrix
532  *********************************************/
533 
534 
535 OPQMatrix::OPQMatrix (int d, int M, int d2):
536  LinearTransform (d, d2 == -1 ? d : d2, false), M(M),
537  niter (50),
538  niter_pq (4), niter_pq_0 (40),
539  verbose(false)
540 {
541  is_trained = false;
542  // OPQ is quite expensive to train, so set this right.
543  max_train_points = 256 * 256;
544 }
545 
546 
547 
548 void OPQMatrix::train (Index::idx_t n, const float *x)
549 {
550 
551  const float * x_in = x;
552 
553  x = fvecs_maybe_subsample (d_in, (size_t*)&n,
554  max_train_points, x, verbose);
555 
556  ScopeDeleter<float> del_x (x != x_in ? x : nullptr);
557 
558  // To support d_out > d_in, we pad input vectors with 0s to d_out
559  size_t d = d_out <= d_in ? d_in : d_out;
560  size_t d2 = d_out;
561 
562 #if 0
563  // what this test shows: the only way of getting bit-exact
564  // reproducible results with sgeqrf and sgesvd seems to be forcing
565  // single-threading.
566  { // test repro
567  std::vector<float> r (d * d);
568  float * rotation = r.data();
569  float_randn (rotation, d * d, 1234);
570  printf("CS0: %016lx\n",
571  ivec_checksum (128*128, (int*)rotation));
572  matrix_qr (d, d, rotation);
573  printf("CS1: %016lx\n",
574  ivec_checksum (128*128, (int*)rotation));
575  return;
576  }
577 #endif
578 
579  if (verbose) {
580  printf ("OPQMatrix::train: training an OPQ rotation matrix "
581  "for M=%d from %ld vectors in %dD -> %dD\n",
582  M, n, d_in, d_out);
583  }
584 
585  std::vector<float> xtrain (n * d);
586  // center x
587  {
588  std::vector<float> sum (d);
589  const float *xi = x;
590  for (size_t i = 0; i < n; i++) {
591  for (int j = 0; j < d_in; j++)
592  sum [j] += *xi++;
593  }
594  for (int i = 0; i < d; i++) sum[i] /= n;
595  float *yi = xtrain.data();
596  xi = x;
597  for (size_t i = 0; i < n; i++) {
598  for (int j = 0; j < d_in; j++)
599  *yi++ = *xi++ - sum[j];
600  yi += d - d_in;
601  }
602  }
603  float *rotation;
604 
605  if (A.size () == 0) {
606  A.resize (d * d);
607  rotation = A.data();
608  if (verbose)
609  printf(" OPQMatrix::train: making random %ld*%ld rotation\n",
610  d, d);
611  float_randn (rotation, d * d, 1234);
612  matrix_qr (d, d, rotation);
613  // we use only the d * d2 upper part of the matrix
614  A.resize (d * d2);
615  } else {
616  FAISS_THROW_IF_NOT (A.size() == d * d2);
617  rotation = A.data();
618  }
619 
620 
621  std::vector<float>
622  xproj (d2 * n), pq_recons (d2 * n), xxr (d * n),
623  tmp(d * d * 4);
624 
625  std::vector<uint8_t> codes (M * n);
626  ProductQuantizer pq_regular (d2, M, 8);
627  double t0 = getmillisecs();
628  for (int iter = 0; iter < niter; iter++) {
629 
630  { // torch.mm(xtrain, rotation:t())
631  FINTEGER di = d, d2i = d2, ni = n;
632  float zero = 0, one = 1;
633  sgemm_ ("Transposed", "Not transposed",
634  &d2i, &ni, &di,
635  &one, rotation, &di,
636  xtrain.data(), &di,
637  &zero, xproj.data(), &d2i);
638  }
639 
640  pq_regular.cp.max_points_per_centroid = 1000;
641  pq_regular.cp.niter = iter == 0 ? niter_pq_0 : niter_pq;
642  pq_regular.cp.verbose = verbose;
643  pq_regular.train (n, xproj.data());
644 
645  pq_regular.compute_codes (xproj.data(), codes.data(), n);
646  pq_regular.decode (codes.data(), pq_recons.data(), n);
647 
648  float pq_err = fvec_L2sqr (pq_recons.data(), xproj.data(), n * d2) / n;
649 
650  if (verbose)
651  printf (" Iteration %d (%d PQ iterations):"
652  "%.3f s, obj=%g\n", iter, pq_regular.cp.niter,
653  (getmillisecs () - t0) / 1000.0, pq_err);
654 
655  {
656  float *u = tmp.data(), *vt = &tmp [d * d];
657  float *sing_val = &tmp [2 * d * d];
658  FINTEGER di = d, d2i = d2, ni = n;
659  float one = 1, zero = 0;
660 
661  // torch.mm(xtrain:t(), pq_recons)
662  sgemm_ ("Not", "Transposed",
663  &d2i, &di, &ni,
664  &one, pq_recons.data(), &d2i,
665  xtrain.data(), &di,
666  &zero, xxr.data(), &d2i);
667 
668 
669  FINTEGER lwork = -1, info = -1;
670  float worksz;
671  // workspace query
672  sgesvd_ ("All", "All",
673  &d2i, &di, xxr.data(), &d2i,
674  sing_val,
675  vt, &d2i, u, &di,
676  &worksz, &lwork, &info);
677 
678  lwork = int(worksz);
679  std::vector<float> work (lwork);
680  // u and vt swapped
681  sgesvd_ ("All", "All",
682  &d2i, &di, xxr.data(), &d2i,
683  sing_val,
684  vt, &d2i, u, &di,
685  work.data(), &lwork, &info);
686 
687  sgemm_ ("Transposed", "Transposed",
688  &di, &d2i, &d2i,
689  &one, u, &di, vt, &d2i,
690  &zero, rotation, &di);
691 
692  }
693  pq_regular.train_type = ProductQuantizer::Train_hot_start;
694  }
695 
696  // revert A matrix
697  if (d > d_in) {
698  for (long i = 0; i < d_out; i++)
699  memmove (&A[i * d_in], &A[i * d], sizeof(A[0]) * d_in);
700  A.resize (d_in * d_out);
701  }
702 
703  is_trained = true;
704 }
705 
706 
707 
708 
709 void OPQMatrix::reverse_transform (idx_t n, const float * xt,
710  float *x) const
711 {
712  transform_transpose (n, xt, x);
713 }
714 
715 
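In practice the OPQ rotation trained above is used through an IndexPreTransform wrapping an IndexPQ, so that the same rotation is applied when adding vectors and when searching. A minimal sketch (the dimensions and helper name are placeholders; IndexPQ.h is already included in this file and IndexPreTransform is declared in VectorTransform.h):

// Sketch: OPQ rotation (d=64, M=8 sub-quantizers) in front of a PQ index.
void opq_index_example (size_t nt, const float *trainset,
                        size_t nb, const float *database)
{
    int d = 64, M = 8;
    faiss::OPQMatrix opq (d, M);          // learns a rotation adapted to the PQ
    faiss::IndexPQ pq_index (d, M, 8);    // 8 bits per sub-quantizer
    faiss::IndexPreTransform index (&opq, &pq_index);

    index.train (nt, trainset);           // trains the rotation, then the PQ
    index.add (nb, database);
    // index.search (...) applies the same rotation to the queries
}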
716 /*********************************************
717  * NormalizationTransform
718  *********************************************/
719 
720 NormalizationTransform::NormalizationTransform (int d, float norm):
721  VectorTransform (d, d), norm (norm)
722 {
723 }
724 
725 NormalizationTransform::NormalizationTransform ():
726  VectorTransform (-1, -1), norm (-1)
727 {
728 }
729 
730 void NormalizationTransform::apply_noalloc
731  (idx_t n, const float* x, float* xt) const
732 {
733  if (norm == 2.0) {
734  memcpy (xt, x, sizeof (x[0]) * n * d_in);
735  fvec_renorm_L2 (d_in, n, xt);
736  } else {
737  FAISS_THROW_MSG ("not implemented");
738  }
739 }
740 
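NormalizationTransform is typically chained in front of an inner-product index so that inner product becomes cosine similarity; as the code above shows, only norm = 2.0 (L2 renormalization) is implemented. Sketch (`make_cosine_index` is illustrative; IndexFlatIP comes from IndexFlat.h, which this file does not include):

// Sketch: L2-normalize vectors on the fly so inner product == cosine similarity.
#include "IndexFlat.h"

faiss::Index *make_cosine_index (int d)
{
    auto *norm  = new faiss::NormalizationTransform (d, 2.0);
    auto *inner = new faiss::IndexFlatIP (d);
    auto *index = new faiss::IndexPreTransform (norm, inner);
    index->own_fields = true;             // the wrapper deletes norm and inner
    return index;
}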
741 /*********************************************
742  * IndexPreTransform
743  *********************************************/
744 
745 IndexPreTransform::IndexPreTransform ():
746  index(nullptr), own_fields (false)
747 {
748 }
749 
750 
751 IndexPreTransform::IndexPreTransform (
752  Index * index):
753  Index (index->d, index->metric_type),
754  index (index), own_fields (false)
755 {
756  is_trained = index->is_trained;
757 }
758 
759 
760 IndexPreTransform::IndexPreTransform (
761  VectorTransform * ltrans,
762  Index * index):
763  Index (index->d, index->metric_type),
764  index (index), own_fields (false)
765 {
766  is_trained = index->is_trained;
767  prepend_transform (ltrans);
768 }
769 
770 void IndexPreTransform::prepend_transform (VectorTransform *ltrans)
771 {
772  FAISS_THROW_IF_NOT (ltrans->d_out == d);
773  is_trained = is_trained && ltrans->is_trained;
774  chain.insert (chain.begin(), ltrans);
775  d = ltrans->d_in;
776 }
777 
778 
779 IndexPreTransform::~IndexPreTransform ()
780 {
781  if (own_fields) {
782  for (int i = 0; i < chain.size(); i++)
783  delete chain[i];
784  delete index;
785  }
786 }
787 
788 
789 
790 
791 void IndexPreTransform::train (idx_t n, const float *x)
792 {
793  int last_untrained = 0;
794  if (!index->is_trained) {
795  last_untrained = chain.size();
796  } else {
797  for (int i = chain.size() - 1; i >= 0; i--) {
798  if (!chain[i]->is_trained) {
799  last_untrained = i;
800  break;
801  }
802  }
803  }
804  const float *prev_x = x;
805  ScopeDeleter<float> del;
806 
807  for (int i = 0; i <= last_untrained; i++) {
808  if (i < chain.size()) {
809  VectorTransform *ltrans = chain [i];
810  if (!ltrans->is_trained)
811  ltrans->train(n, prev_x);
812  } else {
813  index->train (n, prev_x);
814  }
815  if (i == last_untrained) break;
816 
817  float * xt = chain[i]->apply (n, prev_x);
818  if (prev_x != x) delete prev_x;
819  prev_x = xt;
820  del.set(xt);
821  }
822 
823  is_trained = true;
824 }
825 
826 
827 const float *IndexPreTransform::apply_chain (idx_t n, const float *x) const
828 {
829  const float *prev_x = x;
830  ScopeDeleter<float> del;
831 
832  for (int i = 0; i < chain.size(); i++) {
833  float * xt = chain[i]->apply (n, prev_x);
834  ScopeDeleter<float> del2 (xt);
835  del2.swap (del);
836  prev_x = xt;
837  }
838  del.release ();
839  return prev_x;
840 }
841 
842 void IndexPreTransform::add (idx_t n, const float *x)
843 {
844  FAISS_THROW_IF_NOT (is_trained);
845  const float *xt = apply_chain (n, x);
846  ScopeDeleter<float> del(xt == x ? nullptr : xt);
847  index->add (n, xt);
848  ntotal = index->ntotal;
849 }
850 
851 void IndexPreTransform::add_with_ids (idx_t n, const float * x,
852  const long *xids)
853 {
854  FAISS_THROW_IF_NOT (is_trained);
855  const float *xt = apply_chain (n, x);
856  ScopeDeleter<float> del(xt == x ? nullptr : xt);
857  index->add_with_ids (n, xt, xids);
858  ntotal = index->ntotal;
859 }
860 
861 
862 
863 
864 void IndexPreTransform::search (idx_t n, const float *x, idx_t k,
865  float *distances, idx_t *labels) const
866 {
867  FAISS_THROW_IF_NOT (is_trained);
868  const float *xt = apply_chain (n, x);
869  ScopeDeleter<float> del(xt == x ? nullptr : xt);
870  index->search (n, xt, k, distances, labels);
871 }
872 
873 
874 void IndexPreTransform::reset () {
875  index->reset();
876  ntotal = 0;
877 }
878 
879 long IndexPreTransform::remove_ids (const IDSelector & sel) {
880  long nremove = index->remove_ids (sel);
881  ntotal = index->ntotal;
882  return nremove;
883 }
884 
885 
886 void IndexPreTransform::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
887 {
888  float *x = chain.empty() ? recons : new float [ni * index->d];
889  ScopeDeleter<float> del (recons == x ? nullptr : x);
890  // initial reconstruction
891  index->reconstruct_n (i0, ni, x);
892 
893  // revert transformations from last to first
894  for (int i = chain.size() - 1; i >= 0; i--) {
895  float *x_pre = i == 0 ? recons : new float [chain[i]->d_in * ni];
896  ScopeDeleter<float> del2 (x_pre == recons ? nullptr : x_pre);
897  chain [i]->reverse_transform (ni, x, x_pre);
898  del2.swap (del); // delete [] x;
899  x = x_pre;
900  }
901 }
902 
903 
904 
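Putting the pieces together: reconstruct_n() above undoes the whole chain, so an IndexPreTransform can search in the reduced space and still return approximate vectors in the original space. Sketch with a PCA 128 -> 32 in front of an exact L2 index (random placeholder data; IndexFlat.h is not included in this file):

// Sketch: PCA dimensionality reduction in front of a flat L2 index.
#include "IndexFlat.h"

void pca_flat_example ()
{
    int d = 128, d_red = 32;
    size_t nb = 10000;
    std::vector<float> xb (nb * d);
    faiss::float_randn (xb.data (), xb.size (), 42);

    faiss::PCAMatrix pca (d, d_red);
    faiss::IndexFlatL2 flat (d_red);
    faiss::IndexPreTransform index (&pca, &flat);

    index.train (nb, xb.data ());          // trains the PCA; the flat index needs none
    index.add (nb, xb.data ());

    faiss::Index::idx_t labels[5];
    float distances[5];
    index.search (1, xb.data (), 5, distances, labels);

    std::vector<float> recons (d);
    index.reconstruct_n (0, 1, recons.data ());  // back-projected to the original 128-D space
}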
905 /*********************************************
906  * RemapDimensionsTransform
907  *********************************************/
908 
909 
910 RemapDimensionsTransform::RemapDimensionsTransform (
911  int d_in, int d_out, const int *map_in):
912  VectorTransform (d_in, d_out)
913 {
914  map.resize (d_out);
915  for (int i = 0; i < d_out; i++) {
916  map[i] = map_in[i];
917  FAISS_THROW_IF_NOT (map[i] == -1 || (map[i] >= 0 && map[i] < d_in));
918  }
919 }
920 
921 RemapDimensionsTransform::RemapDimensionsTransform (
922  int d_in, int d_out, bool uniform): VectorTransform (d_in, d_out)
923 {
924  map.resize (d_out, -1);
925 
926  if (uniform) {
927  if (d_in < d_out) {
928  for (int i = 0; i < d_in; i++) {
929  map [i * d_out / d_in] = i;
930  }
931  } else {
932  for (int i = 0; i < d_out; i++) {
933  map [i] = i * d_in / d_out;
934  }
935  }
936  } else {
937  for (int i = 0; i < d_in && i < d_out; i++)
938  map [i] = i;
939  }
940 }
941 
942 
943 void RemapDimensionsTransform::apply_noalloc (idx_t n, const float * x,
944  float *xt) const
945 {
946  for (idx_t i = 0; i < n; i++) {
947  for (int j = 0; j < d_out; j++) {
948  xt[j] = map[j] < 0 ? 0 : x[map[j]];
949  }
950  x += d_in;
951  xt += d_out;
952  }
953 }
954 
955 void RemapDimensionsTransform::reverse_transform (idx_t n, const float * xt,
956  float *x) const
957 {
958  memset (x, 0, sizeof (*x) * n * d_in);
959  for (idx_t i = 0; i < n; i++) {
960  for (int j = 0; j < d_out; j++) {
961  if (map[j] >= 0) x[map[j]] = xt[j];
962  }
963  x += d_in;
964  xt += d_out;
965  }
966 }
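The reverse_transform() above is exact only when the map is a permutation; dimensions not covered by the map come back as zeros. A small sketch of the uniform mode, e.g. padding 5-D vectors to 8-D so they fit a quantizer that needs a multiple of 4 (`remap_example` is illustrative):

// Sketch: pad 5-D vectors to 8-D, spreading the original dimensions uniformly.
void remap_example (size_t n, const float *x5)
{
    faiss::RemapDimensionsTransform remap (5, 8, true);   // uniform spacing
    std::vector<float> x8 (n * 8);
    remap.apply_noalloc (n, x5, x8.data ());
    // dimensions not covered by the map stay 0
}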