/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-

#include "VectorTransform.h"

#include <cstdio>
#include <cmath>
#include <cstring>

#include "utils.h"
#include "FaissAssert.h"
#include "IndexPQ.h"

using namespace faiss;


extern "C" {

// this is to keep the clang syntax checker happy
#ifndef FINTEGER
#define FINTEGER int
#endif


/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */

int sgemm_ (
        const char *transa, const char *transb, FINTEGER *m, FINTEGER *n,
        FINTEGER *k, const float *alpha, const float *a,
        FINTEGER *lda, const float *b,
        FINTEGER *ldb, float *beta,
        float *c, FINTEGER *ldc);

int ssyrk_ (
        const char *uplo, const char *trans, FINTEGER *n, FINTEGER *k,
        float *alpha, float *a, FINTEGER *lda,
        float *beta, float *c, FINTEGER *ldc);

/* Lapack functions from http://www.netlib.org/clapack/old/single/ */

int ssyev_ (
        const char *jobz, const char *uplo, FINTEGER *n, float *a,
        FINTEGER *lda, float *w, float *work, FINTEGER *lwork,
        FINTEGER *info);

int dsyev_ (
        const char *jobz, const char *uplo, FINTEGER *n, double *a,
        FINTEGER *lda, double *w, double *work, FINTEGER *lwork,
        FINTEGER *info);

int sgesvd_(
        const char *jobu, const char *jobvt, FINTEGER *m, FINTEGER *n,
        float *a, FINTEGER *lda, float *s, float *u, FINTEGER *ldu, float *vt,
        FINTEGER *ldvt, float *work, FINTEGER *lwork, FINTEGER *info);

}

/*********************************************
 * VectorTransform
 *********************************************/



float * VectorTransform::apply (Index::idx_t n, const float * x) const
{
    float * xt = new float[n * d_out];
    apply_noalloc (n, x, xt);
    return xt;
}


void VectorTransform::train (idx_t, const float *) {
    // does nothing by default
}


void VectorTransform::reverse_transform (
        idx_t , const float *,
        float *) const
{
    FAISS_THROW_MSG ("reverse transform not implemented");
}
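// Exposition (not library code): the memory contract of the base class is
// that apply() allocates its result with new[] and the caller must
// delete[] it, while apply_noalloc() writes into a caller-provided buffer
// of n * d_out floats. A minimal hypothetical call site:
//
//   float *xt = transform.apply (n, x); // n * d_out floats, owned by caller
//   ...
//   delete [] xt;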



/*********************************************
 * LinearTransform
 *********************************************/
/// both d_in > d_out and d_in < d_out are supported
LinearTransform::LinearTransform (int d_in, int d_out,
                                  bool have_bias):
    VectorTransform (d_in, d_out), have_bias (have_bias),
    verbose (false)
{}

void LinearTransform::apply_noalloc (idx_t n, const float * x,
                                     float * xt) const
{
    FAISS_THROW_IF_NOT_MSG(is_trained, "Transformation not trained yet");

    float c_factor;
    if (have_bias) {
        FAISS_THROW_IF_NOT_MSG (b.size() == d_out, "Bias not initialized");
        float * xi = xt;
        for (int i = 0; i < n; i++)
            for(int j = 0; j < d_out; j++)
                *xi++ = b[j];
        c_factor = 1.0;
    } else {
        c_factor = 0.0;
    }

    FAISS_THROW_IF_NOT_MSG (A.size() == d_out * d_in,
                            "Transformation matrix not initialized");

    float one = 1;
    FINTEGER nbiti = d_out, ni = n, di = d_in;
    sgemm_ ("Transposed", "Not transposed",
            &nbiti, &ni, &di,
            &one, A.data(), &di, x, &di, &c_factor, xt, &nbiti);

}


void LinearTransform::transform_transpose (idx_t n, const float * y,
                                           float *x) const
{
    if (have_bias) { // allocate buffer to store bias-corrected data
        float *y_new = new float [n * d_out];
        const float *yr = y;
        float *yw = y_new;
        for (idx_t i = 0; i < n; i++) {
            for (int j = 0; j < d_out; j++) {
                *yw++ = *yr++ - b [j];
            }
        }
        y = y_new;
    }

    {
        FINTEGER dii = d_in, doi = d_out, ni = n;
        float one = 1.0, zero = 0.0;
        sgemm_ ("Not", "Not", &dii, &ni, &doi,
                &one, A.data (), &dii, y, &doi, &zero, x, &dii);
    }

    if (have_bias) delete [] y;
}
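// Exposition: transform_transpose computes x = A^T (y - b). Since the
// forward map of LinearTransform is y = A x + b (see apply_noalloc above,
// with A of size d_out * d_in), this is an exact inverse whenever the rows
// of A are orthonormal (A A^T = I), which is why the orthogonal subclasses
// below implement reverse_transform by delegating here.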


/*********************************************
 * RandomRotationMatrix
 *********************************************/

void RandomRotationMatrix::init (int seed)
{

    if(d_out <= d_in) {
        A.resize (d_out * d_in);
        float *q = A.data();
        float_randn(q, d_out * d_in, seed);
        matrix_qr(d_in, d_out, q);
    } else {
        A.resize (d_out * d_out);
        float *q = A.data();
        float_randn(q, d_out * d_out, seed);
        matrix_qr(d_out, d_out, q);
        // remove columns
        int i, j;
        for (i = 0; i < d_out; i++) {
            for(j = 0; j < d_in; j++) {
                q[i * d_in + j] = q[i * d_out + j];
            }
        }
        A.resize(d_in * d_out);
    }

}

void RandomRotationMatrix::reverse_transform (idx_t n, const float * xt,
                                              float *x) const
{
    transform_transpose (n, xt, x);
}
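// Usage sketch (illustrative, with assumed dimensions): rotate 64-D
// vectors by a random orthogonal matrix. init() must be called before the
// transform is used.
//
//   RandomRotationMatrix rr (64, 64); // d_in = d_out = 64
//   rr.init (1234);                   // seed for float_randn
//   float *xt = rr.apply (n, x);      // caller frees with delete[]
//   rr.reverse_transform (n, xt, x2); // x2 reproduces x (orthogonal A)
//   delete [] xt;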

/*********************************************
 * PCAMatrix
 *********************************************/

PCAMatrix::PCAMatrix (int d_in, int d_out,
                      float eigen_power, bool random_rotation):
    LinearTransform(d_in, d_out, true),
    eigen_power(eigen_power), random_rotation(random_rotation)
{
    is_trained = false;
    max_points_per_d = 1000;
    balanced_bins = 0;
}


namespace {

/// Compute the eigenvalue decomposition of symmetric matrix cov,
/// dimensions d_in-by-d_in. Output eigenvectors in cov.

void eig(size_t d_in, double *cov, double *eigenvalues, int verbose)
{
    { // compute eigenvalues and vectors
        FINTEGER info = 0, lwork = -1, di = d_in;
        double workq;

        dsyev_ ("Vectors as well", "Upper",
                &di, cov, &di, eigenvalues, &workq, &lwork, &info);
        lwork = FINTEGER(workq);
        double *work = new double[lwork];

        dsyev_ ("Vectors as well", "Upper",
                &di, cov, &di, eigenvalues, work, &lwork, &info);

        delete [] work;

        if (info != 0) {
            fprintf (stderr, "WARN dsyev info returns %d, "
                     "a very bad PCA matrix is learnt\n",
                     int(info));
            // do not throw exception, as the matrix could still be useful
        }


        if(verbose && d_in <= 10) {
            printf("info=%ld new eigvals=[", long(info));
            for(int j = 0; j < d_in; j++) printf("%g ", eigenvalues[j]);
            printf("]\n");

            double *ci = cov;
            printf("eigenvecs=\n");
            for(int i = 0; i < d_in; i++) {
                for(int j = 0; j < d_in; j++)
                    printf("%10.4g ", *ci++);
                printf("\n");
            }
        }

    }

    // revert order of eigenvectors & values

    for(int i = 0; i < d_in / 2; i++) {

        std::swap(eigenvalues[i], eigenvalues[d_in - 1 - i]);
        double *v1 = cov + i * d_in;
        double *v2 = cov + (d_in - 1 - i) * d_in;
        for(int j = 0; j < d_in; j++)
            std::swap(v1[j], v2[j]);
    }

}


}
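// A note on the two-call dsyev_ pattern above (exposition only): LAPACK's
// convention is that calling a routine with lwork == -1 performs a
// "workspace query" that computes nothing and only writes the optimal
// work-buffer size into the first work element; the caller then allocates
// that buffer and calls the routine again to do the real work:
//
//   lwork = -1;  dsyev_ (..., &workq, &lwork, &info);  // query size
//   lwork = FINTEGER (workq);  work = new double[lwork];
//   dsyev_ (..., work, &lwork, &info);                 // actual solve
//
// The same query-allocate-solve pattern reappears with sgesvd_ in
// OPQMatrix::train below.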
void PCAMatrix::train (Index::idx_t n, const float *x)
{
    const float * x_in = x;

    x = fvecs_maybe_subsample (d_in, (size_t*)&n,
                               max_points_per_d * d_in, x, verbose);

    ScopeDeleter<float> del_x (x != x_in ? x : nullptr);

    // compute mean
    mean.clear(); mean.resize(d_in, 0.0);
    if (have_bias) { // we may want to skip the bias
        const float *xi = x;
        for (int i = 0; i < n; i++) {
            for(int j = 0; j < d_in; j++)
                mean[j] += *xi++;
        }
        for(int j = 0; j < d_in; j++)
            mean[j] /= n;
    }
    if(verbose) {
        printf("mean=[");
        for(int j = 0; j < d_in; j++) printf("%g ", mean[j]);
        printf("]\n");
    }

    if(n >= d_in) {
        // compute covariance matrix, store it in PCA matrix
        PCAMat.resize(d_in * d_in);
        float * cov = PCAMat.data();
        { // initialize with mean * mean^T term
            float *ci = cov;
            for(int i = 0; i < d_in; i++) {
                for(int j = 0; j < d_in; j++)
                    *ci++ = - n * mean[i] * mean[j];
            }
        }
        {
            FINTEGER di = d_in, ni = n;
            float one = 1.0;
            ssyrk_ ("Up", "Non transposed",
                    &di, &ni, &one, (float*)x, &di, &one, cov, &di);

        }
        if(verbose && d_in <= 10) {
            float *ci = cov;
            printf("cov=\n");
            for(int i = 0; i < d_in; i++) {
                for(int j = 0; j < d_in; j++)
                    printf("%10g ", *ci++);
                printf("\n");
            }
        }

        std::vector<double> covd (d_in * d_in);
        for (size_t i = 0; i < d_in * d_in; i++) covd [i] = cov [i];

        std::vector<double> eigenvaluesd (d_in);

        eig (d_in, covd.data (), eigenvaluesd.data (), verbose);

        for (size_t i = 0; i < d_in * d_in; i++) PCAMat [i] = covd [i];
        eigenvalues.resize (d_in);

        for (size_t i = 0; i < d_in; i++)
            eigenvalues [i] = eigenvaluesd [i];


    } else {

        std::vector<float> xc (n * d_in);

        for (size_t i = 0; i < n; i++)
            for(size_t j = 0; j < d_in; j++)
                xc [i * d_in + j] = x [i * d_in + j] - mean[j];

        // compute Gram matrix
        std::vector<float> gram (n * n);
        {
            FINTEGER di = d_in, ni = n;
            float one = 1.0, zero = 0.0;
            ssyrk_ ("Up", "Transposed",
                    &ni, &di, &one, xc.data(), &di, &zero, gram.data(), &ni);
        }

        if(verbose && d_in <= 10) {
            float *ci = gram.data();
            printf("gram=\n");
            for(int i = 0; i < n; i++) {
                for(int j = 0; j < n; j++)
                    printf("%10g ", *ci++);
                printf("\n");
            }
        }

        std::vector<double> gramd (n * n);
        for (size_t i = 0; i < n * n; i++)
            gramd [i] = gram [i];

        std::vector<double> eigenvaluesd (n);

        // eig will fill in only the n first eigenvals

        eig (n, gramd.data (), eigenvaluesd.data (), verbose);

        PCAMat.resize(d_in * d_in);

        for (size_t i = 0; i < n * n; i++)
            gram [i] = gramd [i];

        eigenvalues.resize (d_in);
        // fill in only the n first ones
        for (size_t i = 0; i < n; i++)
            eigenvalues [i] = eigenvaluesd [i];

        { // compute PCAMat = x' * v
            FINTEGER di = d_in, ni = n;
            float one = 1.0;

            sgemm_ ("Non", "Non Trans",
                    &di, &ni, &ni,
                    &one, xc.data(), &di, gram.data(), &ni,
                    &one, PCAMat.data(), &di);
        }


        if(verbose && d_in <= 10) {
            float *ci = PCAMat.data();
            printf("PCAMat=\n");
            for(int i = 0; i < n; i++) {
                for(int j = 0; j < d_in; j++)
                    printf("%10g ", *ci++);
                printf("\n");
            }
        }
        fvec_renorm_L2 (d_in, n, PCAMat.data());

    }



    prepare_Ab();
    is_trained = true;
}
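// Usage sketch (hypothetical sizes; the constructor's default arguments
// are assumed to come from VectorTransform.h): reduce 128-D vectors to 32
// dimensions, then map back with reverse_transform.
//
//   PCAMatrix pca (128, 32);           // eigen_power = 0 -> orthogonal rows
//   pca.train (nt, xtrain);            // nt training vectors, 128-D each
//   float *xt = pca.apply (n, x);      // n * 32 floats, caller frees
//   pca.reverse_transform (n, xt, x2); // least-squares reconstruction
//   delete [] xt;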

void PCAMatrix::copy_from (const PCAMatrix & other)
{
    FAISS_THROW_IF_NOT (other.is_trained);
    mean = other.mean;
    eigenvalues = other.eigenvalues;
    PCAMat = other.PCAMat;
    prepare_Ab ();
    is_trained = true;
}

void PCAMatrix::prepare_Ab ()
{

    if (!random_rotation) {
        A = PCAMat;
        A.resize(d_out * d_in); // strip off useless dimensions

        // first scale the components
        if (eigen_power != 0) {
            float *ai = A.data();
            for (int i = 0; i < d_out; i++) {
                float factor = pow(eigenvalues[i], eigen_power);
                for(int j = 0; j < d_in; j++)
                    *ai++ *= factor;
            }
        }

        if (balanced_bins != 0) {
            FAISS_THROW_IF_NOT (d_out % balanced_bins == 0);
            int dsub = d_out / balanced_bins;
            std::vector <float> Ain;
            std::swap(A, Ain);
            A.resize(d_out * d_in);

            std::vector <float> accu(balanced_bins);
            std::vector <int> counter(balanced_bins);

            // greedy assignment
            for (int i = 0; i < d_out; i++) {
                // find best bin
                int best_j = -1;
                float min_w = 1e30;
                for (int j = 0; j < balanced_bins; j++) {
                    if (counter[j] < dsub && accu[j] < min_w) {
                        min_w = accu[j];
                        best_j = j;
                    }
                }
                int row_dst = best_j * dsub + counter[best_j];
                accu[best_j] += eigenvalues[i];
                counter[best_j] ++;
                memcpy (&A[row_dst * d_in], &Ain[i * d_in],
                        d_in * sizeof (A[0]));
            }

            if (verbose) {
                printf("  bin accu=[");
                for (int i = 0; i < balanced_bins; i++)
                    printf("%g ", accu[i]);
                printf("]\n");
            }
        }


    } else {
        FAISS_THROW_IF_NOT_MSG (balanced_bins == 0,
             "both balancing bins and applying a random rotation "
             "does not make sense");
        RandomRotationMatrix rr(d_out, d_out);

        rr.init(5);

        // apply scaling on the rotation matrix (right multiplication)
        if (eigen_power != 0) {
            for (int i = 0; i < d_out; i++) {
                float factor = pow(eigenvalues[i], eigen_power);
                for(int j = 0; j < d_out; j++)
                    rr.A[j * d_out + i] *= factor;
            }
        }

        A.resize(d_in * d_out);
        {
            FINTEGER dii = d_in, doo = d_out;
            float one = 1.0, zero = 0.0;

            sgemm_ ("Not", "Not", &dii, &doo, &doo,
                    &one, PCAMat.data(), &dii, rr.A.data(), &doo, &zero,
                    A.data(), &dii);

        }

    }

    b.clear(); b.resize(d_out);

    for (int i = 0; i < d_out; i++) {
        float accu = 0;
        for (int j = 0; j < d_in; j++)
            accu -= mean[j] * A[j + i * d_in];
        b[i] = accu;
    }

}
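// Worked equation (exposition): prepare_Ab arranges that applying the
// transform computes y = A (x - mean). Expanding gives y = A x + b with
//
//   b[i] = - sum_j A[i * d_in + j] * mean[j]
//
// which is exactly the accumulation in the loop above; apply_noalloc then
// adds b through the beta/c_factor term of sgemm_.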

void PCAMatrix::reverse_transform (idx_t n, const float * xt,
                                   float *x) const
{
    FAISS_THROW_IF_NOT_MSG (eigen_power == 0,
                            "reverse only implemented for orthogonal transforms");
    transform_transpose (n, xt, x);
}

/*********************************************
 * OPQMatrix
 *********************************************/


OPQMatrix::OPQMatrix (int d, int M, int d2):
    LinearTransform (d, d2 == -1 ? d : d2, false), M(M),
    niter (50),
    niter_pq (4), niter_pq_0 (40),
    verbose(false)
{
    is_trained = false;
    // OPQ is quite expensive to train, so set this right.
    max_train_points = 256 * 256;
}



void OPQMatrix::train (Index::idx_t n, const float *x)
{

    const float * x_in = x;

    x = fvecs_maybe_subsample (d_in, (size_t*)&n,
                               max_train_points, x, verbose);

    ScopeDeleter<float> del_x (x != x_in ? x : nullptr);

    // To support d_out > d_in, we pad input vectors with 0s to d_out
    size_t d = d_out <= d_in ? d_in : d_out;
    size_t d2 = d_out;

#if 0
    // what this test shows: the only way of getting bit-exact
    // reproducible results with sgeqrf and sgesvd seems to be forcing
    // single-threading.
    { // test repro
        std::vector<float> r (d * d);
        float * rotation = r.data();
        float_randn (rotation, d * d, 1234);
        printf("CS0: %016lx\n",
               ivec_checksum (128*128, (int*)rotation));
        matrix_qr (d, d, rotation);
        printf("CS1: %016lx\n",
               ivec_checksum (128*128, (int*)rotation));
        return;
    }
#endif

    if (verbose) {
        printf ("OPQMatrix::train: training an OPQ rotation matrix "
                "for M=%d from %ld vectors in %dD -> %dD\n",
                M, n, d_in, d_out);
    }

    std::vector<float> xtrain (n * d);
    // center x
    {
        std::vector<float> sum (d);
        const float *xi = x;
        for (size_t i = 0; i < n; i++) {
            for (int j = 0; j < d_in; j++)
                sum [j] += *xi++;
        }
        for (int i = 0; i < d; i++) sum[i] /= n;
        float *yi = xtrain.data();
        xi = x;
        for (size_t i = 0; i < n; i++) {
            for (int j = 0; j < d_in; j++)
                *yi++ = *xi++ - sum[j];
            yi += d - d_in;
        }
    }
    float *rotation;

    if (A.size () == 0) {
        A.resize (d * d);
        rotation = A.data();
        if (verbose)
            printf("  OPQMatrix::train: making random %ld*%ld rotation\n",
                   d, d);
        float_randn (rotation, d * d, 1234);
        matrix_qr (d, d, rotation);
        // we use only the d * d2 upper part of the matrix
        A.resize (d * d2);
    } else {
        FAISS_THROW_IF_NOT (A.size() == d * d2);
        rotation = A.data();
    }


    std::vector<float>
        xproj (d2 * n), pq_recons (d2 * n), xxr (d * n),
        tmp(d * d * 4);

    std::vector<uint8_t> codes (M * n);
    ProductQuantizer pq_regular (d2, M, 8);
    double t0 = getmillisecs();
    for (int iter = 0; iter < niter; iter++) {

        { // torch.mm(xtrain, rotation:t())
            FINTEGER di = d, d2i = d2, ni = n;
            float zero = 0, one = 1;
            sgemm_ ("Transposed", "Not transposed",
                    &d2i, &ni, &di,
                    &one, rotation, &di,
                    xtrain.data(), &di,
                    &zero, xproj.data(), &d2i);
        }

        pq_regular.cp.max_points_per_centroid = 1000;
        pq_regular.cp.niter = iter == 0 ? niter_pq_0 : niter_pq;
        pq_regular.cp.verbose = verbose;
        pq_regular.train (n, xproj.data());

        pq_regular.compute_codes (xproj.data(), codes.data(), n);
        pq_regular.decode (codes.data(), pq_recons.data(), n);

        float pq_err = fvec_L2sqr (pq_recons.data(), xproj.data(), n * d2) / n;

        if (verbose)
            printf ("    Iteration %d (%d PQ iterations): "
                    "%.3f s, obj=%g\n", iter, pq_regular.cp.niter,
                    (getmillisecs () - t0) / 1000.0, pq_err);

        {
            float *u = tmp.data(), *vt = &tmp [d * d];
            float *sing_val = &tmp [2 * d * d];
            FINTEGER di = d, d2i = d2, ni = n;
            float one = 1, zero = 0;

            // torch.mm(xtrain:t(), pq_recons)
            sgemm_ ("Not", "Transposed",
                    &d2i, &di, &ni,
                    &one, pq_recons.data(), &d2i,
                    xtrain.data(), &di,
                    &zero, xxr.data(), &d2i);


            FINTEGER lwork = -1, info = -1;
            float worksz;
            // workspace query
            sgesvd_ ("All", "All",
                     &d2i, &di, xxr.data(), &d2i,
                     sing_val,
                     vt, &d2i, u, &di,
                     &worksz, &lwork, &info);

            lwork = int(worksz);
            std::vector<float> work (lwork);
            // u and vt swapped
            sgesvd_ ("All", "All",
                     &d2i, &di, xxr.data(), &d2i,
                     sing_val,
                     vt, &d2i, u, &di,
                     work.data(), &lwork, &info);

            sgemm_ ("Transposed", "Transposed",
                    &di, &d2i, &d2i,
                    &one, u, &di, vt, &d2i,
                    &zero, rotation, &di);

        }
        pq_regular.train_type = ProductQuantizer::Train_hot_start;
    }

    // revert A matrix
    if (d > d_in) {
        for (long i = 0; i < d_out; i++)
            memmove (&A[i * d_in], &A[i * d], sizeof(A[0]) * d_in);
        A.resize (d_in * d_out);
    }

    is_trained = true;
}
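// Usage sketch (illustrative sizes): learn an OPQ rotation for M = 8
// sub-quantizers on 64-D data. In practice the rotation is chained with a
// PQ index through IndexPreTransform (see below) rather than used alone.
//
//   OPQMatrix opq (64, 8);        // d2 == -1 keeps the output at 64-D
//   opq.train (nt, xtrain);       // alternates PQ training and SVD updates
//   float *xr = opq.apply (n, x); // rotated vectors, caller frees
//   delete [] xr;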




void OPQMatrix::reverse_transform (idx_t n, const float * xt,
                                   float *x) const
{
    transform_transpose (n, xt, x);
}


/*********************************************
 * NormalizationTransform
 *********************************************/

NormalizationTransform::NormalizationTransform (int d, float norm):
    VectorTransform (d, d), norm (norm)
{
}

NormalizationTransform::NormalizationTransform ():
    VectorTransform (-1, -1), norm (-1)
{
}

void NormalizationTransform::apply_noalloc
      (idx_t n, const float* x, float* xt) const
{
    if (norm == 2.0) {
        memcpy (xt, x, sizeof (x[0]) * n * d_in);
        fvec_renorm_L2 (d_in, n, xt);
    } else {
        FAISS_THROW_MSG ("not implemented");
    }
}
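// Exposition: for norm == 2.0 each input vector is copied and rescaled to
// unit Euclidean length, x_i <- x_i / ||x_i||_2 (the work is delegated to
// fvec_renorm_L2); any other norm deliberately throws, as the code above
// shows.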

/*********************************************
 * IndexPreTransform
 *********************************************/

IndexPreTransform::IndexPreTransform ():
    index(nullptr), own_fields (false)
{
}


IndexPreTransform::IndexPreTransform (
        Index * index):
    Index (index->d, index->metric_type),
    index (index), own_fields (false)
{
    is_trained = index->is_trained;
}


IndexPreTransform::IndexPreTransform (
        VectorTransform * ltrans,
        Index * index):
    Index (index->d, index->metric_type),
    index (index), own_fields (false)
{
    is_trained = index->is_trained;
    prepend_transform (ltrans);
}

void IndexPreTransform::prepend_transform (VectorTransform *ltrans)
{
    FAISS_THROW_IF_NOT (ltrans->d_out == d);
    is_trained = is_trained && ltrans->is_trained;
    chain.insert (chain.begin(), ltrans);
    d = ltrans->d_in;
}
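// Usage sketch (hypothetical; IndexFlatL2 is declared in IndexFlat.h,
// which this file does not include): chain a PCA reduction in front of a
// 32-D index so callers interact with 128-D vectors. own_fields stays
// false, so the stack-allocated components are not deleted by the wrapper.
//
//   IndexFlatL2 sub (32);
//   PCAMatrix pca (128, 32);
//   IndexPreTransform index (&pca, &sub); // index.d == 128
//   index.train (nt, xtrain);  // trains pca first, then sub
//   index.add (n, x);          // apply_chain runs before insertion
//   index.search (nq, q, k, D, I);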


IndexPreTransform::~IndexPreTransform ()
{
    if (own_fields) {
        for (int i = 0; i < chain.size(); i++)
            delete chain[i];
        delete index;
    }
}




void IndexPreTransform::train (idx_t n, const float *x)
{
    int last_untrained = 0;
    if (index->is_trained) {
        last_untrained = chain.size();
    } else {
        for (int i = chain.size() - 1; i >= 0; i--) {
            if (!chain[i]->is_trained) {
                last_untrained = i;
                break;
            }
        }
    }
    const float *prev_x = x;
    ScopeDeleter<float> del;

    for (int i = 0; i <= last_untrained; i++) {
        if (i < chain.size()) {
            VectorTransform *ltrans = chain [i];
            if (!ltrans->is_trained)
                ltrans->train(n, prev_x);
        } else {
            index->train (n, prev_x);
        }
        if (i == last_untrained) break;

        float * xt = chain[i]->apply (n, prev_x);
        if (prev_x != x) delete [] prev_x;
        prev_x = xt;
        del.set(xt);
    }

    is_trained = true;
}


const float *IndexPreTransform::apply_chain (idx_t n, const float *x) const
{
    const float *prev_x = x;
    ScopeDeleter<float> del;

    for (int i = 0; i < chain.size(); i++) {
        float * xt = chain[i]->apply (n, prev_x);
        ScopeDeleter<float> del2 (xt);
        del2.swap (del);
        prev_x = xt;
    }
    del.release ();
    return prev_x;
}

void IndexPreTransform::add (idx_t n, const float *x)
{
    FAISS_THROW_IF_NOT (is_trained);
    const float *xt = apply_chain (n, x);
    ScopeDeleter<float> del(xt == x ? nullptr : xt);
    index->add (n, xt);
    ntotal = index->ntotal;
}

void IndexPreTransform::add_with_ids (idx_t n, const float * x,
                                      const long *xids)
{
    FAISS_THROW_IF_NOT (is_trained);
    const float *xt = apply_chain (n, x);
    ScopeDeleter<float> del(xt == x ? nullptr : xt);
    index->add_with_ids (n, xt, xids);
    ntotal = index->ntotal;
}




void IndexPreTransform::search (idx_t n, const float *x, idx_t k,
                                float *distances, idx_t *labels) const
{
    FAISS_THROW_IF_NOT (is_trained);
    const float *xt = apply_chain (n, x);
    ScopeDeleter<float> del(xt == x ? nullptr : xt);
    index->search (n, xt, k, distances, labels);
}


void IndexPreTransform::reset () {
    index->reset();
    ntotal = 0;
}

long IndexPreTransform::remove_ids (const IDSelector & sel) {
    long nremove = index->remove_ids (sel);
    ntotal = index->ntotal;
    return nremove;
}


void IndexPreTransform::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
{
    float *x = chain.empty() ? recons : new float [ni * index->d];
    ScopeDeleter<float> del (recons == x ? nullptr : x);
    // initial reconstruction
    index->reconstruct_n (i0, ni, x);

    // revert transformations from last to first
    for (int i = chain.size() - 1; i >= 0; i--) {
        float *x_pre = i == 0 ? recons : new float [chain[i]->d_in * ni];
        ScopeDeleter<float> del2 (x_pre == recons ? nullptr : x_pre);
        chain [i]->reverse_transform (ni, x, x_pre);
        del2.swap (del); // delete [] x;
        x = x_pre;
    }
}



/*********************************************
 * RemapDimensionsTransform
 *********************************************/


RemapDimensionsTransform::RemapDimensionsTransform (
        int d_in, int d_out, const int *map_in):
    VectorTransform (d_in, d_out)
{
    map.resize (d_out);
    for (int i = 0; i < d_out; i++) {
        map[i] = map_in[i];
        FAISS_THROW_IF_NOT (map[i] == -1 || (map[i] >= 0 && map[i] < d_in));
    }
}

RemapDimensionsTransform::RemapDimensionsTransform (
        int d_in, int d_out, bool uniform): VectorTransform (d_in, d_out)
{
    map.resize (d_out, -1);

    if (uniform) {
        if (d_in < d_out) {
            for (int i = 0; i < d_in; i++) {
                map [i * d_out / d_in] = i;
            }
        } else {
            for (int i = 0; i < d_out; i++) {
                map [i] = i * d_in / d_out;
            }
        }
    } else {
        for (int i = 0; i < d_in && i < d_out; i++)
            map [i] = i;
    }
}

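// Worked example (exposition): with uniform == true, d_in = 4 and
// d_out = 8, input dimension i lands at output slot i * d_out / d_in, so
//
//   map = [0, -1, 1, -1, 2, -1, 3, -1]
//
// and the -1 slots are filled with zeros by apply_noalloc below.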


void RemapDimensionsTransform::apply_noalloc (idx_t n, const float * x,
                                              float *xt) const
{
    for (idx_t i = 0; i < n; i++) {
        for (int j = 0; j < d_out; j++) {
            xt[j] = map[j] < 0 ? 0 : x[map[j]];
        }
        x += d_in;
        xt += d_out;
    }
}

void RemapDimensionsTransform::reverse_transform (idx_t n, const float * xt,
                                                  float *x) const
{
    memset (x, 0, sizeof (*x) * n * d_in);
    for (idx_t i = 0; i < n; i++) {
        for (int j = 0; j < d_out; j++) {
            if (map[j] >= 0) x[map[j]] = xt[j];
        }
        x += d_in;
        xt += d_out;
    }
}