/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-

#include "VectorTransform.h"

#include <cstdio>
#include <cmath>
#include <cstring>

#include "utils.h"
#include "FaissAssert.h"
#include "IndexPQ.h"

using namespace faiss;

extern "C" {

// this is to keep the clang syntax checker happy
#ifndef FINTEGER
#define FINTEGER int
#endif


/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */

int sgemm_ (
        const char *transa, const char *transb, FINTEGER *m, FINTEGER *n,
        FINTEGER *k, const float *alpha, const float *a,
        FINTEGER *lda, const float *b,
        FINTEGER *ldb, float *beta,
        float *c, FINTEGER *ldc);

int ssyrk_ (
        const char *uplo, const char *trans, FINTEGER *n, FINTEGER *k,
        float *alpha, float *a, FINTEGER *lda,
        float *beta, float *c, FINTEGER *ldc);

/* LAPACK functions from http://www.netlib.org/clapack/old/single/ */

int ssyev_ (
        const char *jobz, const char *uplo, FINTEGER *n, float *a,
        FINTEGER *lda, float *w, float *work, FINTEGER *lwork,
        FINTEGER *info);

int sgesvd_(
        const char *jobu, const char *jobvt, FINTEGER *m, FINTEGER *n,
        float *a, FINTEGER *lda, float *s, float *u, FINTEGER *ldu, float *vt,
        FINTEGER *ldvt, float *work, FINTEGER *lwork, FINTEGER *info);

}

/*********************************************
 * VectorTransform
 *********************************************/


float * VectorTransform::apply (Index::idx_t n, const float * x) const
{
    float * xt = new float[n * d_out];
    apply_noalloc (n, x, xt);
    return xt;
}


void VectorTransform::train (idx_t, const float *) {
    // does nothing by default
}


void VectorTransform::reverse_transform (
        idx_t , const float *,
        float *) const
{
    FAISS_ASSERT (!"reverse transform not implemented");
}

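/* Usage sketch (illustrative, not part of the library): the two entry
   points of the base class. apply() allocates the result with new[] and
   the caller must delete[] it; apply_noalloc() writes into a buffer the
   caller provides. vt, n and x are assumed given.

       float *xt = vt.apply (n, x);          // n * vt.d_out floats
       // ... use xt ...
       delete [] xt;

       std::vector<float> buf (n * vt.d_out);
       vt.apply_noalloc (n, x, buf.data ()); // no allocation
*/
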
/*********************************************
 * LinearTransform
 *********************************************/

/// both d_in > d_out and d_in < d_out are supported
LinearTransform::LinearTransform (int d_in, int d_out,
                                  bool have_bias):
    VectorTransform (d_in, d_out), have_bias (have_bias),
    max_points_per_d (1 << 20), verbose (false)
{}

void LinearTransform::apply_noalloc (idx_t n, const float * x,
                                     float * xt) const
{
    FAISS_ASSERT(is_trained || !"Transformation not trained yet");

    float c_factor;
    if (have_bias) {
        FAISS_ASSERT (b.size() == d_out || !"Bias not initialized");
        float * xi = xt;
        for (int i = 0; i < n; i++)
            for(int j = 0; j < d_out; j++)
                *xi++ = b[j];
        c_factor = 1.0;
    } else {
        c_factor = 0.0;
    }

    FAISS_ASSERT (A.size() == d_out * d_in ||
                  !"Transformation matrix not initialized");

    float one = 1;
    FINTEGER nbiti = d_out, ni = n, di = d_in;
    sgemm_ ("Transposed", "Not transposed",
            &nbiti, &ni, &di,
            &one, A.data(), &di, x, &di, &c_factor, xt, &nbiti);

}
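
/* Reference semantics of the sgemm_ call above (a naive sketch, not the
   actual implementation): A is stored row-major as d_out rows of d_in
   entries, and each output row is xt_i = A * x_i (+ b).

       for (idx_t i = 0; i < n; i++)
           for (int j = 0; j < d_out; j++) {
               float accu = have_bias ? b[j] : 0;
               for (int k = 0; k < d_in; k++)
                   accu += A[j * d_in + k] * x[i * d_in + k];
               xt[i * d_out + j] = accu;
           }
*/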

void LinearTransform::transform_transpose (idx_t n, const float * y,
                                           float *x) const
{
    if (have_bias) { // allocate buffer to store bias-corrected data
        float *y_new = new float [n * d_out];
        const float *yr = y;
        float *yw = y_new;
        for (idx_t i = 0; i < n; i++) {
            for (int j = 0; j < d_out; j++) {
                *yw++ = *yr++ - b [j];
            }
        }
        y = y_new;
    }

    {
        FINTEGER dii = d_in, doi = d_out, ni = n;
        float one = 1.0, zero = 0.0;
        sgemm_ ("Not", "Not", &dii, &ni, &doi,
                &one, A.data (), &dii, y, &doi, &zero, x, &dii);
    }

    if (have_bias) delete [] y;
}
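
/* Sketch of the math (an observation, not a guarantee of the code):
   transform_transpose computes x = A^T (y - b). When A is square with
   orthonormal rows, A^T = A^{-1}, so it exactly inverts apply(); when
   d_out < d_in it only projects back onto the row space of A.

       float *y = lt.apply (n, x0);                // y = A x0 + b
       std::vector<float> x1 (n * lt.d_in);
       lt.transform_transpose (n, y, x1.data ());  // x1 == x0 if A square orthonormal
       delete [] y;
*/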

const float * LinearTransform::maybe_subsample_train_set (
        Index::idx_t *n, const float *x)
{
    if (*n <= max_points_per_d * d_in) return x;

    size_t n2 = max_points_per_d * d_in;
    if (verbose) {
        printf ("  Input training set too big, sampling "
                "%ld / %ld vectors\n", n2, *n);
    }
    std::vector<int> subset (*n);
    rand_perm (subset.data (), *n, 1234);
    float *x_subset = new float[n2 * d_in];
    for (long i = 0; i < n2; i++)
        memcpy (&x_subset[i * d_in],
                &x[subset[i] * size_t(d_in)],
                sizeof (x[0]) * d_in);
    *n = n2;
    return x_subset;
}


/*********************************************
 * RandomRotationMatrix
 *********************************************/

void RandomRotationMatrix::init (int seed)
{

    if(d_out <= d_in) {
        A.resize (d_out * d_in);
        float *q = A.data();
        float_randn(q, d_out * d_in, seed);
        matrix_qr(d_in, d_out, q);
    } else {
        A.resize (d_out * d_out);
        float *q = A.data();
        float_randn(q, d_out * d_out, seed);
        matrix_qr(d_out, d_out, q);
        // keep only the first d_in columns of the d_out * d_out matrix
        int i, j;
        for (i = 0; i < d_out; i++) {
            for(j = 0; j < d_in; j++) {
                q[i * d_in + j] = q[i * d_out + j];
            }
        }
        A.resize(d_in * d_out);
    }

}

void RandomRotationMatrix::reverse_transform (idx_t n, const float * xt,
                                              float *x) const
{
    transform_transpose (n, xt, x);
}
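
/* Usage sketch (illustrative; assumes the RandomRotationMatrix (d_in,
   d_out) constructor declared in VectorTransform.h):

       RandomRotationMatrix rrot (64, 64);
       rrot.init (1234);               // must be called before use
       float *xr = rrot.apply (n, x);  // rotated copy of x, caller frees
       delete [] xr;
*/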

/*********************************************
 * PCAMatrix
 *********************************************/

PCAMatrix::PCAMatrix (int d_in, int d_out,
                      float eigen_power, bool random_rotation):
    LinearTransform(d_in, d_out, true),
    eigen_power(eigen_power), random_rotation(random_rotation)
{
    is_trained = false;
    max_points_per_d = 1000;
    balanced_bins = 0;
}


void PCAMatrix::train (Index::idx_t n, const float *x)
{
    const float * x_in = x;

    x = maybe_subsample_train_set(&n, x);

    // compute mean
    mean.clear(); mean.resize(d_in, 0.0);
    if (have_bias) { // we may want to skip the bias
        const float *xi = x;
        for (int i = 0; i < n; i++) {
            for(int j = 0; j < d_in; j++)
                mean[j] += *xi++;
        }
        for(int j = 0; j < d_in; j++)
            mean[j] /= n;
    }
    if(verbose) {
        printf("mean=[");
        for(int j = 0; j < d_in; j++) printf("%g ", mean[j]);
        printf("]\n");
    }

    if(n >= d_in) {
        // compute covariance matrix, store it in PCA matrix
        PCAMat.resize(d_in * d_in);
        float * cov = PCAMat.data();
        { // initialize with the - n * mean * mean^T term
            float *ci = cov;
            for(int i = 0; i < d_in; i++) {
                for(int j = 0; j < d_in; j++)
                    *ci++ = - n * mean[i] * mean[j];
            }
        }
        {
            FINTEGER di = d_in, ni = n;
            float one = 1.0;
            ssyrk_ ("Up", "Non transposed",
                    &di, &ni, &one, (float*)x, &di, &one, cov, &di);

        }
        if(verbose && d_in <= 10) {
            float *ci = cov;
            printf("cov=\n");
            for(int i = 0; i < d_in; i++) {
                for(int j = 0; j < d_in; j++)
                    printf("%10g ", *ci++);
                printf("\n");
            }
        }

        { // compute eigenvalues and vectors
            eigenvalues.resize(d_in);
            FINTEGER info = 0, lwork = -1, di = d_in;
            float workq;

            // workspace size query
            ssyev_ ("Vectors as well", "Upper",
                    &di, cov, &di, eigenvalues.data(), &workq, &lwork, &info);
            lwork = FINTEGER(workq);
            float *work = new float[lwork];

            ssyev_ ("Vectors as well", "Upper",
                    &di, cov, &di, eigenvalues.data(), work, &lwork, &info);

            if (info != 0) {
                fprintf (stderr, "WARN ssyev info returns %d, "
                         "a very bad PCA matrix is learnt\n",
                         int(info));
            }

            delete [] work;

            if(verbose && d_in <= 10) {
                printf("info=%ld new eigvals=[", long(info));
                for(int j = 0; j < d_in; j++) printf("%g ", eigenvalues[j]);
                printf("]\n");

                float *ci = cov;
                printf("eigenvecs=\n");
                for(int i = 0; i < d_in; i++) {
                    for(int j = 0; j < d_in; j++)
                        printf("%10.4g ", *ci++);
                    printf("\n");
                }
            }

        }

        // revert order of eigenvectors & values: ssyev returns them in
        // ascending order, but we want the largest eigenvalues first
        for(int i = 0; i < d_in / 2; i++) {

            std::swap(eigenvalues[i], eigenvalues[d_in - 1 - i]);
            float *v1 = cov + i * d_in;
            float *v2 = cov + (d_in - 1 - i) * d_in;
            for(int j = 0; j < d_in; j++)
                std::swap(v1[j], v2[j]);
        }

    } else {
        FAISS_ASSERT(!"Gram matrix version not implemented "
                     "-- provide more training examples than dimensions");
    }


    if (x != x_in) delete [] x;

    prepare_Ab();
    is_trained = true;
}
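
/* Usage sketch (illustrative): reducing 128-D vectors to 32 dimensions.
   The training vectors nt / xt are assumed supplied by the caller.

       PCAMatrix pca (128, 32, 0, false); // eigen_power = 0, no rotation
       pca.train (nt, xt);
       float *xred = pca.apply (nq, xq);  // nq * 32 floats, caller frees
       delete [] xred;
*/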

void PCAMatrix::copy_from (const PCAMatrix & other)
{
    FAISS_ASSERT (other.is_trained);
    mean = other.mean;
    eigenvalues = other.eigenvalues;
    PCAMat = other.PCAMat;
    prepare_Ab ();
    is_trained = true;
}

void PCAMatrix::prepare_Ab ()
{

    if (!random_rotation) {
        A = PCAMat;
        A.resize(d_out * d_in); // strip off useless dimensions

        // first scale the components
        if (eigen_power != 0) {
            float *ai = A.data();
            for (int i = 0; i < d_out; i++) {
                float factor = pow(eigenvalues[i], eigen_power);
                for(int j = 0; j < d_in; j++)
                    *ai++ *= factor;
            }
        }

        if (balanced_bins != 0) {
            FAISS_ASSERT (d_out % balanced_bins == 0);
            int dsub = d_out / balanced_bins;
            std::vector <float> Ain;
            std::swap(A, Ain);
            A.resize(d_out * d_in);

            std::vector <float> accu(balanced_bins);
            std::vector <int> counter(balanced_bins);

            // greedy assignment
            for (int i = 0; i < d_out; i++) {
                // find best bin
                int best_j = -1;
                float min_w = 1e30;
                for (int j = 0; j < balanced_bins; j++) {
                    if (counter[j] < dsub && accu[j] < min_w) {
                        min_w = accu[j];
                        best_j = j;
                    }
                }
                int row_dst = best_j * dsub + counter[best_j];
                accu[best_j] += eigenvalues[i];
                counter[best_j] ++;
                memcpy (&A[row_dst * d_in], &Ain[i * d_in],
                        d_in * sizeof (A[0]));
            }

            if (verbose) {
                printf("  bin accu=[");
                for (int i = 0; i < balanced_bins; i++)
                    printf("%g ", accu[i]);
                printf("]\n");
            }
        }


    } else {
        FAISS_ASSERT (balanced_bins == 0 ||
                      !"both balancing bins and applying a random rotation "
                      "does not make sense");
        RandomRotationMatrix rr(d_out, d_out);

        rr.init(5);

        // apply scaling on the rotation matrix (right multiplication)
        if (eigen_power != 0) {
            for (int i = 0; i < d_out; i++) {
                float factor = pow(eigenvalues[i], eigen_power);
                for(int j = 0; j < d_out; j++)
                    rr.A[j * d_out + i] *= factor;
            }
        }

        A.resize(d_in * d_out);
        {
            FINTEGER dii = d_in, doo = d_out;
            float one = 1.0, zero = 0.0;

            sgemm_ ("Not", "Not", &dii, &doo, &doo,
                    &one, PCAMat.data(), &dii, rr.A.data(), &doo, &zero,
                    A.data(), &dii);

        }

    }

    b.clear(); b.resize(d_out);

    for (int i = 0; i < d_out; i++) {
        float accu = 0;
        for (int j = 0; j < d_in; j++)
            accu -= mean[j] * A[j + i * d_in];
        b[i] = accu;
    }

}
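
/* The bias makes apply() center the data: since b = -A * mean, the
   transform computes A x + b = A (x - mean). */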

void PCAMatrix::reverse_transform (idx_t n, const float * xt,
                                   float *x) const
{
    FAISS_ASSERT (eigen_power == 0 ||
                  !"reverse only implemented for orthogonal transforms");
    transform_transpose (n, xt, x);
}

/*********************************************
 * OPQMatrix
 *********************************************/


OPQMatrix::OPQMatrix (int d, int M, int d2):
    LinearTransform (d, d2 == -1 ? d : d2, false), M(M),
    niter (50),
    niter_pq (4), niter_pq_0 (40),
    verbose(false)
{
    is_trained = false;
    max_points_per_d = 1000;
}


void OPQMatrix::train (Index::idx_t n, const float *x)
{

    const float * x_in = x;

    x = maybe_subsample_train_set (&n, x);

    // To support d_out > d_in, we pad input vectors with 0s to d_out
    size_t d = d_out <= d_in ? d_in : d_out;
    size_t d2 = d_out;

#if 0
    // what this test shows: the only way of getting bit-exact
    // reproducible results with sgeqrf and sgesvd seems to be forcing
    // single-threading.
    { // test repro
        std::vector<float> r (d * d);
        float * rotation = r.data();
        float_randn (rotation, d * d, 1234);
        printf("CS0: %016lx\n",
               ivec_checksum (128*128, (int*)rotation));
        matrix_qr (d, d, rotation);
        printf("CS1: %016lx\n",
               ivec_checksum (128*128, (int*)rotation));
        return;
    }
#endif

    if (verbose) {
        printf ("OPQMatrix::train: training an OPQ rotation matrix "
                "for M=%d from %ld vectors in %dD -> %dD\n",
                M, n, d_in, d_out);
    }

    std::vector<float> xtrain (n * d);
    // center x
    {
        std::vector<float> sum (d);
        const float *xi = x;
        for (size_t i = 0; i < n; i++) {
            for (int j = 0; j < d_in; j++)
                sum [j] += *xi++;
        }
        for (int i = 0; i < d; i++) sum[i] /= n;
        float *yi = xtrain.data();
        xi = x;
        for (size_t i = 0; i < n; i++) {
            for (int j = 0; j < d_in; j++)
                *yi++ = *xi++ - sum[j];
            yi += d - d_in;
        }
    }
    float *rotation;

    if (A.size () == 0) {
        A.resize (d * d);
        rotation = A.data();
        if (verbose)
            printf("  OPQMatrix::train: making random %ld*%ld rotation\n",
                   d, d);
        float_randn (rotation, d * d, 1234);
        matrix_qr (d, d, rotation);
        // we use only the d * d2 upper part of the matrix
        A.resize (d * d2);
    } else {
        FAISS_ASSERT (A.size() == d * d2);
        rotation = A.data();
    }


    std::vector<float>
        xproj (d2 * n), pq_recons (d2 * n), xxr (d * n),
        tmp(d * d * 4);

    std::vector<uint8_t> codes (M * n);
    ProductQuantizer pq_regular (d2, M, 8);
    double t0 = getmillisecs();
    for (int iter = 0; iter < niter; iter++) {

        { // torch.mm(xtrain, rotation:t())
            FINTEGER di = d, d2i = d2, ni = n;
            float zero = 0, one = 1;
            sgemm_ ("Transposed", "Not transposed",
                    &d2i, &ni, &di,
                    &one, rotation, &di,
                    xtrain.data(), &di,
                    &zero, xproj.data(), &d2i);
        }

        pq_regular.cp.max_points_per_centroid = 1000;
        pq_regular.cp.niter = iter == 0 ? niter_pq_0 : niter_pq;
        pq_regular.cp.verbose = verbose;
        pq_regular.train (n, xproj.data());

        pq_regular.compute_codes (xproj.data(), codes.data(), n);
        pq_regular.decode (codes.data(), pq_recons.data(), n);

        float pq_err = fvec_L2sqr (pq_recons.data(), xproj.data(), n * d2) / n;

        if (verbose)
            printf ("    Iteration %d (%d PQ iterations):"
                    "%.3f s, obj=%g\n", iter, pq_regular.cp.niter,
                    (getmillisecs () - t0) / 1000.0, pq_err);

        {
            float *u = tmp.data(), *vt = &tmp [d * d];
            float *sing_val = &tmp [2 * d * d];
            FINTEGER di = d, d2i = d2, ni = n;
            float one = 1, zero = 0;

            // torch.mm(xtrain:t(), pq_recons)
            sgemm_ ("Not", "Transposed",
                    &d2i, &di, &ni,
                    &one, pq_recons.data(), &d2i,
                    xtrain.data(), &di,
                    &zero, xxr.data(), &d2i);


            FINTEGER lwork = -1, info = -1;
            float worksz;
            // workspace query
            sgesvd_ ("All", "All",
                     &d2i, &di, xxr.data(), &d2i,
                     sing_val,
                     vt, &d2i, u, &di,
                     &worksz, &lwork, &info);

            lwork = int(worksz);
            std::vector<float> work (lwork);
            // u and vt swapped
            sgesvd_ ("All", "All",
                     &d2i, &di, xxr.data(), &d2i,
                     sing_val,
                     vt, &d2i, u, &di,
                     work.data(), &lwork, &info);

            sgemm_ ("Transposed", "Transposed",
                    &di, &d2i, &d2i,
                    &one, u, &di, vt, &d2i,
                    &zero, rotation, &di);

        }
        pq_regular.train_type = ProductQuantizer::Train_hot_start;
    }

    // revert A matrix: strip the zero padding to get back to d_in columns
    if (d > d_in) {
        for (long i = 0; i < d_out; i++)
            memmove (&A[i * d_in], &A[i * d], sizeof(A[0]) * d_in);
        A.resize (d_in * d_out);
    }

    if (x != x_in)
        delete [] x;

    is_trained = true;
}
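
/* Usage sketch (illustrative): training an OPQ rotation for a PQ with
   M = 8 subquantizers on 64-D data. nt / xt are assumed supplied by the
   caller.

       OPQMatrix opq (64, 8);          // d2 = -1: output stays 64-D
       opq.verbose = true;
       opq.train (nt, xt);
       float *xr = opq.apply (nt, xt); // rotated vectors, caller frees
       delete [] xr;
*/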


void OPQMatrix::reverse_transform (idx_t n, const float * xt,
                                   float *x) const
{
    transform_transpose (n, xt, x);
}

/*********************************************
 * IndexPreTransform
 *********************************************/

IndexPreTransform::IndexPreTransform ():
    index(nullptr), own_fields (false)
{
}


IndexPreTransform::IndexPreTransform (
        Index * index):
    Index (index->d, index->metric_type),
    index (index), own_fields (false)
{
    is_trained = index->is_trained;
    set_typename();
}


IndexPreTransform::IndexPreTransform (
        VectorTransform * ltrans,
        Index * index):
    Index (index->d, index->metric_type),
    index (index), own_fields (false)
{
    is_trained = index->is_trained;
    prepend_transform (ltrans);
    set_typename();
}

void IndexPreTransform::prepend_transform (VectorTransform *ltrans)
{
    FAISS_ASSERT (ltrans->d_out == d);
    is_trained = is_trained && ltrans->is_trained;
    chain.insert (chain.begin(), ltrans);
    d = ltrans->d_in;
    set_typename ();
}
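
/* Usage sketch (illustrative; assumes IndexFlatL2 from IndexFlat.h):
   chaining a transform in front of a sub-index. The transform is applied
   automatically in train / add / search.

       OPQMatrix opq (d, 8);
       IndexFlatL2 sub (d);
       IndexPreTransform index (&opq, &sub);  // own_fields stays false
       index.train (nt, xt);
       index.add (nb, xb);
       index.search (nq, xq, k, distances, labels);
*/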

void IndexPreTransform::set_typename ()
{
    // TODO correct this according to actual type
    index_typename = "PreLT[" + index->index_typename + "]";
}


IndexPreTransform::~IndexPreTransform ()
{
    if (own_fields) {
        for (int i = 0; i < chain.size(); i++)
            delete chain[i];
        delete index;
    }
}


void IndexPreTransform::train (idx_t n, const float *x)
{
    int last_untrained = 0;
    for (int i = 0; i < chain.size(); i++)
        if (!chain[i]->is_trained) last_untrained = i;
    if (!index->is_trained) last_untrained = chain.size();
    const float *prev_x = x;

    for (int i = 0; i <= last_untrained; i++) {
        if (i < chain.size()) {
            VectorTransform *ltrans = chain [i];
            if (!ltrans->is_trained)
                ltrans->train(n, prev_x);
        } else {
            index->train (n, prev_x);
        }
        if (i == last_untrained) break;

        float * xt = chain[i]->apply (n, prev_x);
        if (prev_x != x) delete [] prev_x;
        prev_x = xt;
    }

    if (prev_x != x) delete [] prev_x;
    is_trained = true;
}


const float *IndexPreTransform::apply_chain (idx_t n, const float *x) const
{
    const float *prev_x = x;
    for (int i = 0; i < chain.size(); i++) {
        float * xt = chain[i]->apply (n, prev_x);
        if (prev_x != x) delete [] prev_x;
        prev_x = xt;
    }
    return prev_x;
}

void IndexPreTransform::add (idx_t n, const float *x)
{
    FAISS_ASSERT (is_trained);
    const float *xt = apply_chain (n, x);
    index->add (n, xt);
    if (xt != x) delete [] xt;
    ntotal = index->ntotal;
}

void IndexPreTransform::add_with_ids (idx_t n, const float * x,
                                      const long *xids)
{
    FAISS_ASSERT (is_trained);
    const float *xt = apply_chain (n, x);
    index->add_with_ids (n, xt, xids);
    if (xt != x) delete [] xt;
    ntotal = index->ntotal;
}


void IndexPreTransform::search (idx_t n, const float *x, idx_t k,
                                float *distances, idx_t *labels) const
{
    FAISS_ASSERT (is_trained);
    const float *xt = apply_chain (n, x);
    index->search (n, xt, k, distances, labels);
    if (xt != x) delete [] xt;
}


void IndexPreTransform::reset () {
    index->reset();
    ntotal = 0;
}

long IndexPreTransform::remove_ids (const IDSelector & sel) {
    long nremove = index->remove_ids (sel);
    ntotal = index->ntotal;
    return nremove;
}

void IndexPreTransform::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
{
    float *x = chain.empty() ? recons : new float [ni * index->d];
    // initial reconstruction
    index->reconstruct_n (i0, ni, x);

    // revert transformations from last to first
    for (int i = chain.size() - 1; i >= 0; i--) {
        float *x_pre = i == 0 ? recons : new float [chain[i]->d_in * ni];
        chain [i]->reverse_transform (ni, x, x_pre);
        delete [] x;
        x = x_pre;
    }
}


/*********************************************
 * RemapDimensionsTransform
 *********************************************/


RemapDimensionsTransform::RemapDimensionsTransform (
        int d_in, int d_out, const int *map_in):
    VectorTransform (d_in, d_out)
{
    map.resize (d_out);
    for (int i = 0; i < d_out; i++) {
        map[i] = map_in[i];
        FAISS_ASSERT (map[i] == -1 || (map[i] >= 0 && map[i] < d_in));
    }
}

RemapDimensionsTransform::RemapDimensionsTransform (
        int d_in, int d_out, bool uniform): VectorTransform (d_in, d_out)
{
    map.resize (d_out, -1);

    if (uniform) {
        if (d_in < d_out) {
            for (int i = 0; i < d_in; i++) {
                map [i * d_out / d_in] = i;
            }
        } else {
            for (int i = 0; i < d_out; i++) {
                map [i] = i * d_in / d_out;
            }
        }
    } else {
        for (int i = 0; i < d_in && i < d_out; i++)
            map [i] = i;
    }
}


void RemapDimensionsTransform::apply_noalloc (idx_t n, const float * x,
                                              float *xt) const
{
    for (idx_t i = 0; i < n; i++) {
        for (int j = 0; j < d_out; j++) {
            xt[j] = map[j] < 0 ? 0 : x[map[j]];
        }
        x += d_in;
        xt += d_out;
    }
}

void RemapDimensionsTransform::reverse_transform (idx_t n, const float * xt,
                                                  float *x) const
{
    memset (x, 0, sizeof (*x) * n * d_in);
    for (idx_t i = 0; i < n; i++) {
        for (int j = 0; j < d_out; j++) {
            if (map[j] >= 0) x[map[j]] = xt[j];
        }
        x += d_in;
        xt += d_out;
    }
}