Faiss
/data/users/matthijs/github_faiss/faiss/IndexLSH.cpp
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "IndexLSH.h"

#include <cstdio>
#include <cstring>
#include <sstream>

#include <algorithm>

#include "utils.h"
#include "hamming.h"
#include "FaissAssert.h"

namespace faiss {

/***************************************************************
 * IndexLSH
 ***************************************************************/

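// Build an LSH index over d-dimensional vectors, encoding each vector on
// nbits bits. With rotate_data, a random rotation is applied before
// binarization; otherwise the first nbits components are used directly,
// which requires d >= nbits. With train_thresholds, per-bit comparison
// thresholds are learned from data in train().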
IndexLSH::IndexLSH (idx_t d, int nbits, bool rotate_data, bool train_thresholds):
    Index(d), nbits(nbits), rotate_data(rotate_data),
    train_thresholds (train_thresholds), rrot(d, nbits)
{
    is_trained = !train_thresholds;

    bytes_per_vec = (nbits + 7) / 8;

    if (rotate_data) {
        rrot.init(5);
    } else {
        FAISS_ASSERT(d >= nbits);
    }
    set_typename();
}
IndexLSH::IndexLSH ():
    nbits (0), bytes_per_vec(0), rotate_data (false), train_thresholds (false)
{
}


void IndexLSH::set_typename()
{
    std::stringstream s;
    s << "LSH_" << nbits << (rotate_data ? "r" : "");
    index_typename = s.str();
}

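// Prepare n input vectors for binarization: apply the random rotation, or,
// when d != nbits, keep only the first nbits components; then subtract the
// per-bit thresholds if they are in use. Returns either x itself (no
// preprocessing needed) or a newly allocated n * nbits buffer that the
// caller must delete[].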
const float * IndexLSH::apply_preprocess (idx_t n, const float *x) const
{
    float *xt = nullptr;
    if (rotate_data) {
        // also applies the bias if it exists
        xt = rrot.apply (n, x);
    } else if (d != nbits) {
        xt = new float [nbits * n];
        float *xp = xt;
        for (idx_t i = 0; i < n; i++) {
            const float *xl = x + i * d;
            for (int j = 0; j < nbits; j++)
                *xp++ = xl [j];
        }
    }

    if (train_thresholds) {

        if (xt == nullptr) {
            xt = new float [nbits * n];
            memcpy (xt, x, sizeof(*x) * n * nbits);
        }

        float *xp = xt;
        for (idx_t i = 0; i < n; i++)
            for (int j = 0; j < nbits; j++)
                *xp++ -= thresholds [j];
    }

    return xt ? xt : x;
}


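// Train the per-bit thresholds: each threshold is set to the median of the
// corresponding component of the preprocessed training vectors.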
void IndexLSH::train (idx_t n, const float *x)
{
    if (train_thresholds) {
        thresholds.resize (nbits);
        train_thresholds = false;
        const float *xt = apply_preprocess (n, x);
        train_thresholds = true;

        float * transposed_x = new float [n * nbits];

        for (idx_t i = 0; i < n; i++)
            for (idx_t j = 0; j < nbits; j++)
                transposed_x [j * n + i] = xt [i * nbits + j];
        if (xt != x) delete [] xt;

        for (idx_t i = 0; i < nbits; i++) {
            float *xi = transposed_x + i * n;
            // std::nth_element would be sufficient here; sort to find the median
            std::sort (xi, xi + n);
            if (n % 2 == 1)
                thresholds [i] = xi [n / 2];
            else
                thresholds [i] = (xi [n / 2 - 1] + xi [n / 2]) / 2;
        }

        delete [] transposed_x;
    }
    is_trained = true;
}


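// Encode n vectors into nbits-bit binary codes and append them to the stored
// database codes. The index must be trained first when train_thresholds is set.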
void IndexLSH::add (idx_t n, const float *x)
{
    FAISS_ASSERT (is_trained);
    const float *xt = apply_preprocess (n, x);

    codes.resize ((ntotal + n) * bytes_per_vec);
    fvecs2bitvecs (xt, &codes[ntotal * bytes_per_vec], nbits, n);
    if (x != xt)
        delete [] xt;
    ntotal += n;
}


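// Search: binarize the n queries with the same preprocessing as the database
// vectors, run an exact k-nearest-neighbor search in Hamming space, and
// report the integer Hamming distances as floats.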
void IndexLSH::search (
        idx_t n,
        const float *x,
        idx_t k,
        float *distances,
        idx_t *labels) const
{
    FAISS_ASSERT (is_trained);
    const float *xt = apply_preprocess (n, x);

    uint8_t * qcodes = new uint8_t [n * bytes_per_vec];
    fvecs2bitvecs (xt, qcodes, nbits, n);

    if (x != xt)
        delete [] xt;

    int * idistances = new int [n * k];
    int_maxheap_array_t res = { size_t(n), size_t(k), labels, idistances };

    hammings_knn (&res, qcodes, codes.data(),
                  ntotal, bytes_per_vec, true);

    delete [] qcodes;

    // convert the integer distances to floats
    for (int i = 0; i < k * n; i++)
        distances[i] = idistances[i];
    delete [] idistances;
}


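// Fold the learned thresholds into the bias of a linear transform applied
// before this index (for instance the random rotation), so the thresholds no
// longer need to be subtracted at preprocessing time.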
void IndexLSH::transfer_thresholds (LinearTransform *vt) {
    if (!train_thresholds) return;
    FAISS_ASSERT (nbits == vt->d_out);
    if (!vt->have_bias) {
        vt->b.resize (nbits, 0);
        vt->have_bias = true;
    }
    for (int i = 0; i < nbits; i++)
        vt->b[i] -= thresholds[i];
    train_thresholds = false;
    thresholds.clear();
}

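// Remove all indexed vectors from the database.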
void IndexLSH::reset() {
    codes.clear();
    ntotal = 0;
}


} // namespace faiss