20 #include "FaissAssert.h"
29 IndexLSH::IndexLSH (idx_t d,
int nbits,
bool rotate_data,
bool train_thresholds):
30 Index(d), nbits(nbits), rotate_data(rotate_data),
31 train_thresholds (train_thresholds), rrot(d, nbits)
35 bytes_per_vec = (nbits + 7) / 8;
40 FAISS_THROW_IF_NOT (d >= nbits);
44 IndexLSH::IndexLSH ():
45 nbits (0), bytes_per_vec(0), rotate_data (false), train_thresholds (false)
57 }
else if (d != nbits) {
58 xt =
new float [nbits * n];
60 for (
idx_t i = 0; i < n; i++) {
61 const float *xl = x + i *
d;
62 for (
int j = 0; j <
nbits; j++)
67 if (train_thresholds) {
70 xt =
new float [nbits * n];
71 memcpy (xt, x,
sizeof(*x) * n * nbits);
75 for (
idx_t i = 0; i < n; i++)
76 for (
int j = 0; j <
nbits; j++)
87 if (train_thresholds) {
89 train_thresholds =
false;
92 train_thresholds =
true;
94 float * transposed_x =
new float [n *
nbits];
97 for (
idx_t i = 0; i < n; i++)
99 transposed_x [j * n + i] = xt [i * nbits + j];
102 float *xi = transposed_x + i * n;
104 std::sort (xi, xi + n);
108 thresholds [i] = (xi [n / 2 - 1] + xi [n / 2]) / 2;
123 fvecs2bitvecs (xt, &
codes[
ntotal * bytes_per_vec], nbits, n);
142 fvecs2bitvecs (xt, qcodes, nbits, n);
144 int * idistances =
new int [n * k];
154 for (
int i = 0; i < k * n; i++)
155 distances[i] = idistances[i];
161 if (!train_thresholds)
return;
162 FAISS_THROW_IF_NOT (nbits == vt->
d_out);
163 if (!vt->have_bias) {
164 vt->
b.resize (nbits, 0);
165 vt->have_bias =
true;
167 for (
int i = 0; i <
nbits; i++)
169 train_thresholds =
false;
int bytes_per_vec
number of bytes (8-bit units) per encoded vector
std::vector< float > thresholds
per-bit thresholds to compare the projected values with
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
RandomRotationMatrix rrot
optional random rotation
void transfer_thresholds(LinearTransform *vt)
long idx_t
all indices are this type
void hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
idx_t ntotal
total number of indexed vectors
void reset() override
removes all elements from the database.
void add(idx_t n, const float *x) override
void train(idx_t n, const float *x) override
int nbits
number of bits per vector
const float * apply_preprocess(idx_t n, const float *x) const
bool is_trained
set if the Index does not require training, or if training is done already
std::vector< uint8_t > codes
encoded dataset