Faiss
/data/users/matthijs/github_faiss/faiss/IndexLSH.cpp
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "IndexLSH.h"

#include <cstdio>
#include <cstring>
#include <sstream>

#include <algorithm>

#include "utils.h"
#include "hamming.h"
#include "FaissAssert.h"

namespace faiss {

/***************************************************************
 * IndexLSH
 ***************************************************************/

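// Build an LSH index over d-dimensional vectors, encoding each vector on
// nbits bits. With rotate_data, a random rotation is applied before
// binarization; otherwise the first nbits components are used directly,
// which requires d >= nbits. With train_thresholds, per-bit comparison
// thresholds are learned from data in train().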
IndexLSH::IndexLSH (idx_t d, int nbits, bool rotate_data, bool train_thresholds):
    Index(d), nbits(nbits), rotate_data(rotate_data),
    train_thresholds (train_thresholds), rrot(d, nbits)
{
    is_trained = !train_thresholds;

    bytes_per_vec = (nbits + 7) / 8;

    if (rotate_data) {
        rrot.init(5);
    } else {
        FAISS_ASSERT(d >= nbits);
    }
    set_typename();
}
IndexLSH::IndexLSH ():
    nbits (0), bytes_per_vec(0), rotate_data (false), train_thresholds (false)
{
}


void IndexLSH::set_typename()
{
    std::stringstream s;
    s << "LSH_" << nbits << (rotate_data ? "r" : "");
    index_typename = s.str();
}

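// Prepare n input vectors for binarization: apply the random rotation, or,
// when d != nbits, keep only the first nbits components; then subtract the
// per-bit thresholds if they are in use. Returns either x itself (no
// preprocessing needed) or a newly allocated n * nbits buffer that the
// caller must delete[].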
const float * IndexLSH::apply_preprocess (idx_t n, const float *x) const
{
    float *xt = nullptr;
    if (rotate_data) {
        // also applies the bias if it exists
        xt = rrot.apply (n, x);
    } else if (d != nbits) {
        xt = new float [nbits * n];
        float *xp = xt;
        for (idx_t i = 0; i < n; i++) {
            const float *xl = x + i * d;
            for (int j = 0; j < nbits; j++)
                *xp++ = xl [j];
        }
    }

    if (train_thresholds) {

        if (xt == nullptr) {
            xt = new float [nbits * n];
            memcpy (xt, x, sizeof(*x) * n * nbits);
        }

        float *xp = xt;
        for (idx_t i = 0; i < n; i++)
            for (int j = 0; j < nbits; j++)
                *xp++ -= thresholds [j];
    }

    return xt ? xt : x;
}


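// Train the per-bit thresholds: each threshold is set to the median of the
// corresponding component of the preprocessed training vectors.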
void IndexLSH::train (idx_t n, const float *x)
{
    if (train_thresholds) {
        thresholds.resize (nbits);
        train_thresholds = false;
        const float *xt = apply_preprocess (n, x);
        train_thresholds = true;

        float * transposed_x = new float [n * nbits];

        for (idx_t i = 0; i < n; i++)
            for (idx_t j = 0; j < nbits; j++)
                transposed_x [j * n + i] = xt [i * nbits + j];
        if (xt != x) delete [] xt;

        for (idx_t i = 0; i < nbits; i++) {
            float *xi = transposed_x + i * n;
            // std::nth_element would be sufficient here; sort to find the median
            std::sort (xi, xi + n);
            if (n % 2 == 1)
                thresholds [i] = xi [n / 2];
            else
                thresholds [i] = (xi [n / 2 - 1] + xi [n / 2]) / 2;
        }

        delete [] transposed_x;
    }
    is_trained = true;
}


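// Encode n vectors into nbits-bit binary codes and append them to the stored
// database codes. The index must be trained first when train_thresholds is set.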
void IndexLSH::add (idx_t n, const float *x)
{
    FAISS_ASSERT (is_trained);
    const float *xt = apply_preprocess (n, x);

    codes.resize ((ntotal + n) * bytes_per_vec);
    fvecs2bitvecs (xt, &codes[ntotal * bytes_per_vec], nbits, n);
    if (x != xt)
        delete [] xt;
    ntotal += n;
}


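// Search: binarize the n queries with the same preprocessing as the database
// vectors, run an exact k-nearest-neighbor search in Hamming space, and
// report the integer Hamming distances as floats.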
void IndexLSH::search (
        idx_t n,
        const float *x,
        idx_t k,
        float *distances,
        idx_t *labels) const
{
    FAISS_ASSERT (is_trained);
    const float *xt = apply_preprocess (n, x);

    uint8_t * qcodes = new uint8_t [n * bytes_per_vec];
    fvecs2bitvecs (xt, qcodes, nbits, n);

    if (x != xt)
        delete [] xt;

    int * idistances = new int [n * k];
    int_maxheap_array_t res = { size_t(n), size_t(k), labels, idistances };

    hammings_knn (&res, qcodes, codes.data(),
                  ntotal, bytes_per_vec, true);

    delete [] qcodes;

    // convert the integer distances to floats
    for (int i = 0; i < k * n; i++)
        distances[i] = idistances[i];
    delete [] idistances;
}


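// Fold the learned thresholds into the bias of a linear transform applied
// before this index (for instance the random rotation), so the thresholds no
// longer need to be subtracted at preprocessing time.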
void IndexLSH::transfer_thresholds (LinearTransform *vt) {
    if (!train_thresholds) return;
    FAISS_ASSERT (nbits == vt->d_out);
    if (!vt->have_bias) {
        vt->b.resize (nbits, 0);
        vt->have_bias = true;
    }
    for (int i = 0; i < nbits; i++)
        vt->b[i] -= thresholds[i];
    train_thresholds = false;
    thresholds.clear();
}

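// Remove all indexed vectors from the database.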
void IndexLSH::reset() {
    codes.clear();
    ntotal = 0;
}


} // namespace faiss