docs/html/IndexIVFPQ_8h_source.html

 /**

  * Copyright (c) 2015-present, Facebook, Inc.

  * All rights reserved.

  *

  * This source code is licensed under the CC-by-NC license found in the

  * LICENSE file in the root directory of this source tree.

  */


 // Copyright 2004-present Facebook. All Rights Reserved.

 // -*- c++ -*-


 #ifndef FAISS_INDEX_IVFPQ_H

 #define FAISS_INDEX_IVFPQ_H


 #include <vector>


 #include "IndexIVF.h"

 #include "IndexPQ.h"


 namespace faiss {


 /** Inverted file with Product Quantizer encoding. Each residual

  * vector is encoded as a product quantizer code (see by_residual for

  * the choice between encoding the residual and the plain vector).

  */

 struct IndexIVFPQ: IndexIVF {

     bool by_residual;              ///< Encode residual or plain vector?

     int use_precomputed_table;     ///< if by_residual, build precomputed tables

     size_t code_size;              ///< code size per vector in bytes

     ProductQuantizer pq;           ///< produces the codes


     bool do_polysemous_training;   ///< reorder PQ centroids after training?

     PolysemousTraining *polysemous_training; ///< if NULL, use default


     // search-time parameters

     size_t scan_table_threshold;   ///< use table computation or on-the-fly?

     size_t max_codes;              ///< max nb of codes to visit to do a query

     int polysemous_ht;             ///< Hamming thresh for polysemous filtering


     std::vector < std::vector<uint8_t> > codes; ///< binary codes, size nlist


     /// used if use_precomputed_table is set

     /// size nlist * pq.M * pq.ksub

     std::vector <float> precomputed_table;


     /** Construct an IVFPQ index.

      *

      * @param quantizer      quantizer that maps vectors to inverted lists

      * @param d              vector dimension

      * @param nlist          number of possible key values

      * @param M              presumably the number of PQ sub-quantizers

      *                       (cf. pq.M) -- TODO confirm

      * @param nbits_per_idx  presumably the number of bits per PQ index

      *                       -- TODO confirm

      */

     IndexIVFPQ (

             Index * quantizer, size_t d, size_t nlist,

             size_t M, size_t nbits_per_idx);


     /// presumably adds the n vectors of x to the index, using the ids in

     /// xids when it is not null -- TODO confirm against the implementation

     void add_with_ids(idx_t n, const float* x, const long* xids = nullptr)

         override;


     /// same as add_core, also:

     /// - output 2nd level residuals if residuals_2 != NULL

     /// - use precomputed list numbers if precomputed_idx != NULL

     void add_core_o (idx_t n, const float *x,

                      const long *xids, float *residuals_2,

                      const long *precomputed_idx = nullptr);


     /// search override for the n query vectors in x; the k results per

     /// query are written to distances and labels (presumably both of

     /// size n * k -- TODO confirm)

     void search(

         idx_t n,

         const float* x,

         idx_t k,

         float* distances,

         idx_t* labels) const override;


     /// removes all elements from the database.

     void reset() override;


     /// remove the ids designated by sel

     /// NOTE(review): the return value is presumably the number of

     /// removed elements -- confirm against the implementation

     long remove_ids(const IDSelector& sel) override;


     /// trains the product quantizer

     void train_residual(idx_t n, const float* x) override;


     /// same as train_residual, also output 2nd level residuals

     void train_residual_o (idx_t n, const float *x, float *residuals_2);


     /** Reconstruct a subset of the indexed vectors

      *

      * @param i0     first vector to reconstruct

      * @param ni     nb of vectors to reconstruct

      * @param recons output array of reconstructed vectors, size ni * d

      */

     void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;


     /// reconstruct the single vector designated by key into recons

     /// (presumably of size d -- TODO confirm)

     void reconstruct(idx_t key, float* recons) const override;


     /** Find exact duplicates in the dataset.

      *

      * the duplicates are returned in pre-allocated arrays (see the

      * max sizes).

      *

      * @param ids    ids[lims[i]] : ids[lims[i+1]-1] is a group of

      *               duplicates (max size ntotal)

      * @param lims   limits between groups of duplicates

      *               (max size ntotal / 2 + 1)

      * @return       number of groups found

      */

     size_t find_duplicates (idx_t *ids, size_t *lims) const;


     // map a vector to a binary code knowing the index

     // (key is presumably the posting list id, as in encode_multiple)

     void encode (long key, const float * x, uint8_t * code) const;


     /** Encode multiple vectors

      *

      * @param n       nb vectors to encode

      * @param keys    posting list ids for those vectors (size n)

      * @param x       vectors (size n * d)

      * @param codes   output codes (size n * code_size)

      * @param compute_keys  if false, assume keys are precomputed,

      *                      otherwise compute them

      */

     void encode_multiple (size_t n, long *keys,

                           const float * x, uint8_t * codes,

                           bool compute_keys = false) const;


     /// inverse of encode_multiple

     void decode_multiple (size_t n, const long *keys,

                           const uint8_t * xcodes, float * x) const;


     /** search a set of vectors, that are pre-quantized by the IVF

      *  quantizer. Fill in the corresponding heaps with the query

      *  results.

      *

      * @param nx     nb of vectors to query

      * @param qx     query vectors, size nx * d

      * @param keys   coarse quantization indices, size nx * nprobe

      * @param coarse_dis

      *               distances to coarse centroids, size nx * nprobe

      * @param res    heaps for all the results, gives the nprobe

      * @param store_pairs store inv list index + inv list offset

      *                     in upper/lower 32 bit of result,

      *                     instead of ids (used for reranking).

      */

     virtual void search_knn_with_key (

             size_t nx,

             const float * qx,

             const long * keys,

             const float * coarse_dis,

             float_maxheap_array_t* res,

             bool store_pairs = false) const;


     /// build precomputed table (see precomputed_table)

     void precompute_table ();


     /// used to implement merging

     void merge_from_residuals(IndexIVF& other) override;


     /** copy a subset of the entries of this index to the other index

      *

      * if subset_type == 0: copies ids in [a1, a2)

      * if subset_type == 1: copies ids if id % a1 == a2

      */

     void copy_subset_to (IndexIVFPQ & other, int subset_type,

                          long a1, long a2) const;


     /// default constructor (the initial field values are set in the

     /// implementation, which is not part of this header)

     IndexIVFPQ ();


 };


 /// Counters and timings collected while searching an IndexIVFPQ.

 ///

 /// statistics are robust to internal threading, but not if

 /// IndexIVFPQ::search is called by multiple threads

 struct IndexIVFPQStats {

     size_t nq;       ///< nb of queries run

     size_t nlist;    ///< nb of inverted lists scanned

     size_t ncode;    ///< nb of codes visited

     size_t nrefine;  ///< nb of refines (IVFPQR)


     /// nb of passed Hamming distance tests (for polysemous)

     size_t n_hamming_pass;



     // timings measured with the CPU RTC

     // on all threads

     size_t assign_cycles;

     size_t search_cycles;

     size_t refine_cycles; ///< only for IVFPQR


     // single thread (double-counted with search_cycles)

     size_t init_query_cycles;

     size_t init_list_cycles;

     size_t scan_cycles;

     size_t heap_cycles;


     /// a new object starts in the state established by reset()

     IndexIVFPQStats () {reset (); }

     /// re-initialize the statistics

     /// NOTE(review): presumably sets every field to 0 -- defined in the

     /// implementation, confirm there

     void reset ();

 };


 /// global IndexIVFPQStats object that collects all the statistics

 /// (only declared here; the definition lives elsewhere)

 extern IndexIVFPQStats indexIVFPQ_stats;


 /** Index with an additional level of PQ refinement: on top of the

  *  IndexIVFPQ codes, refine_pq produces the codes stored in

  *  refine_codes. */

 struct IndexIVFPQR: IndexIVFPQ {

     ProductQuantizer refine_pq;           ///< 3rd level quantizer

     std::vector <uint8_t> refine_codes;   ///< corresponding codes


     /// factor between k requested in search and the k requested from the IVFPQ

     float k_factor;


     /** Construct an IVFPQR index.

      *

      * The first five parameters are the same as for the IndexIVFPQ

      * constructor.

      *

      * @param M_refine              presumably the M parameter of

      *                              refine_pq -- TODO confirm

      * @param nbits_per_idx_refine  presumably the nbits_per_idx parameter

      *                              of refine_pq -- TODO confirm

      */

     IndexIVFPQR (

             Index * quantizer, size_t d, size_t nlist,

             size_t M, size_t nbits_per_idx,

             size_t M_refine, size_t nbits_per_idx_refine);


     /// removes all elements from the database.

     void reset() override;


     /// remove the ids designated by sel

     /// NOTE(review): the return value is presumably the number of

     /// removed elements -- confirm against the implementation

     long remove_ids(const IDSelector& sel) override;


     /// trains the two product quantizers

     void train_residual(idx_t n, const float* x) override;


     /// override of IndexIVFPQ::add_with_ids; note that, unlike the

     /// IndexIVFPQ declaration, xids has no default value here

     void add_with_ids(idx_t n, const float* x, const long* xids) override;


     /// same as add_with_ids, but optionally use the precomputed list ids

     void add_core (idx_t n, const float *x, const long *xids,

                      const long *precomputed_idx = nullptr);


     /// override of IndexIVFPQ::reconstruct_n, same parameters: i0 is the

     /// first vector to reconstruct, ni the nb of vectors and recons the

     /// output array of size ni * d

     void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;


     /// search override; see k_factor for the relation between the k

     /// requested here and the k requested from the IVFPQ

     void search(

         idx_t n,

         const float* x,

         idx_t k,

         float* distances,

         idx_t* labels) const override;


     /// used to implement merging

     void merge_from_residuals(IndexIVF& other) override;


     /// default constructor (the initial field values are set in the

     /// implementation, which is not part of this header)

     IndexIVFPQR();

 };


 /** Index with 32-bit ids and flat tables. Must be constructed from an

  *  existing IndexIVFPQ. Cannot be copy-constructed/assigned: the copy

  *  operations are deleted, because the compact_* tables and the mmap

  *  buffer are held through raw pointers and a member-wise copy would

  *  alias storage that is released when the object is deleted. The

  *  actual data is stored in the compact_* tables, the ids and codes

  *  tables are not used.  */

 struct IndexIVFPQCompact: IndexIVFPQ {


     /// build the compact index from an existing IndexIVFPQ

     explicit IndexIVFPQCompact (const IndexIVFPQ &other);


     /// not copyable (see the struct description)

     IndexIVFPQCompact (const IndexIVFPQCompact &) = delete;

     /// not copy-assignable (see the struct description)

     IndexIVFPQCompact & operator= (const IndexIVFPQCompact &) = delete;


     /// how were the compact tables allocated?

     enum Alloc_type_t {

         Alloc_type_none,     ///< alloc from outside

         Alloc_type_new,      ///< was allocated with new

         Alloc_type_mmap      ///< was mmapped

     };


     Alloc_type_t alloc_type; ///< allocation mode of the compact tables


     uint32_t *limits;        ///< size nlist + 1

     uint32_t *compact_ids;   ///< size ntotal

     uint8_t *compact_codes;  ///< size ntotal * code_size


     // file and buffer this was mmapped (will be unmapped when object

     // is deleted)

     char * mmap_buffer;

     long mmap_length;        ///< presumably the length of mmap_buffer in bytes -- TODO confirm


     /// override of IndexIVFPQ::search_knn_with_key, same parameters

     /// NOTE(review): presumably scans the compact_* tables instead of

     /// the ids and codes tables -- confirm against the implementation

     void search_knn_with_key(

         size_t nx,

         const float* qx,

         const long* keys,

         const float* coarse_dis,

         float_maxheap_array_t* res,

         bool store_pairs = false) const override;


     /// the three following functions will fail at runtime

     void add(idx_t, const float*) override;

     void reset() override;

     void train(idx_t, const float*) override;


     /// NOTE(review): presumably releases the compact tables according

     /// to alloc_type; the comment on mmap_buffer states that the mapping

     /// is unmapped when the object is deleted

     ~IndexIVFPQCompact() override;


     /// default constructor (the initial field values are set in the

     /// implementation, which is not part of this header)

     IndexIVFPQCompact ();


 };


 } // namespace faiss


 #endif

faiss::IndexIVFPQCompact::compact_ids
uint32_t * compact_ids
size ntotal
Definition: IndexIVFPQ.h:256

faiss::IndexIVF
Definition: IndexIVF.h:45

faiss::IndexIVFPQCompact::compact_codes
uint8_t * compact_codes
size ntotal * code_size
Definition: IndexIVFPQ.h:257

faiss::IndexIVFPQ::precompute_table
void precompute_table()
build precomputed table
Definition: IndexIVFPQ.cpp:391

faiss::IndexIVFPQ::copy_subset_to
void copy_subset_to(IndexIVFPQ &other, int subset_type, long a1, long a2) const
Definition: IndexIVFPQ.cpp:332

faiss::IndexIVFPQ::reconstruct
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVFPQ.cpp:302

faiss::IndexIVFPQR
Definition: IndexIVFPQ.h:198

faiss::IndexIVFPQR::refine_pq
ProductQuantizer refine_pq
3rd level quantizer
Definition: IndexIVFPQ.h:199

faiss::IndexIVFPQ::polysemous_training
PolysemousTraining * polysemous_training
if NULL, use default
Definition: IndexIVFPQ.h:36

faiss::IndexIVFPQCompact::add
void add(idx_t, const float *) override
the three following functions will fail at runtime
Definition: IndexIVFPQ.cpp:1455

faiss::IndexIVFPQStats
Definition: IndexIVFPQ.h:167

faiss::IndexIVFPQCompact::search_knn_with_key
void search_knn_with_key(size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const override
Definition: IndexIVFPQ.cpp:1483

faiss::IndexIVFPQR::reconstruct_n
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexIVFPQ.cpp:1345

faiss::IDSelector
Definition: AuxIndexStructures.h:57

faiss::IndexIVFPQCompact::Alloc_type_mmap
was mmapped
Definition: IndexIVFPQ.h:250

faiss::IndexIVFPQ::merge_from_residuals
void merge_from_residuals(IndexIVF &other) override
used to implement merging
Definition: IndexIVFPQ.cpp:322

faiss::IndexIVFPQ::decode_multiple
void decode_multiple(size_t n, const long *keys, const uint8_t *xcodes, float *x) const
inverse of encode_multiple
Definition: IndexIVFPQ.cpp:168

faiss::IndexIVFPQ::train_residual_o
void train_residual_o(idx_t n, const float *x, float *residuals_2)
same as train_residual, also output 2nd level residuals
Definition: IndexIVFPQ.cpp:72

faiss::IndexIVFPQ::do_polysemous_training
bool do_polysemous_training
reorder PQ centroids after training?
Definition: IndexIVFPQ.h:35

faiss::IndexIVFPQ::scan_table_threshold
size_t scan_table_threshold
use table computation or on-the-fly?
Definition: IndexIVFPQ.h:39

faiss::IndexIVFPQR::train_residual
void train_residual(idx_t n, const float *x) override
trains the two product quantizers
Definition: IndexIVFPQ.cpp:1213

faiss::IndexIVFPQR::add_core
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx=nullptr)
same as add_with_ids, but optionally use the precomputed list ids
Definition: IndexIVFPQ.cpp:1237

faiss::IndexIVFPQCompact::limits
uint32_t * limits
size nlist + 1
Definition: IndexIVFPQ.h:255

faiss::IndexIVFPQ::precomputed_table
std::vector< float > precomputed_table
Definition: IndexIVFPQ.h:47

faiss::IndexIVFPQ::polysemous_ht
int polysemous_ht
Hamming thresh for polysemous filtering.
Definition: IndexIVFPQ.h:41

faiss::IndexIVFPQ::search_knn_with_key
virtual void search_knn_with_key(size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const
Definition: IndexIVFPQ.cpp:963

faiss::IndexIVFPQR::reset
void reset() override
removes all elements from the database.
Definition: IndexIVFPQ.cpp:1204

faiss::IndexIVFPQ::add_with_ids
void add_with_ids(idx_t n, const float *x, const long *xids=nullptr) override
Definition: IndexIVFPQ.cpp:185

faiss::IndexIVFPQCompact
Definition: IndexIVFPQ.h:242

faiss::IndexIVF::ids
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:55

faiss::Index::d
int d
vector dimension
Definition: Index.h:64

faiss::IndexIVF::quantizer
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:49

faiss::IndexIVFPQ::max_codes
size_t max_codes
max nb of codes to visit to do a query
Definition: IndexIVFPQ.h:40

faiss::IndexIVFPQCompact::Alloc_type_t
Alloc_type_t
how were the compact tables allocated?
Definition: IndexIVFPQ.h:247

faiss::IndexIVFPQR::refine_codes
std::vector< uint8_t > refine_codes
corresponding codes
Definition: IndexIVFPQ.h:200

faiss::IndexIVFPQ::remove_ids
long remove_ids(const IDSelector &sel) override
Definition: IndexIVFPQ.cpp:1091

faiss::IndexIVFPQ::train_residual
void train_residual(idx_t n, const float *x) override
trains the product quantizer
Definition: IndexIVFPQ.cpp:66

faiss::IndexIVFPQ
Definition: IndexIVFPQ.h:29

faiss::IndexIVFPQ::encode_multiple
void encode_multiple(size_t n, long *keys, const float *x, uint8_t *codes, bool compute_keys=false) const
Definition: IndexIVFPQ.cpp:149

faiss::IndexIVFPQCompact::train
void train(idx_t, const float *) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVFPQ.cpp:1463

faiss::HeapArray
Definition: Heap.h:350

faiss::Index::idx_t
long idx_t
all indices are this type
Definition: Index.h:62

faiss::IndexIVFPQ::reset
void reset() override
removes all elements from the database.
Definition: IndexIVFPQ.cpp:1083

faiss::PolysemousTraining
optimizes the order of indices in a ProductQuantizer
Definition: PolysemousTraining.h:125

faiss::IndexIVFPQR::merge_from_residuals
void merge_from_residuals(IndexIVF &other) override
used to implement merging
Definition: IndexIVFPQ.cpp:1362

faiss::IndexIVFPQ::by_residual
bool by_residual
Encode residual or plain vector?
Definition: IndexIVFPQ.h:30

faiss::IndexIVFPQCompact::Alloc_type_none
alloc from outside
Definition: IndexIVFPQ.h:248

faiss::IndexIVFPQ::pq
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:33

faiss::IndexIVF::nlist
size_t nlist
number of possible key values
Definition: IndexIVF.h:46

faiss::Index
Definition: Index.h:60

faiss::IndexIVFPQ::reconstruct_n
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexIVFPQ.cpp:273

faiss::IndexIVFPQ::add_core_o
void add_core_o(idx_t n, const float *x, const long *xids, float *residuals_2, const long *precomputed_idx=nullptr)
Definition: IndexIVFPQ.cpp:191

faiss::IndexIVFPQ::code_size
size_t code_size
code size per vector in bytes
Definition: IndexIVFPQ.h:32

faiss::IndexIVFPQR::remove_ids
long remove_ids(const IDSelector &sel) override
Definition: IndexIVFPQ.cpp:1371

faiss::ProductQuantizer
Definition: ProductQuantizer.h:25

faiss::IndexIVFPQCompact::reset
void reset() override
removes all elements from the database.
Definition: IndexIVFPQ.cpp:1459

faiss::IndexIVFPQCompact::Alloc_type_new
was allocated with new
Definition: IndexIVFPQ.h:249

faiss::IndexIVFPQR::add_with_ids
void add_with_ids(idx_t n, const float *x, const long *xids) override
Definition: IndexIVFPQ.cpp:1233

faiss::IndexIVFPQR::search
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVFPQ.cpp:1256

faiss::IndexIVFPQ::search
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVFPQ.cpp:1062

faiss::IndexIVFPQ::find_duplicates
size_t find_duplicates(idx_t *ids, size_t *lims) const
Definition: IndexIVFPQ.cpp:1148

faiss::IndexIVFPQR::k_factor
float k_factor
factor between k requested in search and the k requested from the IVFPQ
Definition: IndexIVFPQ.h:203

faiss::IndexIVFPQ::use_precomputed_table
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:31