/** * Copyright (c) 2015-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the CC-by-NC license found in the * LICENSE file in the root directory of this source tree. */ // Copyright 2004-present Facebook. All Rights Reserved. // -*- c++ -*- #ifndef FAISS_INDEX_PQ_H #define FAISS_INDEX_PQ_H #include #include #include "Index.h" #include "ProductQuantizer.h" #include "PolysemousTraining.h" namespace faiss { /** Index based on a product quantizer. Stored vectors are * approximated by PQ codes. */ struct IndexPQ: Index { /// The product quantizer used to encode the vectors ProductQuantizer pq; /// Codes. Size ntotal * pq.code_size std::vector codes; /** Constructor. * * @param d dimensionality of the input vectors * @param M number of subquantizers * @param nbits number of bit per subvector index */ IndexPQ (int d, ///< dimensionality of the input vectors size_t M, ///< number of subquantizers size_t nbits, ///< number of bit per subvector index MetricType metric = METRIC_L2); IndexPQ (); virtual void set_typename () override; virtual void train (idx_t n, const float *x) override; virtual void add (idx_t n, const float *x) override; virtual void search ( idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override; virtual void reset() override; virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const override; virtual void reconstruct (idx_t key, float * recons) const override; /****************************************************** * Polysemous codes implementation ******************************************************/ bool do_polysemous_training; ///< false = standard PQ /// parameters used for the polysemous training PolysemousTraining polysemous_training; /// how to perform the search in search_core enum Search_type_t { ST_PQ, ///< asymmetric product quantizer (default) ST_HE, ///< Hamming distance on codes ST_generalized_HE, ///< nb of same codes ST_SDC, ///< symmetric product quantizer (SDC) ST_polysemous, ///< HE filter (using ht) + PQ combination ST_polysemous_generalize, ///< Filter on generalized Hamming }; Search_type_t search_type; // just encode the sign of the components, instead of using the PQ encoder // used only for the queries bool encode_signs; /// Hamming threshold used for polysemy int polysemous_ht; // actual polysemous search void search_core_polysemous (idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const; /// prepare query for a polysemous search, but instead of /// computing the result, just get the histogram of Hamming /// distances. May be computed on a provided dataset if xb != NULL /// @param dist_histogram (M * nbits + 1) void hamming_distance_histogram (idx_t n, const float *x, idx_t nb, const float *xb, long *dist_histogram); /** compute pairwise distances between queries and database * * @param n nb of query vectors * @param x query vector, size n * d * @param dis output distances, size n * ntotal */ void hamming_distance_table (idx_t n, const float *x, int32_t *dis) const; }; /// statistics are robust to internal threading, but not if /// IndexPQ::search is called by multiple threads struct IndexPQStats { size_t nq; // nb of queries run size_t ncode; // nb of codes visited size_t n_hamming_pass; // nb of passed Hamming distance tests (for polysemy) IndexPQStats () {reset (); } void reset (); }; extern IndexPQStats indexPQ_stats; /** Quantizer where centroids are virtual: they are the Cartesian * product of sub-centroids. */ struct MultiIndexQuantizer: Index { ProductQuantizer pq; MultiIndexQuantizer (int d, ///< dimension of the input vectors size_t M, ///< number of subquantizers size_t nbits); ///< number of bit per subvector index virtual void set_typename (); virtual void train (idx_t n, const float *x); virtual void search (idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const; /// add and reset will crash at runtime virtual void add (idx_t n, const float *x); virtual void reset (); MultiIndexQuantizer () {} virtual void reconstruct (idx_t key, float * recons) const; }; } // namespace faiss #endif