Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/IndexPQ.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #ifndef FAISS_INDEX_PQ_H
11 #define FAISS_INDEX_PQ_H
12 
13 #include <stdint.h>
14 
15 #include <vector>
16 
17 #include "Index.h"
18 #include "ProductQuantizer.h"
19 #include "PolysemousTraining.h"
20 
21 namespace faiss {
22 
23 
24 /** Index based on a product quantizer. Stored vectors are
25  * approximated by PQ codes. */
26 struct IndexPQ: Index {
27 
28  /// The product quantizer used to encode the vectors
30 
31  /// Codes. Size ntotal * pq.code_size
32  std::vector<uint8_t> codes;
33 
34  /** Constructor.
35  *
36  * @param d dimensionality of the input vectors
37  * @param M number of subquantizers
38  * @param nbits number of bit per subvector index
39  */
40  IndexPQ (int d, ///< dimensionality of the input vectors
41  size_t M, ///< number of subquantizers
42  size_t nbits, ///< number of bit per subvector index
43  MetricType metric = METRIC_L2);
44 
45  IndexPQ ();
46 
47  void train(idx_t n, const float* x) override;
48 
49  void add(idx_t n, const float* x) override;
50 
51  void search(
52  idx_t n,
53  const float* x,
54  idx_t k,
55  float* distances,
56  idx_t* labels) const override;
57 
58  void reset() override;
59 
60  void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
61 
62  void reconstruct(idx_t key, float* recons) const override;
63 
64  long remove_ids(const IDSelector& sel) override;
65 
66  /******************************************************
67  * Polysemous codes implementation
68  ******************************************************/
69  bool do_polysemous_training; ///< false = standard PQ
70 
71  /// parameters used for the polysemous training
73 
74  /// how to perform the search in search_core
76  ST_PQ, ///< asymmetric product quantizer (default)
77  ST_HE, ///< Hamming distance on codes
78  ST_generalized_HE, ///< nb of same codes
79  ST_SDC, ///< symmetric product quantizer (SDC)
80  ST_polysemous, ///< HE filter (using ht) + PQ combination
81  ST_polysemous_generalize, ///< Filter on generalized Hamming
82  };
83 
84  Search_type_t search_type;
85 
86  // just encode the sign of the components, instead of using the PQ encoder
87  // used only for the queries
88  bool encode_signs;
89 
90  /// Hamming threshold used for polysemy
92 
93  // actual polysemous search
94  void search_core_polysemous (idx_t n, const float *x, idx_t k,
95  float *distances, idx_t *labels) const;
96 
97  /// prepare query for a polysemous search, but instead of
98  /// computing the result, just get the histogram of Hamming
99  /// distances. May be computed on a provided dataset if xb != NULL
100  /// @param dist_histogram (M * nbits + 1)
101  void hamming_distance_histogram (idx_t n, const float *x,
102  idx_t nb, const float *xb,
103  long *dist_histogram);
104 
105  /** compute pairwise distances between queries and database
106  *
107  * @param n nb of query vectors
108  * @param x query vector, size n * d
109  * @param dis output distances, size n * ntotal
110  */
111  void hamming_distance_table (idx_t n, const float *x,
112  int32_t *dis) const;
113 
114 };
115 
116 
117 /// Search counters for IndexPQ. They are robust to internal threading,
118 /// but not to IndexPQ::search being called by multiple threads
119 struct IndexPQStats {
120  size_t nq;             ///< nb of queries run
121  size_t ncode;          ///< nb of codes visited
122 
123  size_t n_hamming_pass; ///< nb of passed Hamming distance tests (for polysemy)
124 
125  IndexPQStats() { reset(); }
126  void reset();
127 };
128 
129 extern IndexPQStats indexPQ_stats;
130 
131 
132 
133 /** Quantizer where centroids are virtual: they are the Cartesian
134  * product of sub-centroids. */
135 struct MultiIndexQuantizer: Index {
136  ProductQuantizer pq; ///< product quantizer holding the sub-centroids
137 
138  MultiIndexQuantizer (int d, ///< dimension of the input vectors
139  size_t M, ///< number of subquantizers
140  size_t nbits); ///< number of bits per subvector index
141 
142  void train(idx_t n, const float* x) override;
143 
144  void search(
145  idx_t n, const float* x, idx_t k,
146  float* distances, idx_t* labels) const override;
147 
148  /// add and reset will crash at runtime
149  void add(idx_t n, const float* x) override;
150  void reset() override;
151 
152  MultiIndexQuantizer () {}
153 
154  void reconstruct(idx_t key, float* recons) const override;
155 };
156 
157 
158 /** MultiIndexQuantizer where the PQ assignment is performed by sub-indexes
159  */
160 struct MultiIndexQuantizer2: MultiIndexQuantizer {
161 
162  /// M Indexes on d / M dimensions
163  std::vector<Index*> assign_indexes;
164  bool own_fields; ///< presumably: whether assign_indexes are owned by this object -- TODO confirm in IndexPQ.cpp
165 
166  MultiIndexQuantizer2 (
167  int d, size_t M, size_t nbits,
168  Index **indexes);
169 
170  MultiIndexQuantizer2 (
171  int d, size_t nbits,
172  Index *assign_index_0,
173  Index *assign_index_1);
174 
175  void train(idx_t n, const float* x) override;
176 
177  void search(
178  idx_t n, const float* x, idx_t k,
179  float* distances, idx_t* labels) const override;
180 
181 };
182 
183 
184 } // namespace faiss
185 
186 
187 #endif
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:32
ST_HE
Hamming distance on codes.
Definition: IndexPQ.h:77
bool do_polysemous_training
false = standard PQ
Definition: IndexPQ.h:69
void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:52
void reset() override
removes all elements from the database.
Definition: IndexPQ.cpp:958
void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:856
int d
vector dimension
Definition: Index.h:66
long idx_t
all indices are this type
Definition: Index.h:62
void hamming_distance_histogram(idx_t n, const float *x, idx_t nb, const float *xb, long *dist_histogram)
Definition: IndexPQ.cpp:411
void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:1010
Search_type_t
how to perform the search in search_core
Definition: IndexPQ.h:75
long remove_ids(const IDSelector &sel) override
Definition: IndexPQ.cpp:85
ST_polysemous_generalize
Filter on generalized Hamming.
Definition: IndexPQ.h:81
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:29
void add(idx_t n, const float *x) override
Definition: IndexPQ.cpp:76
optimizes the order of indices in a ProductQuantizer
void hamming_distance_table(idx_t n, const float *x, int32_t *dis) const
Definition: IndexPQ.cpp:399
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:868
void reconstruct(idx_t key, float *recons) const override
Definition: IndexPQ.cpp:123
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexPQ.cpp:113
ST_PQ
asymmetric product quantizer (default)
Definition: IndexPQ.h:76
void reconstruct(idx_t key, float *recons) const override
Definition: IndexPQ.cpp:940
ST_polysemous
HE filter (using ht) + PQ combination.
Definition: IndexPQ.h:80
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:1020
void add(idx_t n, const float *x) override
add and reset will crash at runtime
Definition: IndexPQ.cpp:952
void reset() override
removes all elements from the database.
Definition: IndexPQ.cpp:107
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:143
ST_SDC
symmetric product quantizer (SDC)
Definition: IndexPQ.h:79
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:91
PolysemousTraining polysemous_training
parameters used for the polysemous training
Definition: IndexPQ.h:72
std::vector< Index * > assign_indexes
M Indexes on d / M dimensions.
Definition: IndexPQ.h:163
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:44