Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/IndexPQ.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #ifndef FAISS_INDEX_PQ_H
12 #define FAISS_INDEX_PQ_H
13 
14 #include <stdint.h>
15 
16 #include <vector>
17 
18 #include "Index.h"
19 #include "ProductQuantizer.h"
20 #include "PolysemousTraining.h"
21 
22 namespace faiss {
23 
24 
25 /** Index based on a product quantizer. Stored vectors are
26  * approximated by PQ codes. */
27 struct IndexPQ: Index {
28 
29  /// The product quantizer used to encode the vectors
31 
32  /// Codes. Size ntotal * pq.code_size
33  std::vector<uint8_t> codes;
34 
35  /** Constructor.
36  *
37  * @param d dimensionality of the input vectors
38  * @param M number of subquantizers
39  * @param nbits number of bit per subvector index
40  */
41  IndexPQ (int d, ///< dimensionality of the input vectors
42  size_t M, ///< number of subquantizers
43  size_t nbits, ///< number of bit per subvector index
44  MetricType metric = METRIC_L2);
45 
46  IndexPQ ();
47 
48  void train(idx_t n, const float* x) override;
49 
50  void add(idx_t n, const float* x) override;
51 
52  void search(
53  idx_t n,
54  const float* x,
55  idx_t k,
56  float* distances,
57  idx_t* labels) const override;
58 
59  void reset() override;
60 
61  void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
62 
63  void reconstruct(idx_t key, float* recons) const override;
64 
65  long remove_ids(const IDSelector& sel) override;
66 
67  /******************************************************
68  * Polysemous codes implementation
69  ******************************************************/
70  bool do_polysemous_training; ///< false = standard PQ
71 
72  /// parameters used for the polysemous training
74 
75  /// how to perform the search in search_core
77  ST_PQ, ///< asymmetric product quantizer (default)
78  ST_HE, ///< Hamming distance on codes
79  ST_generalized_HE, ///< nb of same codes
80  ST_SDC, ///< symmetric product quantizer (SDC)
81  ST_polysemous, ///< HE filter (using ht) + PQ combination
82  ST_polysemous_generalize, ///< Filter on generalized Hamming
83  };
84 
85  Search_type_t search_type;
86 
87  // just encode the sign of the components, instead of using the PQ encoder
88  // used only for the queries
89  bool encode_signs;
90 
91  /// Hamming threshold used for polysemy
93 
94  // actual polysemous search
95  void search_core_polysemous (idx_t n, const float *x, idx_t k,
96  float *distances, idx_t *labels) const;
97 
98  /// prepare query for a polysemous search, but instead of
99  /// computing the result, just get the histogram of Hamming
100  /// distances. May be computed on a provided dataset if xb != NULL
101  /// @param dist_histogram (M * nbits + 1)
102  void hamming_distance_histogram (idx_t n, const float *x,
103  idx_t nb, const float *xb,
104  long *dist_histogram);
105 
106  /** compute pairwise distances between queries and database
107  *
108  * @param n nb of query vectors
109  * @param x query vector, size n * d
110  * @param dis output distances, size n * ntotal
111  */
112  void hamming_distance_table (idx_t n, const float *x,
113  int32_t *dis) const;
114 
115 };
116 
117 
118 /// statistics are robust to internal threading, but not if
119 /// IndexPQ::search is called by multiple threads
120 struct IndexPQStats {
121  size_t nq; ///< number of queries run
122  size_t ncode; ///< number of codes visited
123 
124  size_t n_hamming_pass; ///< number of passed Hamming distance tests (for polysemy)
125 
126  IndexPQStats () {reset (); } // the constructor only calls reset()
127  void reset (); // declared here; the definition is not part of this header
128 };
129 
130 extern IndexPQStats indexPQ_stats;
131 
132 
133 
134 /** Quantizer where centroids are virtual: they are the Cartesian
135  * product of sub-centroids. NOTE(review): listing line 136, which should open this struct (presumably `struct MultiIndexQuantizer: Index {`), is missing from this page -- confirm against IndexPQ.h. */
137  ProductQuantizer pq; ///< product quantizer; presumably supplies the sub-centroids -- TODO confirm
138 
139  MultiIndexQuantizer (int d, ///< dimension of the input vectors
140  size_t M, ///< number of subquantizers
141  size_t nbits); ///< number of bits per subvector index
142 
143  void train(idx_t n, const float* x) override;
144 
145  void search(
146  idx_t n, const float* x, idx_t k,
147  float* distances, idx_t* labels) const override;
148 
149  /// add and reset are declared as overrides but will crash at runtime
150  void add(idx_t n, const float* x) override;
151  void reset() override;
152 
153  MultiIndexQuantizer () {}
154 
155  void reconstruct(idx_t key, float* recons) const override;
156 };
157 
158 
159 /** MultiIndexQuantizer where the PQ assignment is performed by sub-indexes
160  */
162  // NOTE(review): listing line 161 (presumably the struct header) is missing from this page -- confirm against IndexPQ.h
163  /// M Indexes on d / M dimensions
164  std::vector<Index*> assign_indexes;
165  bool own_fields; // NOTE(review): presumably tells whether assign_indexes are owned by this object -- confirm in IndexPQ.cpp
166  // NOTE(review): listing line 167 (presumably the constructor name opening this parameter list) is missing
168  int d, size_t M, size_t nbits,
169  Index **indexes);
170  // NOTE(review): listing line 171 (presumably a second constructor taking two sub-indexes) is missing
172  int d, size_t nbits,
173  Index *assign_index_0,
174  Index *assign_index_1);
175 
176  void train(idx_t n, const float* x) override;
177 
178  void search(
179  idx_t n, const float* x, idx_t k,
180  float* distances, idx_t* labels) const override;
181 
182 };
183 
184 
185 } // namespace faiss
186 
187 
188 #endif
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:33
Hamming distance on codes.
Definition: IndexPQ.h:78
bool do_polysemous_training
false = standard PQ
Definition: IndexPQ.h:70
void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:53
void reset() override
removes all elements from the database.
Definition: IndexPQ.cpp:959
void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:857
int d
vector dimension
Definition: Index.h:66
void hamming_distance_histogram(idx_t n, const float *x, idx_t nb, const float *xb, long *dist_histogram)
Definition: IndexPQ.cpp:412
void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:1011
Search_type_t
how to perform the search in search_core
Definition: IndexPQ.h:76
long remove_ids(const IDSelector &sel) override
Definition: IndexPQ.cpp:86
Filter on generalized Hamming.
Definition: IndexPQ.h:82
long idx_t
all indices are this type
Definition: Index.h:64
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:30
void add(idx_t n, const float *x) override
Definition: IndexPQ.cpp:77
optimizes the order of indices in a ProductQuantizer
void hamming_distance_table(idx_t n, const float *x, int32_t *dis) const
Definition: IndexPQ.cpp:400
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:869
void reconstruct(idx_t key, float *recons) const override
Definition: IndexPQ.cpp:124
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexPQ.cpp:114
asymmetric product quantizer (default)
Definition: IndexPQ.h:77
void reconstruct(idx_t key, float *recons) const override
Definition: IndexPQ.cpp:941
HE filter (using ht) + PQ combination.
Definition: IndexPQ.h:81
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:1021
void add(idx_t n, const float *x) override
add and reset will crash at runtime
Definition: IndexPQ.cpp:953
void reset() override
removes all elements from the database.
Definition: IndexPQ.cpp:108
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:144
symmetric product quantizer (SDC)
Definition: IndexPQ.h:80
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:92
PolysemousTraining polysemous_training
parameters used for the polysemous training
Definition: IndexPQ.h:73
std::vector< Index * > assign_indexes
M Indexes on d / M dimensions.
Definition: IndexPQ.h:164
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:45