Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexPQ.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 // -*- c++ -*-
11 
12 #ifndef FAISS_INDEX_PQ_H
13 #define FAISS_INDEX_PQ_H
14 
15 #include <stdint.h>
16 
17 #include <vector>
18 
19 #include "Index.h"
20 #include "ProductQuantizer.h"
21 #include "PolysemousTraining.h"
22 
23 namespace faiss {
24 
25 
26 /** Index based on a product quantizer. Stored vectors are
27  * approximated by PQ codes. */
28 struct IndexPQ: Index {
29 
30  /// The product quantizer used to encode the vectors
32 
33  /// Codes. Size ntotal * pq.code_size
34  std::vector<uint8_t> codes;
35 
36  /** Constructor.
37  *
38  * @param d dimensionality of the input vectors
39  * @param M number of subquantizers
40  * @param nbits number of bit per subvector index
41  */
42  IndexPQ (int d, ///< dimensionality of the input vectors
43  size_t M, ///< number of subquantizers
44  size_t nbits, ///< number of bit per subvector index
45  MetricType metric = METRIC_L2);
46 
47  IndexPQ ();
48 
49  void train(idx_t n, const float* x) override;
50 
51  void add(idx_t n, const float* x) override;
52 
53  void search(
54  idx_t n,
55  const float* x,
56  idx_t k,
57  float* distances,
58  idx_t* labels) const override;
59 
60  void reset() override;
61 
62  void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
63 
64  void reconstruct(idx_t key, float* recons) const override;
65 
66  /******************************************************
67  * Polysemous codes implementation
68  ******************************************************/
69  bool do_polysemous_training; ///< false = standard PQ
70 
71  /// parameters used for the polysemous training
73 
74  /// how to perform the search in search_core
76  ST_PQ, ///< asymmetric product quantizer (default)
77  ST_HE, ///< Hamming distance on codes
78  ST_generalized_HE, ///< nb of same codes
79  ST_SDC, ///< symmetric product quantizer (SDC)
80  ST_polysemous, ///< HE filter (using ht) + PQ combination
81  ST_polysemous_generalize, ///< Filter on generalized Hamming
82  };
83 
84  Search_type_t search_type;
85 
86  // just encode the sign of the components, instead of using the PQ encoder
87  // used only for the queries
88  bool encode_signs;
89 
90  /// Hamming threshold used for polysemy
92 
93  // actual polysemous search
94  void search_core_polysemous (idx_t n, const float *x, idx_t k,
95  float *distances, idx_t *labels) const;
96 
97  /// prepare query for a polysemous search, but instead of
98  /// computing the result, just get the histogram of Hamming
99  /// distances. May be computed on a provided dataset if xb != NULL
100  /// @param dist_histogram (M * nbits + 1)
101  void hamming_distance_histogram (idx_t n, const float *x,
102  idx_t nb, const float *xb,
103  long *dist_histogram);
104 
105  /** compute pairwise distances between queries and database
106  *
107  * @param n nb of query vectors
108  * @param x query vector, size n * d
109  * @param dis output distances, size n * ntotal
110  */
111  void hamming_distance_table (idx_t n, const float *x,
112  int32_t *dis) const;
113 
114 };
115 
116 
/// statistics are robust to internal threading, but not if
/// IndexPQ::search is called by multiple threads
struct IndexPQStats {
    size_t nq;             ///< nb of queries run
    size_t ncode;          ///< nb of codes visited

    size_t n_hamming_pass; ///< nb of passed Hamming distance tests (for polysemy)

    /// Construction simply delegates to reset().
    IndexPQStats () {reset (); }

    /// Declared here; the definition is not part of this header.
    void reset ();
};

/// Statistics object with external linkage; only declared in this header.
extern IndexPQStats indexPQ_stats;
130 
131 
132 
133 /** Quantizer where centroids are virtual: they are the Cartesian
134  * product of sub-centroids. */
136  ProductQuantizer pq;
137 
138  MultiIndexQuantizer (int d, ///< dimension of the input vectors
139  size_t M, ///< number of subquantizers
140  size_t nbits); ///< number of bit per subvector index
141 
142  void train(idx_t n, const float* x) override;
143 
144  void search(
145  idx_t n,
146  const float* x,
147  idx_t k,
148  float* distances,
149  idx_t* labels) const override;
150 
151  /// add and reset will crash at runtime
152  void add(idx_t n, const float* x) override;
153  void reset() override;
154 
155  MultiIndexQuantizer () {}
156 
157  void reconstruct(idx_t key, float* recons) const override;
158 };
159 
160 
161 } // namespace faiss
162 
163 
164 
165 #endif
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:34
Hamming distance on codes.
Definition: IndexPQ.h:77
bool do_polysemous_training
false = standard PQ
Definition: IndexPQ.h:69
void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:54
void reset() override
removes all elements from the database.
Definition: IndexPQ.cpp:890
void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:794
int d
vector dimension
Definition: Index.h:64
void hamming_distance_histogram(idx_t n, const float *x, idx_t nb, const float *xb, long *dist_histogram)
Definition: IndexPQ.cpp:382
Search_type_t
how to perform the search in search_core
Definition: IndexPQ.h:75
Filter on generalized Hamming.
Definition: IndexPQ.h:81
long idx_t
all indices are this type
Definition: Index.h:62
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:31
void add(idx_t n, const float *x) override
Definition: IndexPQ.cpp:78
optimizes the order of indices in a ProductQuantizer
void hamming_distance_table(idx_t n, const float *x, int32_t *dis) const
Definition: IndexPQ.cpp:370
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:805
void reconstruct(idx_t key, float *recons) const override
Definition: IndexPQ.cpp:104
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexPQ.cpp:94
asymmetric product quantizer (default)
Definition: IndexPQ.h:76
void reconstruct(idx_t key, float *recons) const override
Definition: IndexPQ.cpp:861
HE filter (using ht) + PQ combination.
Definition: IndexPQ.h:80
void add(idx_t n, const float *x) override
add and reset will crash at runtime
Definition: IndexPQ.cpp:884
void reset() override
removes all elements from the database.
Definition: IndexPQ.cpp:88
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:124
symmetric product quantizer (SDC)
Definition: IndexPQ.h:79
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:91
PolysemousTraining polysemous_training
parameters used for the polysemous training
Definition: IndexPQ.h:72
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:43