Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexPQ.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 // -*- c++ -*-
12 
13 #ifndef FAISS_INDEX_PQ_H
14 #define FAISS_INDEX_PQ_H
15 
16 #include <stdint.h>
17 
18 #include <vector>
19 
20 #include "Index.h"
21 #include "ProductQuantizer.h"
22 #include "PolysemousTraining.h"
23 
24 namespace faiss {
25 
26 
27 /** Index based on a product quantizer. Stored vectors are
28  * approximated by PQ codes. */
29 struct IndexPQ: Index {
30 
31  /// The product quantizer used to encode the vectors
32  ProductQuantizer pq;
33 
34  /// Codes. Size ntotal * pq.code_size
35  std::vector<uint8_t> codes;
36 
37  /** Constructor.
38  *
39  * @param d dimensionality of the input vectors
40  * @param M number of subquantizers
41  * @param nbits number of bits per subvector index
42  */
43  IndexPQ (int d, ///< dimensionality of the input vectors
44  size_t M, ///< number of subquantizers
45  size_t nbits, ///< number of bits per subvector index
46  MetricType metric = METRIC_L2);
47 
48  IndexPQ ();
49 
50  virtual void set_typename () override;
51 
52  virtual void train (idx_t n, const float *x) override;
53 
54  virtual void add (idx_t n, const float *x) override;
55 
56  virtual void search (
57  idx_t n, const float *x, idx_t k,
58  float *distances, idx_t *labels) const override;
59 
60  virtual void reset() override;
61 
62  virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons)
63  const override;
64 
65  virtual void reconstruct (idx_t key, float * recons)
66  const override;
67 
68  /******************************************************
69  * Polysemous codes implementation
70  ******************************************************/
71  bool do_polysemous_training; ///< false = standard PQ
72 
73  /// parameters used for the polysemous training
74  PolysemousTraining polysemous_training;
75 
76  /// how to perform the search in search_core
77  enum Search_type_t {
78  ST_PQ, ///< asymmetric product quantizer (default)
79  ST_HE, ///< Hamming distance on codes
80  ST_generalized_HE, ///< nb of same codes
81  ST_SDC, ///< symmetric product quantizer (SDC)
82  ST_polysemous, ///< HE filter (using ht) + PQ combination
83  ST_polysemous_generalize, ///< Filter on generalized Hamming
84  };
85 
86  Search_type_t search_type;
87 
88  // just encode the sign of the components, instead of using the PQ encoder
89  // used only for the queries
90  bool encode_signs;
91 
92  /// Hamming threshold used for polysemy
94 
95  // actual polysemous search
96  void search_core_polysemous (idx_t n, const float *x, idx_t k,
97  float *distances, idx_t *labels) const;
98 
99  /// prepare query for a polysemous search, but instead of
100  /// computing the result, just get the histogram of Hamming
101  /// distances. May be computed on a provided dataset if xb != NULL
102  /// @param dist_histogram output histogram, size (M * nbits + 1)
103  void hamming_distance_histogram (idx_t n, const float *x,
104  idx_t nb, const float *xb,
105  long *dist_histogram);
106 
107  /** compute pairwise distances between queries and database
108  *
109  * @param n nb of query vectors
110  * @param x query vector, size n * d
111  * @param dis output distances, size n * ntotal
112  */
113  void hamming_distance_table (idx_t n, const float *x,
114  int32_t *dis) const;
115 
116 };
117 
118 
119 /// statistics are robust to internal threading, but not if
120 /// IndexPQ::search is called by multiple threads
121 struct IndexPQStats {
122  size_t nq; // nb of queries run
123  size_t ncode; // nb of codes visited
124 
125  size_t n_hamming_pass; // nb of passed Hamming distance tests (for polysemy)
126 
127  IndexPQStats () {reset (); }
128  void reset ();
129 };
130 
131 extern IndexPQStats indexPQ_stats;
132 
133 
134 
135 /** Quantizer where centroids are virtual: they are the Cartesian
136  * product of sub-centroids. */
137 struct MultiIndexQuantizer: Index {
138  ProductQuantizer pq;
139 
140  MultiIndexQuantizer (int d, ///< dimension of the input vectors
141  size_t M, ///< number of subquantizers
142  size_t nbits); ///< number of bits per subvector index
143 
144  virtual void set_typename ();
145 
146  virtual void train (idx_t n, const float *x);
147 
148 
149  virtual void search (idx_t n, const float *x, idx_t k,
150  float *distances, idx_t *labels) const;
151 
152  /// add and reset will crash at runtime
153  virtual void add (idx_t n, const float *x);
154  virtual void reset ();
155 
156  MultiIndexQuantizer () {}
157 
158  virtual void reconstruct (idx_t key, float * recons) const;
159 };
160 
161 
162 } // namespace faiss
163 
164 
165 
166 #endif
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:35
Hamming distance on codes.
Definition: IndexPQ.h:79
bool do_polysemous_training
false = standard PQ
Definition: IndexPQ.h:71
virtual void train(idx_t n, const float *x) override
Definition: IndexPQ.cpp:64
int d
vector dimension
Definition: Index.h:66
void hamming_distance_histogram(idx_t n, const float *x, idx_t nb, const float *xb, long *dist_histogram)
Definition: IndexPQ.cpp:394
Search_type_t
how to perform the search in search_core
Definition: IndexPQ.h:77
Filter on generalized Hamming.
Definition: IndexPQ.h:83
virtual void reset()
removes all elements from the database.
Definition: IndexPQ.cpp:877
long idx_t
all indices are this type
Definition: Index.h:64
virtual void train(idx_t n, const float *x)
Definition: IndexPQ.cpp:814
virtual void add(idx_t n, const float *x)
add and reset will crash at runtime
Definition: IndexPQ.cpp:872
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:32
virtual void add(idx_t n, const float *x) override
Definition: IndexPQ.cpp:88
optimizes the order of indices in a ProductQuantizer
void hamming_distance_table(idx_t n, const float *x, int32_t *dis) const
Definition: IndexPQ.cpp:381
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const
Definition: IndexPQ.cpp:825
virtual void reconstruct(idx_t key, float *recons) const override
Definition: IndexPQ.cpp:114
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexPQ.cpp:104
asymmetric product quantizer (default)
Definition: IndexPQ.h:78
HE filter (using ht) + PQ combination.
Definition: IndexPQ.h:82
virtual void reset() override
removes all elements from the database.
Definition: IndexPQ.cpp:98
virtual void reconstruct(idx_t key, float *recons) const
Definition: IndexPQ.cpp:848
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexPQ.cpp:134
symmetric product quantizer (SDC)
Definition: IndexPQ.h:81
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:93
PolysemousTraining polysemous_training
parameters used for the polysemous training
Definition: IndexPQ.h:74
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:44