Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexScalarQuantizer.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #ifndef FAISS_INDEX_SCALAR_QUANTIZER_H
10 #define FAISS_INDEX_SCALAR_QUANTIZER_H
11 
12 #include <stdint.h>
13 
14 
15 #include <vector>
16 
17 
18 #include "IndexIVF.h"
19 
20 
21 namespace faiss {
22 
23 /**
24  * The uniform quantizer has a range [vmin, vmax]. The range can be
25  * the same for all dimensions (uniform) or specific per dimension
26  * (default).
27  */
28 
29 
31 
33  QT_8bit, ///< 8 bits per component
34  QT_4bit, ///< 4 bits per component
35  QT_8bit_uniform, ///< same, shared range for all dimensions
36  QT_4bit_uniform,
37  };
38 
39  QuantizerType qtype;
40 
41  /** The uniform encoder can estimate the range of representable
42  * values of the uniform encoder using different statistics. Here
43  * rs = rangestat_arg */
44 
45  // rangestat_arg.
46  enum RangeStat {
47  RS_minmax, ///< [min - rs*(max-min), max + rs*(max-min)]
48  RS_meanstd, ///< [mean - std * rs, mean + std * rs]
49  RS_quantiles, ///< [Q(rs), Q(1-rs)]
50  RS_optim, ///< alternate optimization of reconstruction error
51  };
52 
53  RangeStat rangestat;
54  float rangestat_arg;
55 
56  /// dimension of input vectors
57  size_t d;
58 
59  /// bytes per vector
60  size_t code_size;
61 
62  /// trained values (including the range)
63  std::vector<float> trained;
64 
65  ScalarQuantizer (size_t d, QuantizerType qtype);
66  ScalarQuantizer ();
67 
68  void train (size_t n, const float *x);
69 
70 
71  /// same as compute_code for several vectors
72  void compute_codes (const float * x,
73  uint8_t * codes,
74  size_t n) const ;
75 
76  /// decode a vector from a given code (or n vectors, n being the third argument)
77  void decode (const uint8_t *code, float *x, size_t n) const;
78 
79  // fast, non-thread-safe way of computing vector-to-code and
80  // code-to-code distances.
82 
83  /// vector-to-code distance computation
84  virtual float compute_distance (const float *x,
85  const uint8_t *code) = 0;
86 
87  /// code-to-code distance computation
88  virtual float compute_code_distance (const uint8_t *code1,
89  const uint8_t *code2) = 0;
90  virtual ~DistanceComputer () {}
91  };
92 
93  DistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
94  const;
95 
96 };
97 
98 
100  /// Used to encode the vectors
102 
103  /// Codes. Size ntotal * code_size
104  std::vector<uint8_t> codes;
105 
106  size_t code_size;
107 
108  /** Constructor.
109  *
110  * @param d dimensionality of the input vectors
111  * @param M number of subquantizers
112  * @param nbits number of bits per subvector index
113  */
114  IndexScalarQuantizer (int d,
116  MetricType metric = METRIC_L2);
117 
119 
120  void train(idx_t n, const float* x) override;
121 
122  void add(idx_t n, const float* x) override;
123 
124  void search(
125  idx_t n,
126  const float* x,
127  idx_t k,
128  float* distances,
129  idx_t* labels) const override;
130 
131  void reset() override;
132 
133  void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
134 
135  void reconstruct(idx_t key, float* recons) const override;
136 
137 };
138 
139 
140  /** An IVF implementation where the components of the residuals are
141  * encoded with a scalar uniform quantizer. All distance computations
142  * are asymmetric, so the encoded vectors are decoded and approximate
143  * distances are computed.
144  */
145 
147  ScalarQuantizer sq;
148 
149  IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist,
151  MetricType metric = METRIC_L2);
152 
154 
155  void train_residual(idx_t n, const float* x) override;
156 
157  void add_with_ids(idx_t n, const float* x, const long* xids) override;
158 
159  void search_preassigned (idx_t n, const float *x, idx_t k,
160  const idx_t *assign,
161  const float *centroid_dis,
162  float *distances, idx_t *labels,
163  bool store_pairs) const override;
164 
165  void reconstruct_from_offset (long list_no, long offset,
166  float* recons) const override;
167 
168 };
169 
170 
171 }
172 
173 
174 #endif
size_t code_size
bytes per vector
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void train_residual(idx_t n, const float *x) override
alternate optimization of reconstruction error
same, shared range for all dimensions
void reconstruct_from_offset(long list_no, long offset, float *recons) const override
void add(idx_t n, const float *x) override
virtual float compute_code_distance(const uint8_t *code1, const uint8_t *code2)=0
code-to-code distance computation
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:34
void reset() override
removes all elements from the database.
void add_with_ids(idx_t n, const float *x, const long *xids) override
int d
vector dimension
Definition: Index.h:64
std::vector< uint8_t > codes
Codes. Size ntotal * code_size.
ScalarQuantizer sq
Used to encode the vectors.
long idx_t
all indices are this type
Definition: Index.h:62
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const override
[mean - std * rs, mean + std * rs]
void decode(const uint8_t *code, float *x, size_t n) const
decode a vector from a given code (or n vectors, n being the third argument)
void compute_codes(const float *x, uint8_t *codes, size_t n) const
same as compute_code for several vectors
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
void reconstruct(idx_t key, float *recons) const override
[min - rs*(max-min), max + rs*(max-min)]
std::vector< float > trained
trained values (including the range)
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:33
void train(idx_t n, const float *x) override
size_t d
dimension of input vectors
size_t nlist
number of possible key values
Definition: IndexIVF.h:34
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:43
virtual float compute_distance(const float *x, const uint8_t *code)=0
vector-to-code distance computation