Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/IndexScalarQuantizer.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #ifndef FAISS_INDEX_SCALAR_QUANTIZER_H
11 #define FAISS_INDEX_SCALAR_QUANTIZER_H
12 
13 #include <stdint.h>
14 
15 
16 #include <vector>
17 
18 
19 #include "IndexIVF.h"
20 
21 
22 namespace faiss {
23 
24 /**
25  * The uniform quantizer has a range [vmin, vmax]. The range can be
26  * the same for all dimensions (uniform) or specific per dimension
27  * (default).
28  */
29 
30 struct SQDistanceComputer;
31 
33 
35  QT_8bit, ///< 8 bits per component
36  QT_4bit, ///< 4 bits per component
37  QT_8bit_uniform, ///< 8 bits per component, shared range for all dimensions
38  QT_4bit_uniform,
39  QT_fp16,
40  QT_8bit_direct, ///< fast indexing of uint8s
41  QT_6bit, ///< 6 bits per component
42  };
43 
44  QuantizerType qtype;
45 
46  /** The uniform encoder can estimate the range of representable
47  * values of the uniform encoder using different statistics. Here
48  * rs = rangestat_arg */
49 
50  // rangestat_arg.
51  enum RangeStat {
52  RS_minmax, ///< [min - rs*(max-min), max + rs*(max-min)]
53  RS_meanstd, ///< [mean - std * rs, mean + std * rs]
54  RS_quantiles, ///< [Q(rs), Q(1-rs)]
55  RS_optim, ///< alternate optimization of reconstruction error
56  };
57 
58  RangeStat rangestat;
59  float rangestat_arg;
60 
61  /// dimension of input vectors
62  size_t d;
63 
64  /// bytes per vector
65  size_t code_size;
66 
67  /// trained values (including the range)
68  std::vector<float> trained;
69 
70  ScalarQuantizer (size_t d, QuantizerType qtype);
71  ScalarQuantizer ();
72 
73  void train (size_t n, const float *x);
74 
75 
76  /// same as compute_code for several vectors
77  void compute_codes (const float * x,
78  uint8_t * codes,
79  size_t n) const ;
80 
81  /// decode a vector from a given code (or n vectors, n being the third argument)
82  void decode (const uint8_t *code, float *x, size_t n) const;
83 
84 
85  SQDistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
86  const;
87 
88 };
89 
90 struct DistanceComputer;
91 
93  /// Used to encode the vectors
95 
96  /// Codes. Size ntotal * code_size
97  std::vector<uint8_t> codes;
98 
99  size_t code_size;
100 
101  /** Constructor.
102  *
103  * @param d dimensionality of the input vectors
104  * @param metric metric used for the search (inner product or L2);
105  * defaults to METRIC_L2
106  */
107  IndexScalarQuantizer (int d,
109  MetricType metric = METRIC_L2);
110 
112 
113  void train(idx_t n, const float* x) override;
114 
115  void add(idx_t n, const float* x) override;
116 
117  void search(
118  idx_t n,
119  const float* x,
120  idx_t k,
121  float* distances,
122  idx_t* labels) const override;
123 
124  void reset() override;
125 
126  void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
127 
128  void reconstruct(idx_t key, float* recons) const override;
129 
130  DistanceComputer *get_distance_computer () const;
131 
132 };
133 
134 
135  /** An IVF implementation where the components of the residuals are
136  * encoded with a scalar uniform quantizer. All distance computations
137  * are asymmetric, so the encoded vectors are decoded and approximate
138  * distances are computed.
139  */
140 
142  ScalarQuantizer sq;
143  bool by_residual;
144 
145  IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist,
147  MetricType metric = METRIC_L2);
148 
150 
151  void train_residual(idx_t n, const float* x) override;
152 
153  void encode_vectors(idx_t n, const float* x,
154  const idx_t *list_nos,
155  uint8_t * codes) const override;
156 
157  void add_with_ids(idx_t n, const float* x, const long* xids) override;
158 
159  InvertedListScanner *get_InvertedListScanner (bool store_pairs)
160  const override;
161 
162 
163  void reconstruct_from_offset (long list_no, long offset,
164  float* recons) const override;
165 
166 };
167 
168 
169 }
170 
171 
172 #endif
void encode_vectors(idx_t n, const float *x, const idx_t *list_nos, uint8_t *codes) const override
size_t code_size
bytes per vector
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void train_residual(idx_t n, const float *x) override
alternate optimization of reconstruction error
same, shared range for all dimensions
void reconstruct_from_offset(long list_no, long offset, float *recons) const override
void add(idx_t n, const float *x) override
void reset() override
removes all elements from the database.
void add_with_ids(idx_t n, const float *x, const long *xids) override
default implementation that calls encode_vectors
int d
vector dimension
Definition: Index.h:66
long idx_t
all indices are this type
Definition: Index.h:62
std::vector< uint8_t > codes
Codes. Size ntotal * code_size.
ScalarQuantizer sq
Used to encode the vectors.
[mean - std * rs, mean + std * rs]
void decode(const uint8_t *code, float *x, size_t n) const
decode a vector from a given code (or n vectors, n being the third argument)
InvertedListScanner * get_InvertedListScanner(bool store_pairs) const override
get a scanner for this index (store_pairs means ignore labels)
void compute_codes(const float *x, uint8_t *codes, size_t n) const
same as compute_code for several vectors
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
void reconstruct(idx_t key, float *recons) const override
[min - rs*(max-min), max + rs*(max-min)]
std::vector< float > trained
trained values (including the range)
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:32
void train(idx_t n, const float *x) override
size_t d
dimension of input vectors
size_t nlist
number of possible key values
Definition: IndexIVF.h:33
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:44