Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/IndexBinaryIVF.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #ifndef FAISS_INDEX_BINARY_IVF_H
12 #define FAISS_INDEX_BINARY_IVF_H
13 
14 
15 #include <vector>
16 
17 #include "IndexBinary.h"
18 #include "IndexIVF.h"
19 #include "Clustering.h"
20 #include "Heap.h"
21 
22 
23 namespace faiss {
24 
25 struct BinaryInvertedListScanner;
26 
27 /** Index based on an inverted file (IVF)
28  *
29  * In the inverted file, the quantizer (an IndexBinary instance) provides a
30  * quantization index for each vector to be added. The quantization
31  * index maps to a list (aka inverted list or posting list), where the
32  * id of the vector is stored.
33  *
34  * Otherwise the object is similar to the IndexIVF
35  */
37  /// Access to the actual data
39  bool own_invlists;
40 
41  size_t nprobe; ///< number of probes at query time
42  size_t max_codes; ///< max nb of codes to visit to do a query
43 
44  /** Select between using a heap or counting to select the k smallest values
45  * when scanning inverted lists.
46  */
47  bool use_heap = true;
48 
49  /// map for direct access to the elements. Enables reconstruct().
51  std::vector<long> direct_map;
52 
53  IndexBinary *quantizer; ///< quantizer that maps vectors to inverted lists
54  size_t nlist; ///< number of possible key values
55 
56  bool own_fields; ///< whether object owns the quantizer
57 
58  ClusteringParameters cp; ///< to override default clustering params
59  Index *clustering_index; ///< to override index used during clustering
60 
61  /// Trains the quantizer and calls train_residual to train sub-quantizers
62  void train_q1(size_t n, const uint8_t *x, bool verbose);
63 
64  /** The Inverted file takes a quantizer (an IndexBinary) on input,
65  * which implements the function mapping a vector to a list
66  * identifier. The pointer is borrowed: the quantizer should not
67  * be deleted while the IndexBinaryIVF is in use.
68  */
69  IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist);
70 
72 
73  ~IndexBinaryIVF() override;
74 
75  void reset() override;
76 
77  /// Trains the quantizer and calls train_residual to train sub-quantizers
78  void train(idx_t n, const uint8_t *x) override;
79 
80  /// Quantizes x and calls add_with_key
81  void add(idx_t n, const uint8_t *x) override;
82 
83  void add_with_ids(idx_t n, const uint8_t *x, const long *xids) override;
84 
85  /// same as add_with_ids, with precomputed coarse quantizer
86  void add_core (idx_t n, const uint8_t * x, const long *xids,
87  const long *precomputed_idx);
88 
89  /** Search a set of vectors, that are pre-quantized by the IVF
90  * quantizer. Fill in the corresponding heaps with the query
91  * results. search() calls this.
92  *
93  * @param n nb of vectors to query
94  * @param x query vectors, size n * d
95  * @param assign coarse quantization indices, size n * nprobe
96  * @param centroid_dis
97  * distances to coarse centroids, size n * nprobe
98  * @param distances
99  * output distances, size n * k
100  * @param labels output labels, size n * k
101  * @param store_pairs store inv list index + inv list offset
102  * in upper/lower 32 bits of result,
103  * instead of ids (used for reranking).
104  * @param params used to override the object's search parameters
105  */
106  void search_preassigned(idx_t n, const uint8_t *x, idx_t k,
107  const idx_t *assign,
108  const int32_t *centroid_dis,
109  int32_t *distances, idx_t *labels,
110  bool store_pairs,
111  const IVFSearchParameters *params=nullptr
112  ) const;
113 
114  virtual BinaryInvertedListScanner *get_InvertedListScanner (
115  bool store_pairs=false) const;
116 
117  /** assign the vectors, then call search_preassigned */
118  virtual void search(idx_t n, const uint8_t *x, idx_t k,
119  int32_t *distances, idx_t *labels) const override;
120 
121  void reconstruct(idx_t key, uint8_t *recons) const override;
122 
123  /** Reconstruct a subset of the indexed vectors.
124  *
125  * Overrides default implementation to bypass reconstruct() which requires
126  * direct_map to be maintained.
127  *
128  * @param i0 first vector to reconstruct
129  * @param ni nb of vectors to reconstruct
130  * @param recons output array of reconstructed vectors, size ni * d / 8
131  */
132  void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override;
133 
134  /** Similar to search, but also reconstructs the stored vectors (or an
135  * approximation in the case of lossy coding) for the search results.
136  *
137  * Overrides default implementation to avoid having to maintain direct_map
138  * and instead fetch the code offsets through the `store_pairs` flag in
139  * search_preassigned().
140  *
141  * @param recons reconstructed vectors size (n, k, d / 8)
142  */
143  void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
144  int32_t *distances, idx_t *labels,
145  uint8_t *recons) const override;
146 
147  /** Reconstruct a vector given the location in terms of (inv list index +
148  * inv list offset) instead of the id.
149  *
150  * Useful for reconstructing when the direct_map is not maintained and
151  * the inv list offset is computed by search_preassigned() with
152  * `store_pairs` set.
153  */
154  virtual void reconstruct_from_offset(long list_no, long offset,
155  uint8_t* recons) const;
156 
157 
158  /// Dataset manipulation functions
159 
160  long remove_ids(const IDSelector& sel) override;
161 
162  /** moves the entries from another dataset to self. On output,
163  * other is empty. add_id is added to all moved ids (for
164  * sequential ids, this would be this->ntotal) */
165  virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
166 
167  size_t get_list_size(size_t list_no) const
168  { return invlists->list_size(list_no); }
169 
170  /** initialize a direct map
171  *
172  * @param new_maintain_direct_map if true, create a direct map,
173  * else clear it
174  */
175  void make_direct_map(bool new_maintain_direct_map=true);
176 
177  /// 1= perfectly balanced, >1: imbalanced
178  double imbalance_factor() const;
179 
180  /// display some stats about the inverted lists
181  void print_stats() const;
182 
183  void replace_invlists(InvertedLists *il, bool own=false);
184 };
185 
186 
188 
189  using idx_t = Index::idx_t;
190 
191  /// from now on we handle this query.
192  virtual void set_query (const uint8_t *query_vector) = 0;
193 
194  /// following codes come from this inverted list
195  virtual void set_list (idx_t list_no, uint8_t coarse_dis) = 0;
196 
197  /// compute a single query-to-code distance
198  virtual uint32_t distance_to_code (const uint8_t *code) const = 0;
199 
200  /** compute the distances to codes. (distances, labels) should be
201  * organized as a min- or max-heap
202  *
203  * @param n number of codes to scan
204  * @param codes codes to scan (n * code_size)
205  * @param ids corresponding ids (ignored if store_pairs)
206  * @param distances heap distances (size k)
207  * @param labels heap labels (size k)
208  * @param k heap size
209  */
210  virtual size_t scan_codes (size_t n,
211  const uint8_t *codes,
212  const idx_t *ids,
213  int32_t *distances, idx_t *labels,
214  size_t k) const = 0;
215 
216  virtual ~BinaryInvertedListScanner () {}
217 
218 };
219 
220 
221 } // namespace faiss
222 
223 #endif // FAISS_INDEX_BINARY_IVF_H
virtual void set_query(const uint8_t *query_vector)=0
from now on we handle this query.
size_t nprobe
number of probes at query time
void add_core(idx_t n, const uint8_t *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
virtual void reconstruct_from_offset(long list_no, long offset, uint8_t *recons) const
virtual void set_list(idx_t list_no, uint8_t coarse_dis)=0
following codes come from this inverted list
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
ClusteringParameters cp
to override default clustering params
void search_preassigned(idx_t n, const uint8_t *x, idx_t k, const idx_t *assign, const int32_t *centroid_dis, int32_t *distances, idx_t *labels, bool store_pairs, const IVFSearchParameters *params=nullptr) const
size_t nlist
number of possible key values
virtual size_t list_size(size_t list_no) const =0
get the size of a list
bool verbose
verbosity level
Definition: IndexBinary.h:43
IndexBinary * quantizer
quantizer that maps vectors to inverted lists
bool own_fields
whether object owns the quantizer
virtual void merge_from(IndexBinaryIVF &other, idx_t add_id)
void make_direct_map(bool new_maintain_direct_map=true)
int d
vector dimension
Definition: IndexBinary.h:40
Index * clustering_index
to override index used during clustering
void train_q1(size_t n, const uint8_t *x, bool verbose)
Trains the quantizer and calls train_residual to train sub-quantizers.
virtual uint32_t distance_to_code(const uint8_t *code) const =0
compute a single query-to-code distance
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
void train(idx_t n, const uint8_t *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k=1)
Definition: IndexBinary.cpp:29
void reset() override
Removes all elements from the database.
long idx_t
all indices are this type
Definition: Index.h:64
virtual size_t scan_codes(size_t n, const uint8_t *codes, const idx_t *ids, int32_t *distances, idx_t *labels, size_t k) const =0
size_t max_codes
max nb of codes to visit to do a query
void print_stats() const
display some stats about the inverted lists
void add_with_ids(idx_t n, const uint8_t *x, const long *xids) override
void add(idx_t n, const uint8_t *x) override
Quantizes x and calls add_with_key.
long remove_ids(const IDSelector &sel) override
Dataset manipulation functions.
long idx_t
all indices are this type
Definition: IndexBinary.h:38
virtual void search(idx_t n, const uint8_t *x, idx_t k, int32_t *distances, idx_t *labels) const override
void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k, int32_t *distances, idx_t *labels, uint8_t *recons) const override
void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override
void reconstruct(idx_t key, uint8_t *recons) const override
InvertedLists * invlists
Access to the actual data.