Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/InvertedLists.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #ifndef FAISS_INVERTEDLISTS_IVF_H
11 #define FAISS_INVERTEDLISTS_IVF_H
12 
13 /**
14  * Definition of inverted lists + a few common classes that implement
15  * the interface.
16  */
17 
18 #include <vector>
19 #include "Index.h"
20 
21 
22 namespace faiss {
23 
24 /** Table of inverted lists
25  * multithreading rules:
26  * - concurrent read accesses are allowed
27  * - concurrent update accesses are allowed
28  * - for resize and add_entries, only concurrent access to different lists
29  * is allowed
30  */
31 struct InvertedLists {
32  typedef Index::idx_t idx_t;
33 
34  size_t nlist; ///< number of possible key values
35  size_t code_size; ///< code size per vector in bytes
36 
37  InvertedLists (size_t nlist, size_t code_size);
38 
39  /*************************
40  * Read only functions */
41 
42  /// get the size of a list
43  virtual size_t list_size(size_t list_no) const = 0;
44 
45  /** get the codes for an inverted list
46  * must be released by release_codes
47  *
48  * @return codes size list_size * code_size
49  */
50  virtual const uint8_t * get_codes (size_t list_no) const = 0;
51 
52  /** get the ids for an inverted list
53  * must be released by release_ids
54  *
55  * @return ids size list_size
56  */
57  virtual const idx_t * get_ids (size_t list_no) const = 0;
58 
59  /// release codes returned by get_codes (default implementation is nop)
60  virtual void release_codes (size_t list_no, const uint8_t *codes) const;
61 
62  /// release ids returned by get_ids
63  virtual void release_ids (size_t list_no, const idx_t *ids) const;
64 
65  /// @return a single id in an inverted list
66  virtual idx_t get_single_id (size_t list_no, size_t offset) const;
67 
68  /// @return a single code in an inverted list
69  /// (should be deallocated with release_codes)
70  virtual const uint8_t * get_single_code (
71  size_t list_no, size_t offset) const;
72 
73  /// prepare the following lists (default does nothing)
74  /// a list can be -1 hence the signed long
75  virtual void prefetch_lists (const idx_t *list_nos, int nlist) const;
76 
77  /*************************
78  * writing functions */
79 
80  /// add one entry to an inverted list
81  virtual size_t add_entry (size_t list_no, idx_t theid,
82  const uint8_t *code);
83 
84  virtual size_t add_entries (
85  size_t list_no, size_t n_entry,
86  const idx_t* ids, const uint8_t *code) = 0;
87 
88  virtual void update_entry (size_t list_no, size_t offset,
89  idx_t id, const uint8_t *code);
90 
91  virtual void update_entries (size_t list_no, size_t offset, size_t n_entry,
92  const idx_t *ids, const uint8_t *code) = 0;
93 
94  virtual void resize (size_t list_no, size_t new_size) = 0;
95 
96  virtual void reset ();
97 
98  /// move all entries from oivf (empty on output)
99  void merge_from (InvertedLists *oivf, size_t add_id);
100 
101  virtual ~InvertedLists ();
102 
103  /*************************
104  * statistics */
105 
106  /// 1= perfectly balanced, >1: imbalanced
107  double imbalance_factor () const;
108 
109  /// display some stats about the inverted lists
110  void print_stats () const;
111 
112  /// sum up list sizes
113  size_t compute_ntotal () const;
114 
115  /**************************************
116  * Scoped inverted lists (for automatic deallocation)
117  *
118  * instead of writing:
119  *
120  * const uint8_t * codes = invlists->get_codes (10);
121  * ... use codes
122  * invlists->release_codes (10, codes);
123  *
124  * write:
125  *
126  * ScopedCodes codes (invlists, 10);
127  * ... use codes.get()
128  * // release called automatically when codes goes out of scope
129  *
130  * the following function call also works:
131  *
132  * foo (123, ScopedCodes (invlists, 10).get(), 456);
133  *
134  */
135 
136  struct ScopedIds { // holds the ids of one list, released on destruction
137  const InvertedLists *il; ///< lists the ids were obtained from (not deleted here)
138  const idx_t *ids; ///< pointer returned by il->get_ids (list_no)
139  size_t list_no; ///< list number, passed back to release_ids
140  /// fetch the ids of list list_no from il; il is used again on destruction
141  ScopedIds (const InvertedLists *il, size_t list_no):
142  il (il), ids (il->get_ids (list_no)), list_no (list_no)
143  {}
144  /// @return the held ids pointer (const, so usable on const objects too)
145  const idx_t *get() const {return ids; }
146  /// @return the i-th id of the held list, without bounds checking
147  idx_t operator [] (size_t i) const {
148  return ids[i];
149  }
150  /// gives ids back via release_ids; a copied object would release them twice
151  ~ScopedIds () {
152  il->release_ids (list_no, ids);
153  }
154  };
155 
156  struct ScopedCodes { // holds the codes of one list, released on destruction
157  const InvertedLists *il; ///< lists the codes were obtained from (not deleted here)
158  const uint8_t *codes; ///< pointer returned by get_codes or get_single_code
159  size_t list_no; ///< list number, passed back to release_codes
160  /// fetch all codes of list list_no from il
161  ScopedCodes (const InvertedLists *il, size_t list_no):
162  il (il), codes (il->get_codes (list_no)), list_no (list_no)
163  {}
164  /// fetch only the code stored at position offset of list list_no
165  ScopedCodes (const InvertedLists *il, size_t list_no, size_t offset):
166  il (il), codes (il->get_single_code (list_no, offset)),
167  list_no (list_no)
168  {}
169  /// @return the held codes pointer (const, so usable on const objects too)
170  const uint8_t *get() const {return codes; }
171  /// gives codes back via release_codes; a copied object would release them twice
172  ~ScopedCodes () {
173  il->release_codes (list_no, codes);
174  }
175  };
176 
177 
178 };
179 
180 
181 /// simple (default) implementation as an array of inverted lists
183  std::vector < std::vector<uint8_t> > codes; // binary codes, size nlist
184  std::vector < std::vector<idx_t> > ids; ///< Inverted lists for indexes
185 
186  ArrayInvertedLists (size_t nlist, size_t code_size);
187 
188  size_t list_size(size_t list_no) const override;
189  const uint8_t * get_codes (size_t list_no) const override;
190  const idx_t * get_ids (size_t list_no) const override;
191 
192  size_t add_entries (
193  size_t list_no, size_t n_entry,
194  const idx_t* ids, const uint8_t *code) override;
195 
196  void update_entries (size_t list_no, size_t offset, size_t n_entry,
197  const idx_t *ids, const uint8_t *code) override;
198 
199  void resize (size_t list_no, size_t new_size) override;
200 
201  virtual ~ArrayInvertedLists ();
202 };
203 
204 /*****************************************************************
205  * Meta-inverted lists
206  *
207  * About terminology: the inverted lists are seen as a sparse matrix,
208  * that can be stacked horizontally, vertically and sliced.
209  *****************************************************************/
210 
212 
213  ReadOnlyInvertedLists (size_t nlist, size_t code_size):
214  InvertedLists (nlist, code_size) {}
215 
216  size_t add_entries (
217  size_t list_no, size_t n_entry,
218  const idx_t* ids, const uint8_t *code) override;
219 
220  void update_entries (size_t list_no, size_t offset, size_t n_entry,
221  const idx_t *ids, const uint8_t *code) override;
222 
223  void resize (size_t list_no, size_t new_size) override;
224 
225 };
226 
227 
228 /// Horizontal stack of inverted lists
230 
231  std::vector<const InvertedLists *>ils;
232 
233  /// build InvertedLists by concatenating nil of them
234  HStackInvertedLists (int nil, const InvertedLists **ils);
235 
236  size_t list_size(size_t list_no) const override;
237  const uint8_t * get_codes (size_t list_no) const override;
238  const idx_t * get_ids (size_t list_no) const override;
239 
240  void prefetch_lists (const idx_t *list_nos, int nlist) const override;
241 
242  void release_codes (size_t list_no, const uint8_t *codes) const override;
243  void release_ids (size_t list_no, const idx_t *ids) const override;
244 
245  idx_t get_single_id (size_t list_no, size_t offset) const override;
246 
247  const uint8_t * get_single_code (
248  size_t list_no, size_t offset) const override;
249 
250 };
251 
253 
254 
255 /// vertical slice of indexes in another InvertedLists
257  const InvertedLists *il;
258  idx_t i0, i1;
259 
260  SliceInvertedLists(const InvertedLists *il, idx_t i0, idx_t i1);
261 
262  size_t list_size(size_t list_no) const override;
263  const uint8_t * get_codes (size_t list_no) const override;
264  const idx_t * get_ids (size_t list_no) const override;
265 
266  void release_codes (size_t list_no, const uint8_t *codes) const override;
267  void release_ids (size_t list_no, const idx_t *ids) const override;
268 
269  idx_t get_single_id (size_t list_no, size_t offset) const override;
270 
271  const uint8_t * get_single_code (
272  size_t list_no, size_t offset) const override;
273 
274  void prefetch_lists (const idx_t *list_nos, int nlist) const override;
275 };
276 
277 
279  std::vector<const InvertedLists *>ils;
280  std::vector<idx_t> cumsz;
281 
282  /// build InvertedLists by concatenating nil of them
283  VStackInvertedLists (int nil, const InvertedLists **ils);
284 
285  size_t list_size(size_t list_no) const override;
286  const uint8_t * get_codes (size_t list_no) const override;
287  const idx_t * get_ids (size_t list_no) const override;
288 
289  void release_codes (size_t list_no, const uint8_t *codes) const override;
290  void release_ids (size_t list_no, const idx_t *ids) const override;
291 
292  idx_t get_single_id (size_t list_no, size_t offset) const override;
293 
294  const uint8_t * get_single_code (
295  size_t list_no, size_t offset) const override;
296 
297  void prefetch_lists (const idx_t *list_nos, int nlist) const override;
298 
299 };
300 
301 
302 /** use the first inverted lists if they are non-empty otherwise use the second
303  *
304  * This is useful if il1 has a few inverted lists that are too long,
305  * and il0 has replacement lists for those, with empty lists for
306  * the others. */
308 
309  const InvertedLists *il0;
310  const InvertedLists *il1;
311 
313  const InvertedLists *il1);
314 
315  size_t list_size(size_t list_no) const override;
316  const uint8_t * get_codes (size_t list_no) const override;
317  const idx_t * get_ids (size_t list_no) const override;
318 
319  void release_codes (size_t list_no, const uint8_t *codes) const override;
320  void release_ids (size_t list_no, const idx_t *ids) const override;
321 
322  idx_t get_single_id (size_t list_no, size_t offset) const override;
323 
324  const uint8_t * get_single_code (
325  size_t list_no, size_t offset) const override;
326 
327  void prefetch_lists (const idx_t *list_nos, int nlist) const override;
328 
329 };
330 
331 } // namespace faiss
332 
333 
334 #endif
const uint8_t * get_codes(size_t list_no) const override
void prefetch_lists(const idx_t *list_nos, int nlist) const override
idx_t get_single_id(size_t list_no, size_t offset) const override
simple (default) implementation as an array of inverted lists
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
const idx_t * get_ids(size_t list_no) const override
virtual const idx_t * get_ids(size_t list_no) const =0
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
const idx_t * get_ids(size_t list_no) const override
idx_t get_single_id(size_t list_no, size_t offset) const override
vertical slice of indexes in another InvertedLists
void prefetch_lists(const idx_t *list_nos, int nlist) const override
virtual size_t list_size(size_t list_no) const =0
get the size of a list
size_t list_size(size_t list_no) const override
get the size of a list
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
void merge_from(InvertedLists *oivf, size_t add_id)
move all entries from oivf (empty on output)
virtual idx_t get_single_id(size_t list_no, size_t offset) const
long idx_t
all indices are this type
Definition: Index.h:62
size_t code_size
code size per vector in bytes
Definition: InvertedLists.h:35
virtual const uint8_t * get_single_code(size_t list_no, size_t offset) const
VStackInvertedLists(int nil, const InvertedLists **ils)
build InvertedLists by concatenating nil of them
size_t compute_ntotal() const
sum up list sizes
const uint8_t * get_codes(size_t list_no) const override
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code)
add one entry to an inverted list
const uint8_t * get_codes(size_t list_no) const override
size_t list_size(size_t list_no) const override
get the size of a list
void prefetch_lists(const idx_t *list_nos, int nlist) const override
size_t list_size(size_t list_no) const override
get the size of a list
void prefetch_lists(const idx_t *list_nos, int nlist) const override
HStackInvertedLists(int nil, const InvertedLists **ils)
build InvertedLists by concatenating nil of them
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
Horizontal stack of inverted lists.
idx_t get_single_id(size_t list_no, size_t offset) const override
const uint8_t * get_codes(size_t list_no) const override
const idx_t * get_ids(size_t list_no) const override
size_t list_size(size_t list_no) const override
get the size of a list
size_t nlist
number of possible key values
Definition: InvertedLists.h:34
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
std::vector< std::vector< idx_t > > ids
Inverted lists for indexes.
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
const idx_t * get_ids(size_t list_no) const override
void print_stats() const
display some stats about the inverted lists
virtual const uint8_t * get_codes(size_t list_no) const =0
virtual void prefetch_lists(const idx_t *list_nos, int nlist) const
virtual void release_ids(size_t list_no, const idx_t *ids) const
release ids returned by get_ids
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is nop)
const uint8_t * get_codes(size_t list_no) const override
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is nop)
size_t list_size(size_t list_no) const override
get the size of a list
virtual void release_codes(size_t list_no, const uint8_t *codes) const
release codes returned by get_codes (default implementation is nop)
idx_t get_single_id(size_t list_no, size_t offset) const override
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is nop)
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is nop)
const idx_t * get_ids(size_t list_no) const override