Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/OnDiskInvertedLists.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #ifndef FAISS_ON_DISK_INVERTED_LISTS_H
11 #define FAISS_ON_DISK_INVERTED_LISTS_H
12 
13 #include <vector>
14 #include <list>
15 
16 #include "IndexIVF.h"
17 
18 namespace faiss {
19 
20 
21 struct LockLevels;
22 
23 /** On-disk storage of inverted lists.
24  *
25  * The data is stored in a mmapped chunk of memory (base pointer ptr,
26  * size totsize). Each list is a range of memory (described by an
27  * object List) that contains:
28  *
29  * - uint8_t codes[capacity * code_size]
30  * - followed by idx_t ids[capacity]
31  *
32  * in each of the arrays, the size <= capacity first elements are
33  * used, the rest is not initialized.
34  *
35  * Addition and resize are supported by:
36  * - rounding up the capacity of the lists to a power of two
37  * - maintaining a list of empty slots, sorted by size.
38  * - resizing the mmapped block as needed.
39  *
40  * An OnDiskInvertedLists is compact if the size == capacity for all
41  * lists and there are no available slots.
42  *
43  * Addition to the invlists is slow. For incremental add it is better
44  * to use a default ArrayInvertedLists object and convert it to an
45  * OnDisk with merge_from.
46  *
47  * When it is known that a set of lists will be accessed, it is useful
48  * to call prefetch_lists, that launches a set of threads to read the
49  * lists in parallel.
50  */
52 
53  struct List { // bookkeeping record describing one inverted list
54  size_t size; // number of entries currently in the inverted list
55  size_t capacity; // number of entries the allocated range can hold
56  size_t offset; // offset of the list's range in the buffer (bytes)
57  List (); // default constructor (declaration only in this header)
58  };
59 
60  // size nlist
61  std::vector<List> lists;
62 
63  struct Slot { // one region of the buffer; the class comment refers to these as "empty slots"
64  size_t offset; // offset of the region, in bytes (presumably relative to the mmap base ptr -- confirm)
65  size_t capacity; // size of the region, in bytes
66  Slot (size_t offset, size_t capacity); // construct from an explicit offset and capacity
67  Slot (); // default constructor (declaration only in this header)
68  };
69 
70  // size whatever space remains
71  std::list<Slot> slots;
72 
73  std::string filename;
74  size_t totsize;
75  uint8_t *ptr; // mmap base pointer
76  bool read_only; /// are inverted lists mapped read-only
77 
78  OnDiskInvertedLists (size_t nlist, size_t code_size,
79  const char *filename);
80 
81  size_t list_size(size_t list_no) const override;
82  const uint8_t * get_codes (size_t list_no) const override;
83  const idx_t * get_ids (size_t list_no) const override;
84 
85  size_t add_entries (
86  size_t list_no, size_t n_entry,
87  const idx_t* ids, const uint8_t *code) override;
88 
89  void update_entries (size_t list_no, size_t offset, size_t n_entry,
90  const idx_t *ids, const uint8_t *code) override;
91 
92  void resize (size_t list_no, size_t new_size) override;
93 
94  // copy all inverted lists into *this, in compact form (without
95  // allocating slots)
96  size_t merge_from (const InvertedLists **ils, int n_il, bool verbose=false);
97 
98  /// restrict the inverted lists to l0:l1 without touching the mmapped region
99  void crop_invlists(size_t l0, size_t l1);
100 
101  void prefetch_lists (const idx_t *list_nos, int nlist) const override;
102 
103  virtual ~OnDiskInvertedLists ();
104 
105  // private
106 
107  LockLevels * locks;
108 
109  // encapsulates the threads that are busy prefetching
110  struct OngoingPrefetch;
111  OngoingPrefetch *pf;
112  int prefetch_nthread;
113 
114  void do_mmap ();
115  void update_totsize (size_t new_totsize);
116  void resize_locked (size_t list_no, size_t new_size);
117  size_t allocate_slot (size_t capacity);
118  void free_slot (size_t offset, size_t capacity);
119 
120  // empty constructor for the I/O functions
122 };
123 
124 
125 } // namespace faiss
126 
127 #endif
void crop_invlists(size_t l0, size_t l1)
restrict the inverted lists to l0:l1 without touching the mmapped region
const idx_t * get_ids(size_t list_no) const override
void prefetch_lists(const idx_t *list_nos, int nlist) const override
size_t code_size
code size per vector in bytes
Definition: InvertedLists.h:35
const uint8_t * get_codes(size_t list_no) const override
size_t list_size(size_t list_no) const override
get the size of a list
size_t nlist
number of possible key values
Definition: InvertedLists.h:34