Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/OnDiskInvertedLists.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #ifndef FAISS_ON_DISK_INVERTED_LISTS_H
12 #define FAISS_ON_DISK_INVERTED_LISTS_H
13 
14 #include <vector>
15 #include <list>
16 
17 #include "IndexIVF.h"
18 
19 namespace faiss {
20 
21 
22 struct LockLevels;
23 
24 /** On-disk storage of inverted lists.
25  *
26  * The data is stored in a mmapped chunk of memory (base pointer ptr,
27  * size totsize). Each list is a range of memory (described by an
28  * object List) that contains:
29  *
30  * - uint8_t codes[capacity * code_size]
31  * - followed by idx_t ids[capacity]
32  *
33  * in each of the arrays, the size <= capacity first elements are
34  * used, the rest is not initialized.
35  *
36  * Addition and resize are supported by:
37  * - rounding up the capacity of the lists to a power of two
38  * - maintaining a list of empty slots, sorted by size
39  * - resizing the mmapped block as needed.
40  *
41  * An OnDiskInvertedLists is compact if the size == capacity for all
42  * lists and there are no available slots.
43  *
44  * Addition to the invlists is slow. For incremental add it is better
45  * to use a default ArrayInvertedLists object and convert it to an
46  * OnDisk with merge_from.
47  *
48  * When it is known that a set of lists will be accessed, it is useful
49  * to call prefetch_lists, that launches a set of threads to read the
50  * lists in parallel.
51  */
53 
54  struct List {
55  size_t size; // number of entries in use; only the first `size` (<= capacity) entries are valid
56  size_t capacity; // number of entries allocated for this list
57  size_t offset; // offset of the list's data in the buffer (bytes); presumably relative to the mmap base pointer -- confirm
58  List ();
59  };
60 
61  std::vector<List> lists;
62 
63  struct Slot {
64  size_t offset; // start of the slot, in bytes (presumably an offset in the mmapped buffer, like List::offset -- confirm)
65  size_t capacity; // size of the slot, in bytes (note: List::capacity is in entries, this one is in bytes)
66  Slot (size_t offset, size_t capacity);
67  Slot ();
68  };
69 
70  std::list<Slot> slots;
71 
72  std::string filename; // presumably the file that backs the mmapped storage -- TODO confirm against the constructor
73  size_t totsize; // size of the mmapped chunk that starts at ptr
74  uint8_t *ptr; // mmap base pointer
75  bool read_only; /// are inverted lists mapped read-only
76 
77  OnDiskInvertedLists (size_t nlist, size_t code_size,
78  const char *filename);
79 
80  size_t list_size(size_t list_no) const override;
81  const uint8_t * get_codes (size_t list_no) const override;
82  const idx_t * get_ids (size_t list_no) const override;
83 
84  size_t add_entries (
85  size_t list_no, size_t n_entry,
86  const idx_t* ids, const uint8_t *code) override;
87 
88  void update_entries (size_t list_no, size_t offset, size_t n_entry,
89  const idx_t *ids, const uint8_t *code) override;
90 
91  void resize (size_t list_no, size_t new_size) override;
92 
93  // copy all inverted lists into *this, in compact form (without
94  // allocating slots)
95  size_t merge_from (const InvertedLists **ils, int n_il);
96 
97  void prefetch_lists (const long *list_nos, int nlist) const override;
98 
99  virtual ~OnDiskInvertedLists ();
100 
101  // private
102 
103  LockLevels * locks;
104 
105  // encapsulates the threads that are busy prefetching
106  struct OngoingPrefetch;
107  OngoingPrefetch *pf;
108 
109  void do_mmap ();
110  void update_totsize (size_t new_totsize);
111  void resize_locked (size_t list_no, size_t new_size);
112  size_t allocate_slot (size_t capacity);
113  void free_slot (size_t offset, size_t capacity);
114 
115  // empty constructor for the I/O functions
117 };
118 
119 
120 } // namespace faiss
121 
122 #endif
const idx_t * get_ids(size_t list_no) const override
size_t code_size
code size per vector in bytes
Definition: InvertedLists.h:36
const uint8_t * get_codes(size_t list_no) const override
size_t list_size(size_t list_no) const override
get the size of a list
size_t nlist
number of possible key values
Definition: InvertedLists.h:35
void prefetch_lists(const long *list_nos, int nlist) const override