Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/AuxIndexStructures.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 // Auxiliary index structures, that are used in indexes but that can
12 // be forward-declared
13 
14 #ifndef FAISS_AUX_INDEX_STRUCTURES_H
15 #define FAISS_AUX_INDEX_STRUCTURES_H
16 
17 #include <stdint.h>
18 
19 #include <vector>
20 #include <unordered_set>
21 
22 
23 #include "Index.h"
24 
25 namespace faiss {
26 
27 /** The objective is to have a simple result structure while
28  * minimizing the number of mem copies in the result. The method
29  * do_allocation can be overloaded to allocate the result tables in
30  * the matrix type of a scripting language like Lua or Python. */
32  size_t nq; ///< nb of queries
33  size_t *lims; ///< size (nq + 1)
34 
35  typedef Index::idx_t idx_t;
36 
37  idx_t *labels; ///< result for query i is labels[lims[i]:lims[i+1]]
38  float *distances; ///< corresponding distances (not sorted)
39 
40  size_t buffer_size; ///< size of the result buffers used
41 
42  /// lims must be allocated on input to range_search.
43  explicit RangeSearchResult (idx_t nq, bool alloc_lims=true);
44 
45  /// called when lims contains the number of result entries
46  /// for each query
47  virtual void do_allocation ();
48 
49  virtual ~RangeSearchResult ();
50 };
51 
52 
53 /** Encapsulates a set of ids to remove. */
54 struct IDSelector {
55  typedef Index::idx_t idx_t;
56  virtual bool is_member (idx_t id) const = 0;
57  virtual ~IDSelector() {}
58 };
59 
60 
61 
62 /** remove ids between [imin, imax) */
64  idx_t imin, imax;
65 
66  IDSelectorRange (idx_t imin, idx_t imax);
67  bool is_member(idx_t id) const override;
68  ~IDSelectorRange() override {}
69 };
70 
71 
72 /** Remove ids from a set. Repetitions of ids in the indices set
73  * passed to the constructor do not hurt performance. The hash
74  * function used for the bloom filter and GCC's implementation of
75  * unordered_set are just the least significant bits of the id. This
76  * works fine for random ids or ids in sequences but will produce many
77  * hash collisions if lsb's are always the same */
79 
80  std::unordered_set<idx_t> set;
81 
82  typedef unsigned char uint8_t;
83  std::vector<uint8_t> bloom; // assumes low bits of id are a good hash value
84  int nbits;
85  idx_t mask;
86 
87  IDSelectorBatch (long n, const idx_t *indices);
88  bool is_member(idx_t id) const override;
89  ~IDSelectorBatch() override {}
90 };
91 
92 
93 // Below are structures used only by Index implementations
94 
95 
96 
97 /** List of temporary buffers used to store results before they are
98  * copied to the RangeSearchResult object. */
99 struct BufferList {
100  typedef Index::idx_t idx_t;
101 
102  // buffer sizes in # entries
103  size_t buffer_size;
104 
105  struct Buffer {
106  idx_t *ids;
107  float *dis;
108  };
109 
110  std::vector<Buffer> buffers;
111  size_t wp; ///< write pointer in the last buffer.
112 
113  explicit BufferList (size_t buffer_size);
114 
115  ~BufferList ();
116 
117  // create a new buffer
118  void append_buffer ();
119 
120  inline void add (idx_t id, float dis)
121  {
122  if (wp == buffer_size) { // need new buffer
123  append_buffer();
124  }
125  Buffer & buf = buffers.back();
126  buf.ids [wp] = id;
127  buf.dis [wp] = dis;
128  wp++;
129  }
130 
131  /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
132  /// tables dest_ids, dest_dis
133  void copy_range (size_t ofs, size_t n,
134  idx_t * dest_ids, float *dest_dis);
135 
136 };
137 
138 
139 
140 /// the entries in the buffers are split per query
142  RangeSearchResult * res;
143 
144  explicit RangeSearchPartialResult (RangeSearchResult * res_in);
145 
146  /// result structure for a single query
147  struct QueryResult {
148  idx_t qno;
149  size_t nres;
151  inline void add (float dis, idx_t id) {
152  nres++;
153  pres->add (id, dis);
154  }
155  };
156 
157  std::vector<QueryResult> queries;
158 
159  /// begin a new result
160  QueryResult & new_result (idx_t qno);
161 
162  void finalize ();
163 
164  /// called by range_search before do_allocation
165  void set_lims ();
166 
167  /// called by range_search after do_allocation
168  void set_result (bool incremental = false);
169 
170 };
171 
172 /***********************************************************
173  * Abstract I/O objects
174  ***********************************************************/
175 
176 
/// Abstract input object, called like fread.
struct IOReader {
    /// name that can be used in error messages
    std::string name;

    /// fread-style call: destination pointer, item size, number of items
    /// (presumably returns the number of items read, as fread does)
    virtual size_t operator() (void *ptr, size_t size, size_t nitems) = 0;

    /// return a file number that can be memory-mapped
    virtual int fileno ();

    virtual ~IOReader () = default;
};
190 
/// Abstract output object, called like fwrite.
struct IOWriter {
    /// name that can be used in error messages
    std::string name;

    /// fwrite-style call: source pointer, item size, number of items
    /// (presumably returns the number of items written, as fwrite does)
    virtual size_t operator() (const void *ptr, size_t size, size_t nitems) = 0;

    /// return a file number that can be memory-mapped
    virtual int fileno ();

    virtual ~IOWriter () = default;
};
204 
205 
207  std::vector<uint8_t> data;
208  size_t rp = 0;
209  size_t operator()(void *ptr, size_t size, size_t nitems) override;
210 };
211 
213  std::vector<uint8_t> data;
214  size_t operator()(const void *ptr, size_t size, size_t nitems) override;
215 };
216 
217 
218 
219 }; // namespace faiss
220 
221 
222 
223 #endif
size_t nq
nb of queries
result structure for a single query
void copy_range(size_t ofs, size_t n, idx_t *dest_ids, float *dest_dis)
void set_result(bool incremental=false)
called by range_search after do_allocation
void set_lims()
called by range_search before do_allocation
RangeSearchResult(idx_t nq, bool alloc_lims=true)
lims must be allocated on input to range_search.
size_t wp
write pointer in the last buffer.
float * distances
corresponding distances (not sorted)
long idx_t
all indices are this type
Definition: Index.h:64
QueryResult & new_result(idx_t qno)
begin a new result
the entries in the buffers are split per query
size_t buffer_size
size of the result buffers used
size_t * lims
size (nq + 1)
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]