Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
AuxIndexStructures.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved
11 // -*- c++ -*-
12 // Auxiliary index structures, that are used in indexes but that can
13 // be forward-declared
14 
15 #ifndef FAISS_AUX_INDEX_STRUCTURES_H
16 #define FAISS_AUX_INDEX_STRUCTURES_H
17 
18 #include <vector>
19 
20 #if __cplusplus >= 201103L
21 #include <unordered_set>
22 #endif
23 
24 #include <set>
25 
26 
27 #include "Index.h"
28 
29 namespace faiss {
30 
31 /** The objective is to have a simple result structure while
32  * minimizing the number of mem copies in the result. The method
33  * do_allocation can be overloaded to allocate the result tables in
34  * the matrix type of a scripting language like Lua or Python. */
36  size_t nq; ///< nb of queries
37  size_t *lims; ///< size (nq + 1)
38 
39  typedef Index::idx_t idx_t;
40 
41  idx_t *labels; ///< result for query i is labels[lims[i]:lims[i+1]]
42  float *distances; ///< corresponding distances (not sorted)
43 
44  size_t buffer_size; ///< size of the result buffers used
45 
46  /// lims must be allocated on input to range_search.
47  explicit RangeSearchResult (size_t nq);
48 
49  /// called when lims contains the nb of elements result entries
50  /// for each query
51  virtual void do_allocation ();
52 
53  virtual ~RangeSearchResult ();
54 };
55 
56 
57 /** Encapsulates a set of ids to remove. */
58 struct IDSelector {
59  typedef Index::idx_t idx_t; ///< id type, aliased from Index::idx_t
60  virtual bool is_member (idx_t id) const = 0; ///< membership test for id; pure virtual, implemented by subclasses
61  virtual ~IDSelector() {} ///< virtual destructor with an empty body
62 };
63 
64 
65 
66 /** remove ids between [imin, imax) */
68  idx_t imin, imax;
69 
70  IDSelectorRange (idx_t imin, idx_t imax);
71  virtual bool is_member (idx_t id) const override;
72  virtual ~IDSelectorRange () {}
73 
74 };
75 
76 
77 /** Remove ids from a set. Repetitions of ids in the indices set
78  * passed to the constructor do not hurt performance. The hash
79  * function used for the bloom filter and GCC's implementation of
80  * unordered_set are just the least significant bits of the id. This
81  * works fine for random ids or ids in sequences but will produce many
82  * hash collisions if lsb's are always the same */
84 
85 #if __cplusplus >= 201103L
86  std::unordered_set<idx_t> set;
87 #else
88  std::set<idx_t> set;
89 #endif
90 
91  typedef unsigned char uint8_t;
92  std::vector<uint8_t> bloom; // assumes low bits of id are a good hash value
93  int nbits;
94  idx_t mask;
95 
96  IDSelectorBatch (long n, const idx_t *indices);
97  virtual bool is_member (idx_t id) const override;
98  virtual ~IDSelectorBatch() {}
99 
100 };
101 
102 
103 // Below are structures used only by Index implementations
104 
105 
106 
107 /** List of temporary buffers used to store results before they are
108  * copied to the RangeSearchResult object. */
109 struct BufferList {
110  typedef Index::idx_t idx_t; ///< id type, aliased from Index::idx_t
111 
112  // size of each buffer, in number of entries
113  size_t buffer_size;
114 
115  struct Buffer {
116  idx_t *ids; ///< ids stored in this buffer
117  float *dis; ///< distances, written at the same position as the matching id
118  };
119 
120  std::vector<Buffer> buffers; ///< all buffers; add() always writes into the last one
121  size_t wp; ///< write pointer in the last buffer.
122 
123  explicit BufferList (size_t buffer_size);
124 
125  ~BufferList ();
126 
127  // create a new buffer
128  void append_buffer ();
129 
130  inline void add (idx_t id, float dis)
131  { // append one (id, dis) entry at the current write pointer
132  if (wp == buffer_size) { // need new buffer (append_buffer is defined elsewhere; presumably it resets wp -- TODO confirm)
133  append_buffer();
134  }
135  Buffer & buf = buffers.back(); // write into the most recently appended buffer
136  buf.ids [wp] = id;
137  buf.dis [wp] = dis;
138  wp++; // advance to the next free slot
139  }
140 
141  /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
142  /// tables dest_ids, dest_dis
143  void copy_range (size_t ofs, size_t n,
144  idx_t * dest_ids, float *dest_dis);
145 
146 };
147 
148 
149 
150 /// the entries in the buffers are split per query
152  RangeSearchResult * res;
153 
154  explicit RangeSearchPartialResult (RangeSearchResult * res_in);
155 
156  /// result structure for a single query
157  struct QueryResult {
158  idx_t qno;
159  size_t nres;
161  inline void add (float dis, idx_t id) {
162  nres++;
163  pres->add (id, dis);
164  }
165  };
166 
167  std::vector<QueryResult> queries;
168 
169  /// begin a new result
170  QueryResult & new_result (idx_t qno);
171 
172 
173  void finalize ();
174 
175 
176  /// called by range_search before do_allocation
177  void set_lims ();
178 
179  /// called by range_search after do_allocation
180  void set_result (bool incremental = false);
181 
182 };
183 
184 
185 }; // namespace faiss
186 
187 
188 
189 #endif
size_t nq
nb of queries
result structure for a single query
void copy_range(size_t ofs, size_t n, idx_t *dest_ids, float *dest_dis)
void set_result(bool incremental=false)
called by range_search after do_allocation
void set_lims()
called by range_search before do_allocation
size_t wp
write pointer in the last buffer.
RangeSearchResult(size_t nq)
lims must be allocated on input to range_search.
float * distances
corresponding distances (not sorted)
long idx_t
all indices are this type
Definition: Index.h:64
QueryResult & new_result(idx_t qno)
begin a new result
the entries in the buffers are split per query
size_t buffer_size
size of the result buffers used
size_t * lims
size (nq + 1)
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]