Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/AuxIndexStructures.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the CC-by-NC license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved
10 // -*- c++ -*-
11 // Auxiliary index structures, that are used in indexes but that can
12 // be forward-declared
13 
14 #ifndef FAISS_AUX_INDEX_STRUCTURES_H
15 #define FAISS_AUX_INDEX_STRUCTURES_H
16 
17 #include <vector>
18 
19 #if __cplusplus >= 201103L
20 #include <unordered_set>
21 #endif
22 
23 #include <set>
24 
25 
26 #include "Index.h"
27 
28 namespace faiss {
29 
30 /** The objective is to have a simple result structure while
31  * minimizing the number of mem copies in the result. The method
32  * do_allocation can be overloaded to allocate the result tables in
33  * the matrix type of a scripting language like Lua or Python. */
35  size_t nq; ///< nb of queries
36  size_t *lims; ///< size (nq + 1)
37 
38  typedef Index::idx_t idx_t;
39 
40  idx_t *labels; ///< result for query i is labels[lims[i]:lims[i+1]]
41  float *distances; ///< corresponding distances (not sorted)
42 
43  size_t buffer_size; ///< size of the result buffers used
44 
45  /// lims must be allocated on input to range_search.
46  explicit RangeSearchResult (size_t nq);
47 
48  /// called when lims contains the nb of elements result entries
49  /// for each query
50  virtual void do_allocation ();
51 
52  virtual ~RangeSearchResult ();
53 };
54 
55 
56 /** Encapsulates a set of ids to remove. */
57 struct IDSelector { // abstract interface: concrete selectors override is_member()
58  typedef Index::idx_t idx_t;
59  virtual bool is_member (idx_t id) const = 0; ///< membership test for id; pure virtual, must be overridden
60  virtual ~IDSelector() {} ///< virtual so that deleting through an IDSelector pointer is well-defined
61 };
62 
63 
64 
65 /** remove ids between [imin, imax) */
67  idx_t imin, imax;
68 
69  IDSelectorRange (idx_t imin, idx_t imax);
70  bool is_member(idx_t id) const override;
71  ~IDSelectorRange() override {}
72 };
73 
74 
75 /** Remove ids from a set. Repetitions of ids in the indices set
76  * passed to the constructor do not hurt performance. The hash
77  * function used for the bloom filter and GCC's implementation of
78  * unordered_set are just the least significant bits of the id. This
79  * works fine for random ids or ids in sequences but will produce many
80  * hash collisions if lsb's are always the same */
82 
83 #if __cplusplus >= 201103L
84  std::unordered_set<idx_t> set;
85 #else
86  std::set<idx_t> set;
87 #endif
88 
89  typedef unsigned char uint8_t;
90  std::vector<uint8_t> bloom; // assumes low bits of id are a good hash value
91  int nbits;
92  idx_t mask;
93 
94  IDSelectorBatch (long n, const idx_t *indices);
95  bool is_member(idx_t id) const override;
96  ~IDSelectorBatch() override {}
97 };
98 
99 
100 // Below are structures used only by Index implementations
101 
102 
103 
104 /** List of temporary buffers used to store results before they are
105  * copied to the RangeSearchResult object. */
106 struct BufferList {
107  typedef Index::idx_t idx_t;
108 
109  // capacity of each buffer, counted in entries (not bytes)
110  size_t buffer_size;
111 
112  struct Buffer { // one chunk of results, stored as two parallel arrays
113  idx_t *ids; ///< ids of the entries in this buffer
114  float *dis; ///< distances; dis[i] belongs to ids[i] (add() writes both at the same index)
115  };
116 
117  std::vector<Buffer> buffers; ///< all buffers so far; add() always writes into the last one
118  size_t wp; ///< write pointer in the last buffer.
119 
120  explicit BufferList (size_t buffer_size); // presumably stores buffer_size in the field of the same name — definition not shown, confirm
121 
122  ~BufferList ();
123 
124  // create a new buffer (add() calls this when the last buffer is full)
125  void append_buffer ();
126 
127  inline void add (idx_t id, float dis) ///< append one (id, dis) entry at the write pointer
128  {
129  if (wp == buffer_size) { // need new buffer
130  append_buffer(); // presumably also resets wp to 0 — definition not shown, confirm
131  }
132  Buffer & buf = buffers.back(); // NOTE(review): back() needs a non-empty vector, so wp == buffer_size must hold while buffers is empty — confirm in constructor
133  buf.ids [wp] = id;
134  buf.dis [wp] = dis;
135  wp++;
136  }
137 
138  /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
139  /// tables dest_ids, dest_dis
140  void copy_range (size_t ofs, size_t n,
141  idx_t * dest_ids, float *dest_dis);
142 
143 };
144 
145 
146 
147 /// the entries in the buffers are split per query
149  RangeSearchResult * res;
150 
151  explicit RangeSearchPartialResult (RangeSearchResult * res_in);
152 
153  /// result structure for a single query
154  struct QueryResult {
155  idx_t qno;
156  size_t nres;
158  inline void add (float dis, idx_t id) {
159  nres++;
160  pres->add (id, dis);
161  }
162  };
163 
164  std::vector<QueryResult> queries;
165 
166  /// begin a new result
167  QueryResult & new_result (idx_t qno);
168 
169 
170  void finalize ();
171 
172 
173  /// called by range_search before do_allocation
174  void set_lims ();
175 
176  /// called by range_search after do_allocation
177  void set_result (bool incremental = false);
178 
179 };
180 
181 
182 }; // namespace faiss
183 
184 
185 
186 #endif
size_t nq
nb of queries
result structure for a single query
void copy_range(size_t ofs, size_t n, idx_t *dest_ids, float *dest_dis)
void set_result(bool incremental=false)
called by range_search after do_allocation
void set_lims()
called by range_search before do_allocation
size_t wp
write pointer in the last buffer.
RangeSearchResult(size_t nq)
lims must be allocated on input to range_search.
float * distances
corresponding distances (not sorted)
long idx_t
all indices are this type
Definition: Index.h:62
QueryResult & new_result(idx_t qno)
begin a new result
the entries in the buffers are split per query
size_t buffer_size
size of the result buffers used
size_t * lims
size (nq + 1)
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]