Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/AuxIndexStructures.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved
10 // -*- c++ -*-
11 // Auxiliary index structures, that are used in indexes but that can
12 // be forward-declared
13 
14 #ifndef FAISS_AUX_INDEX_STRUCTURES_H
15 #define FAISS_AUX_INDEX_STRUCTURES_H
16 
17 #include <vector>
18 #include <unordered_set>
19 
20 
21 #include "Index.h"
22 
23 namespace faiss {
24 
25 /** The objective is to have a simple result structure while
26  * minimizing the number of memory copies in the result. The method
27  * do_allocation can be overridden to allocate the result tables in
28  * the matrix type of a scripting language like Lua or Python. */
// NOTE(review): listing line 29 is absent from this extract. It presumably held the opening
// `struct RangeSearchResult {` matched by the closing `};` below -- restore it from the original header.
30  size_t nq; ///< nb of queries
31  size_t *lims; ///< size (nq + 1)
32 
33  typedef Index::idx_t idx_t;
34 
35  idx_t *labels; ///< result for query i is labels[lims[i]:lims[i+1]]
36  float *distances; ///< corresponding distances (not sorted)
37 
38  size_t buffer_size; ///< size of the result buffers used
39 
40  /// lims must be allocated on input to range_search.
41  explicit RangeSearchResult (idx_t nq, bool alloc_lims=true); ///< alloc_lims presumably selects whether the constructor allocates lims -- confirm in the .cpp
42 
43  /// called once lims holds the number of result entries
44  /// for each query; virtual so that a subclass can redefine the allocation
45  virtual void do_allocation ();
46 
47  virtual ~RangeSearchResult (); ///< virtual destructor, defined out of line
48 };
49 
50 
51 /** Encapsulates a set of ids to remove. */
52 struct IDSelector {
53  typedef Index::idx_t idx_t;
54  virtual bool is_member (idx_t id) const = 0; ///< membership test for id; pure virtual, so every concrete selector must implement it
55  virtual ~IDSelector() {} ///< virtual so a selector can be destroyed through an IDSelector pointer
56 };
57 
58 
59 
60 /** remove ids between [imin, imax) */
// NOTE(review): listing line 61 is absent from this extract. It presumably held the opening
// `struct IDSelectorRange: IDSelector {` -- the `override` specifiers and the unqualified idx_t
// below both need a base that provides them. Restore it from the original header.
62  idx_t imin, imax; ///< range bounds (the constructor takes parameters of the same names)
63 
64  IDSelectorRange (idx_t imin, idx_t imax);
65  bool is_member(idx_t id) const override; ///< membership test, defined out of line
66  ~IDSelectorRange() override {}
67 };
68 
69 
70 /** Remove ids from a set. Repetitions of ids in the indices set
71  * passed to the constructor do not hurt performance. The hash
72  * function used for the bloom filter and GCC's implementation of
73  * unordered_set are just the least significant bits of the id. This
74  * works fine for random ids or ids in sequences but will produce many
75  * hash collisions if the least significant bits are always the same */
// NOTE(review): listing line 76 is absent from this extract. It presumably held the opening
// `struct IDSelectorBatch: IDSelector {` -- the `override` specifiers and the unqualified idx_t
// below both need a base that provides them. Restore it from the original header.
77 
78  std::unordered_set<idx_t> set; ///< presumably the exact set of selected ids -- confirm in the .cpp
79 
80  typedef unsigned char uint8_t; ///< local alias used as the element type of bloom
81  std::vector<uint8_t> bloom; // assumes low bits of id are a good hash value
82  int nbits; ///< presumably the number of id bits used to index bloom -- TODO confirm
83  idx_t mask; ///< presumably the bit mask matching nbits -- TODO confirm
84 
85  IDSelectorBatch (long n, const idx_t *indices); ///< n is presumably the number of entries in indices -- confirm in the .cpp
86  bool is_member(idx_t id) const override; ///< membership test, defined out of line
87  ~IDSelectorBatch() override {}
88 };
89 
90 
91 // Below are structures used only by Index implementations
92 
93 
94 
95 /** List of temporary buffers used to store results before they are
96  * copied to the RangeSearchResult object. */
97 struct BufferList {
98  typedef Index::idx_t idx_t;
99 
100  // size of each buffer, in number of (id, dis) entries
101  size_t buffer_size;
102 
103  struct Buffer {
104  idx_t *ids; ///< id slots; add() writes ids[wp]
105  float *dis; ///< distance slots; add() writes dis[wp]
106  };
107 
108  std::vector<Buffer> buffers; ///< add() always writes into buffers.back()
109  size_t wp; ///< write pointer in the last buffer.
110 
111  explicit BufferList (size_t buffer_size);
112 
113  ~BufferList ();
114 
115  // create a new buffer (defined out of line; add() calls it when the last buffer is full)
116  void append_buffer ();
117 
118  inline void add (idx_t id, float dis) ///< append one (id, dis) entry at the write pointer
119  {
120  if (wp == buffer_size) { // need new buffer
121  append_buffer(); // NOTE(review): must push a buffer and reset wp for the writes below to be valid -- confirm in the .cpp
122  }
123  Buffer & buf = buffers.back(); // NOTE(review): needs a non-empty buffers; presumably the constructor starts with wp == buffer_size so the first add() allocates -- confirm
124  buf.ids [wp] = id;
125  buf.dis [wp] = dis;
126  wp++;
127  }
128 
129  /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
130  /// tables dest_ids, dest_dis
131  void copy_range (size_t ofs, size_t n,
132  idx_t * dest_ids, float *dest_dis);
133 
134 };
135 
136 
137 
138 /// the entries in the buffers are split per query
// NOTE(review): listing line 139 is absent from this extract. It presumably held the opening
// `struct RangeSearchPartialResult: BufferList {` -- the unqualified idx_t below needs a base or
// enclosing scope that declares it. Restore it from the original header.
140  RangeSearchResult * res; ///< presumably the result object that this partial result feeds -- confirm in the .cpp
141 
142  explicit RangeSearchPartialResult (RangeSearchResult * res_in);
143 
144  /// result structure for a single query
145  struct QueryResult {
146  idx_t qno; ///< presumably the query number passed to new_result -- TODO confirm
147  size_t nres; ///< incremented once per add() call
// NOTE(review): listing line 148 is absent from this extract. add() below dereferences `pres`,
// which is not declared in the visible lines -- restore its declaration from the original header.
149  inline void add (float dis, idx_t id) { ///< takes (dis, id) but forwards (id, dis) to pres->add
150  nres++;
151  pres->add (id, dis);
152  }
153  };
154 
155  std::vector<QueryResult> queries;
156 
157  /// begin a new result
158  QueryResult & new_result (idx_t qno);
159 
160 
161  void finalize (); ///< no description in this header -- TODO document (see the definition)
162 
163 
164  /// called by range_search before do_allocation
165  void set_lims ();
166 
167  /// called by range_search after do_allocation
168  void set_result (bool incremental = false);
169 
170 };
171 
172 
173 }; // namespace faiss
174 
175 
176 
177 #endif
size_t nq
nb of queries
result structure for a single query
void copy_range(size_t ofs, size_t n, idx_t *dest_ids, float *dest_dis)
void set_result(bool incremental=false)
called by range_search after do_allocation
void set_lims()
called by range_search before do_allocation
RangeSearchResult(idx_t nq, bool alloc_lims=true)
lims must be allocated on input to range_search.
size_t wp
write pointer in the last buffer.
float * distances
corresponding distances (not sorted)
long idx_t
all indices are this type
Definition: Index.h:62
QueryResult & new_result(idx_t qno)
begin a new result
the entries in the buffers are split per query
size_t buffer_size
size of the result buffers used
size_t * lims
size (nq + 1)
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]