Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/AuxIndexStructures.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #include "AuxIndexStructures.h"
12 
13 #include "FaissAssert.h"
14 
15 #include <cstring>
16 
17 namespace faiss {
18 
19 
20 /***********************************************************************
21  * RangeSearchResult
22  ***********************************************************************/
23 
24 RangeSearchResult::RangeSearchResult (idx_t nq, bool alloc_lims): nq (nq) {
25  if (alloc_lims) {
26  lims = new size_t [nq + 1];
27  memset (lims, 0, sizeof(*lims) * (nq + 1));
28  } else {
29  lims = nullptr;
30  }
31  labels = nullptr;
32  distances = nullptr;
33  buffer_size = 1024 * 256;
34 }
35 
36 /// called when lims contains the nb of elements result entries
37 /// for each query
39  size_t ofs = 0;
40  for (int i = 0; i < nq; i++) {
41  size_t n = lims[i];
42  lims [i] = ofs;
43  ofs += n;
44  }
45  lims [nq] = ofs;
46  labels = new idx_t [ofs];
47  distances = new float [ofs];
48 }
49 
50 RangeSearchResult::~RangeSearchResult () {
51  delete [] labels;
52  delete [] distances;
53  delete [] lims;
54 }
55 
56 /***********************************************************************
57  * BufferList
58  ***********************************************************************/
59 
60 
61 BufferList::BufferList (size_t buffer_size):
62  buffer_size (buffer_size)
63 {
64  wp = buffer_size;
65 }
66 
67 BufferList::~BufferList ()
68 {
69  for (int i = 0; i < buffers.size(); i++) {
70  delete [] buffers[i].ids;
71  delete [] buffers[i].dis;
72  }
73 }
74 
75 
76 
77 void BufferList::append_buffer ()
78 {
79  Buffer buf = {new idx_t [buffer_size], new float [buffer_size]};
80  buffers.push_back (buf);
81  wp = 0;
82 }
83 
84 /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
85 /// tables dest_ids, dest_dis
86 void BufferList::copy_range (size_t ofs, size_t n,
87  idx_t * dest_ids, float *dest_dis)
88 {
89  size_t bno = ofs / buffer_size;
90  ofs -= bno * buffer_size;
91  while (n > 0) {
92  size_t ncopy = ofs + n < buffer_size ? n : buffer_size - ofs;
93  Buffer buf = buffers [bno];
94  memcpy (dest_ids, buf.ids + ofs, ncopy * sizeof(*dest_ids));
95  memcpy (dest_dis, buf.dis + ofs, ncopy * sizeof(*dest_dis));
96  dest_ids += ncopy;
97  dest_dis += ncopy;
98  ofs = 0;
99  bno ++;
100  n -= ncopy;
101  }
102 }
103 
104 
105 /***********************************************************************
106  * RangeSearchPartialResult
107  ***********************************************************************/
108 
109 
110 RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in):
111  BufferList(res_in->buffer_size),
112  res(res_in)
113 {}
114 
115 
116 /// begin a new result
117 RangeSearchPartialResult::QueryResult &
119 {
120  QueryResult qres = {qno, 0, this};
121  queries.push_back (qres);
122  return queries.back();
123 }
124 
125 
126 void RangeSearchPartialResult::finalize ()
127 {
128  set_lims ();
129 #pragma omp barrier
130 
131 #pragma omp single
132  res->do_allocation ();
133 
134 #pragma omp barrier
135  set_result ();
136 }
137 
138 
139 /// called by range_search before do_allocation
141 {
142  for (int i = 0; i < queries.size(); i++) {
143  QueryResult & qres = queries[i];
144  res->lims[qres.qno] = qres.nres;
145  }
146 }
147 
148 /// called by range_search after do_allocation
150 {
151  size_t ofs = 0;
152  for (int i = 0; i < queries.size(); i++) {
153  QueryResult & qres = queries[i];
154 
155  copy_range (ofs, qres.nres,
156  res->labels + res->lims[qres.qno],
157  res->distances + res->lims[qres.qno]);
158  if (incremental) {
159  res->lims[qres.qno] += qres.nres;
160  }
161  ofs += qres.nres;
162  }
163 }
164 
165 
166 /***********************************************************************
167  * IDSelectorRange
168  ***********************************************************************/
169 
170 IDSelectorRange::IDSelectorRange (idx_t imin, idx_t imax):
171  imin (imin), imax (imax)
172 {
173 }
174 
175 bool IDSelectorRange::is_member (idx_t id) const
176 {
177  return id >= imin && id < imax;
178 }
179 
180 
181 /***********************************************************************
182  * IDSelectorBatch
183  ***********************************************************************/
184 
185 IDSelectorBatch::IDSelectorBatch (long n, const idx_t *indices)
186 {
187  nbits = 0;
188  while (n > (1L << nbits)) nbits++;
189  nbits += 5;
190  // for n = 1M, nbits = 25 is optimal, see P56659518
191 
192  mask = (1L << nbits) - 1;
193  bloom.resize (1UL << (nbits - 3), 0);
194  for (long i = 0; i < n; i++) {
195  long id = indices[i];
196  set.insert(id);
197  id &= mask;
198  bloom[id >> 3] |= 1 << (id & 7);
199  }
200 }
201 
202 bool IDSelectorBatch::is_member (idx_t i) const
203 {
204  long im = i & mask;
205  if(!(bloom[im>>3] & (1 << (im & 7)))) {
206  return 0;
207  }
208  return set.count(i);
209 }
210 
211 
212 /***********************************************************************
213  * IO functions
214  ***********************************************************************/
215 
216 
217 int IOReader::fileno ()
218 {
219  FAISS_THROW_MSG ("IOReader does not support memory mapping");
220 }
221 
222 int IOWriter::fileno ()
223 {
224  FAISS_THROW_MSG ("IOWriter does not support memory mapping");
225 }
226 
227 
228 size_t VectorIOWriter::operator()(
229  const void *ptr, size_t size, size_t nitems)
230 {
231  size_t o = data.size();
232  data.resize(o + size * nitems);
233  memcpy (&data[o], ptr, size * nitems);
234  return nitems;
235 }
236 
237 size_t VectorIOReader::operator()(
238  void *ptr, size_t size, size_t nitems)
239 {
240  if (rp >= data.size()) return 0;
241  size_t nremain = (data.size() - rp) / size;
242  if (nremain < nitems) nitems = nremain;
243  memcpy (ptr, &data[rp], size * nitems);
244  rp += size * nitems;
245  return nitems;
246 }
247 
248 
249 
250 
251 
252 } // namespace faiss
size_t nq
nb of queries
result structure for a single query
void copy_range(size_t ofs, size_t n, idx_t *dest_ids, float *dest_dis)
void set_result(bool incremental=false)
called by range_search after do_allocation
void set_lims()
called by range_search before do_allocation
RangeSearchResult(idx_t nq, bool alloc_lims=true)
lims must be allocated on input to range_search.
size_t wp
write pointer in the last buffer.
float * distances
corresponding distances (not sorted)
QueryResult & new_result(idx_t qno)
begin a new result
size_t buffer_size
size of the result buffers used
size_t * lims
size (nq + 1)
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]