Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/AuxIndexStructures.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved
10 // -*- c++ -*-
11 
12 #include "AuxIndexStructures.h"
13 
14 #include <cstring>
15 
16 namespace faiss {
17 
18 
19 /***********************************************************************
20  * RangeSearchResult
21  ***********************************************************************/
22 
24  lims = new size_t [nq + 1];
25  memset (lims, 0, sizeof(*lims) * (nq + 1));
26  labels = nullptr;
27  distances = nullptr;
28  buffer_size = 1024 * 256;
29 }
30 
31 /// called when lims contains the nb of elements result entries
32 /// for each query
34  size_t ofs = 0;
35  for (int i = 0; i < nq; i++) {
36  size_t n = lims[i];
37  lims [i] = ofs;
38  ofs += n;
39  }
40  lims [nq] = ofs;
41  labels = new idx_t [ofs];
42  distances = new float [ofs];
43 }
44 
45 RangeSearchResult::~RangeSearchResult () {
46  delete [] labels;
47  delete [] distances;
48  delete [] lims;
49 }
50 
51 /***********************************************************************
52  * BufferList
53  ***********************************************************************/
54 
55 
56 BufferList::BufferList (size_t buffer_size):
57  buffer_size (buffer_size)
58 {
59  wp = buffer_size;
60 }
61 
62 BufferList::~BufferList ()
63 {
64  for (int i = 0; i < buffers.size(); i++) {
65  delete [] buffers[i].ids;
66  delete [] buffers[i].dis;
67  }
68 }
69 
70 
71 
72 void BufferList::append_buffer ()
73 {
74  Buffer buf = {new idx_t [buffer_size], new float [buffer_size]};
75  buffers.push_back (buf);
76  wp = 0;
77 }
78 
79 /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
80 /// tables dest_ids, dest_dis
81 void BufferList::copy_range (size_t ofs, size_t n,
82  idx_t * dest_ids, float *dest_dis)
83 {
84  size_t bno = ofs / buffer_size;
85  ofs -= bno * buffer_size;
86  while (n > 0) {
87  size_t ncopy = ofs + n < buffer_size ? n : buffer_size - ofs;
88  Buffer buf = buffers [bno];
89  memcpy (dest_ids, buf.ids + ofs, ncopy * sizeof(*dest_ids));
90  memcpy (dest_dis, buf.dis + ofs, ncopy * sizeof(*dest_dis));
91  dest_ids += ncopy;
92  dest_dis += ncopy;
93  ofs = 0;
94  bno ++;
95  n -= ncopy;
96  }
97 }
98 
99 
100 /***********************************************************************
101  * RangeSearchPartialResult
102  ***********************************************************************/
103 
104 
105 RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in):
106  BufferList(res_in->buffer_size),
107  res(res_in)
108 {}
109 
110 
111 /// begin a new result
112 RangeSearchPartialResult::QueryResult &
114 {
115  QueryResult qres = {qno, 0, this};
116  queries.push_back (qres);
117  return queries.back();
118 }
119 
120 
121 void RangeSearchPartialResult::finalize ()
122 {
123  set_lims ();
124 #pragma omp barrier
125 
126 #pragma omp single
127  res->do_allocation ();
128 
129 #pragma omp barrier
130  set_result ();
131 }
132 
133 
134 /// called by range_search before do_allocation
136 {
137  for (int i = 0; i < queries.size(); i++) {
138  QueryResult & qres = queries[i];
139  res->lims[qres.qno] = qres.nres;
140  }
141 }
142 
143 /// called by range_search after do_allocation
145 {
146  size_t ofs = 0;
147  for (int i = 0; i < queries.size(); i++) {
148  QueryResult & qres = queries[i];
149 
150  copy_range (ofs, qres.nres,
151  res->labels + res->lims[qres.qno],
152  res->distances + res->lims[qres.qno]);
153  if (incremental) {
154  res->lims[qres.qno] += qres.nres;
155  }
156  ofs += qres.nres;
157  }
158 }
159 
160 
161 /***********************************************************************
162  * IDSelectorRange
163  ***********************************************************************/
164 
165 IDSelectorRange::IDSelectorRange (idx_t imin, idx_t imax):
166  imin (imin), imax (imax)
167 {
168 }
169 
170 bool IDSelectorRange::is_member (idx_t id) const
171 {
172  return id >= imin && id < imax;
173 }
174 
175 
176 /***********************************************************************
177  * IDSelectorBatch
178  ***********************************************************************/
179 
180 IDSelectorBatch::IDSelectorBatch (long n, const idx_t *indices)
181 {
182  nbits = 0;
183  while (n > (1L << nbits)) nbits++;
184  nbits += 5;
185  // for n = 1M, nbits = 25 is optimal, see P56659518
186 
187  mask = (1L << nbits) - 1;
188  bloom.resize (1UL << (nbits - 3), 0);
189  for (long i = 0; i < n; i++) {
190  long id = indices[i];
191  set.insert(id);
192  id &= mask;
193  bloom[id >> 3] |= 1 << (id & 7);
194  }
195 }
196 
197 bool IDSelectorBatch::is_member (idx_t i) const
198 {
199  long im = i & mask;
200  if(!(bloom[im>>3] & (1 << (im & 7)))) {
201  return 0;
202  }
203  return set.count(i);
204 }
205 
206 
207 
208 
209 
210 
211 
212 }
size_t nq
nb of queries
result structure for a single query
void copy_range(size_t ofs, size_t n, idx_t *dest_ids, float *dest_dis)
void set_result(bool incremental=false)
called by range_search after do_allocation
void set_lims()
called by range_search before do_allocation
size_t wp
write pointer in the last buffer.
RangeSearchResult(size_t nq)
lims must be allocated on input to range_search.
float * distances
corresponding distances (not sorted)
QueryResult & new_result(idx_t qno)
begin a new result
size_t buffer_size
size of the result buffers used
size_t * lims
size (nq + 1)
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]