Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/AuxIndexStructures.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved
11 // -*- c++ -*-
12 
13 #include "AuxIndexStructures.h"
14 
15 #include <cstring>
16 
17 namespace faiss {
18 
19 
20 /***********************************************************************
21  * RangeSearchResult
22  ***********************************************************************/
23 
25  lims = new size_t [nq + 1];
26  memset (lims, 0, sizeof(*lims) * (nq + 1));
27  labels = nullptr;
28  distances = nullptr;
29  buffer_size = 1024 * 256;
30 }
31 
32 /// called when lims contains the nb of elements result entries
33 /// for each query
35  size_t ofs = 0;
36  for (int i = 0; i < nq; i++) {
37  size_t n = lims[i];
38  lims [i] = ofs;
39  ofs += n;
40  }
41  lims [nq] = ofs;
42  labels = new idx_t [ofs];
43  distances = new float [ofs];
44 }
45 
46 RangeSearchResult::~RangeSearchResult () {
47  delete [] labels;
48  delete [] distances;
49  delete [] lims;
50 }
51 
52 /***********************************************************************
53  * BufferList
54  ***********************************************************************/
55 
56 
57 BufferList::BufferList (size_t buffer_size):
58  buffer_size (buffer_size)
59 {
60  wp = buffer_size;
61 }
62 
63 BufferList::~BufferList ()
64 {
65  for (int i = 0; i < buffers.size(); i++) {
66  delete [] buffers[i].ids;
67  delete [] buffers[i].dis;
68  }
69 }
70 
71 
72 
73 void BufferList::append_buffer ()
74 {
75  Buffer buf = {new idx_t [buffer_size], new float [buffer_size]};
76  buffers.push_back (buf);
77  wp = 0;
78 }
79 
80 /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
81 /// tables dest_ids, dest_dis
82 void BufferList::copy_range (size_t ofs, size_t n,
83  idx_t * dest_ids, float *dest_dis)
84 {
85  size_t bno = ofs / buffer_size;
86  ofs -= bno * buffer_size;
87  while (n > 0) {
88  size_t ncopy = ofs + n < buffer_size ? n : buffer_size - ofs;
89  Buffer buf = buffers [bno];
90  memcpy (dest_ids, buf.ids + ofs, ncopy * sizeof(*dest_ids));
91  memcpy (dest_dis, buf.dis + ofs, ncopy * sizeof(*dest_dis));
92  dest_ids += ncopy;
93  dest_dis += ncopy;
94  ofs = 0;
95  bno ++;
96  n -= ncopy;
97  }
98 }
99 
100 
101 /***********************************************************************
102  * RangeSearchPartialResult
103  ***********************************************************************/
104 
105 
106 RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in):
107  BufferList(res_in->buffer_size),
108  res(res_in)
109 {}
110 
111 
112 /// begin a new result
113 RangeSearchPartialResult::QueryResult &
115 {
116  QueryResult qres = {qno, 0, this};
117  queries.push_back (qres);
118  return queries.back();
119 }
120 
121 
122 void RangeSearchPartialResult::finalize ()
123 {
124  set_lims ();
125 #pragma omp barrier
126 
127 #pragma omp single
128  res->do_allocation ();
129 
130 #pragma omp barrier
131  set_result ();
132 }
133 
134 
135 /// called by range_search before do_allocation
137 {
138  for (int i = 0; i < queries.size(); i++) {
139  QueryResult & qres = queries[i];
140  res->lims[qres.qno] = qres.nres;
141  }
142 }
143 
144 /// called by range_search after do_allocation
146 {
147  size_t ofs = 0;
148  for (int i = 0; i < queries.size(); i++) {
149  QueryResult & qres = queries[i];
150 
151  copy_range (ofs, qres.nres,
152  res->labels + res->lims[qres.qno],
153  res->distances + res->lims[qres.qno]);
154  if (incremental) {
155  res->lims[qres.qno] += qres.nres;
156  }
157  ofs += qres.nres;
158  }
159 }
160 
161 
162 IDSelectorRange::IDSelectorRange (idx_t imin, idx_t imax):
163  imin (imin), imax (imax)
164 {
165 }
166 
167 bool IDSelectorRange::is_member (idx_t id) const
168 {
169  return id >= imin && id < imax;
170 }
171 
172 
173 
174 IDSelectorBatch::IDSelectorBatch (long n, const idx_t *indices)
175 {
176  nbits = 0;
177  while (n > (1L << nbits)) nbits++;
178  nbits += 5;
179  // for n = 1M, nbits = 25 is optimal, see P56659518
180 
181  mask = (1L << nbits) - 1;
182  bloom.resize (1UL << (nbits - 3), 0);
183  for (long i = 0; i < n; i++) {
184  long id = indices[i];
185  set.insert(id);
186  id &= mask;
187  bloom[id >> 3] |= 1 << (id & 7);
188  }
189 }
190 
191 bool IDSelectorBatch::is_member (idx_t i) const
192 {
193  long im = i & mask;
194  if(!(bloom[im>>3] & (1 << (im & 7)))) {
195  return 0;
196  }
197  return set.count(i);
198 }
199 
200 
201 
202 
203 
204 
205 
206 }
size_t nq
nb of queries
result structure for a single query
void copy_range(size_t ofs, size_t n, idx_t *dest_ids, float *dest_dis)
void set_result(bool incremental=false)
called by range_search after do_allocation
void set_lims()
called by range_search before do_allocation
size_t wp
write pointer in the last buffer.
RangeSearchResult(size_t nq)
lims must be allocated on input to range_search.
float * distances
corresponding distances (not sorted)
QueryResult & new_result(idx_t qno)
begin a new result
size_t buffer_size
size of the result buffers used
size_t * lims
size (nq + 1)
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]