Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/AuxIndexStructures.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved
10 // -*- c++ -*-
11 
12 #include "AuxIndexStructures.h"
13 
14 #include <cstring>
15 
16 namespace faiss {
17 
18 
19 /***********************************************************************
20  * RangeSearchResult
21  ***********************************************************************/
22 
23 RangeSearchResult::RangeSearchResult (idx_t nq, bool alloc_lims): nq (nq) {
24  if (alloc_lims) {
25  lims = new size_t [nq + 1];
26  memset (lims, 0, sizeof(*lims) * (nq + 1));
27  } else {
28  lims = nullptr;
29  }
30  labels = nullptr;
31  distances = nullptr;
32  buffer_size = 1024 * 256;
33 }
34 
35 /// called when lims contains the nb of elements result entries
36 /// for each query
38  size_t ofs = 0;
39  for (int i = 0; i < nq; i++) {
40  size_t n = lims[i];
41  lims [i] = ofs;
42  ofs += n;
43  }
44  lims [nq] = ofs;
45  labels = new idx_t [ofs];
46  distances = new float [ofs];
47 }
48 
49 RangeSearchResult::~RangeSearchResult () {
50  delete [] labels;
51  delete [] distances;
52  delete [] lims;
53 }
54 
55 /***********************************************************************
56  * BufferList
57  ***********************************************************************/
58 
59 
60 BufferList::BufferList (size_t buffer_size):
61  buffer_size (buffer_size)
62 {
63  wp = buffer_size;
64 }
65 
66 BufferList::~BufferList ()
67 {
68  for (int i = 0; i < buffers.size(); i++) {
69  delete [] buffers[i].ids;
70  delete [] buffers[i].dis;
71  }
72 }
73 
74 
75 
76 void BufferList::append_buffer ()
77 {
78  Buffer buf = {new idx_t [buffer_size], new float [buffer_size]};
79  buffers.push_back (buf);
80  wp = 0;
81 }
82 
83 /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
84 /// tables dest_ids, dest_dis
85 void BufferList::copy_range (size_t ofs, size_t n,
86  idx_t * dest_ids, float *dest_dis)
87 {
88  size_t bno = ofs / buffer_size;
89  ofs -= bno * buffer_size;
90  while (n > 0) {
91  size_t ncopy = ofs + n < buffer_size ? n : buffer_size - ofs;
92  Buffer buf = buffers [bno];
93  memcpy (dest_ids, buf.ids + ofs, ncopy * sizeof(*dest_ids));
94  memcpy (dest_dis, buf.dis + ofs, ncopy * sizeof(*dest_dis));
95  dest_ids += ncopy;
96  dest_dis += ncopy;
97  ofs = 0;
98  bno ++;
99  n -= ncopy;
100  }
101 }
102 
103 
104 /***********************************************************************
105  * RangeSearchPartialResult
106  ***********************************************************************/
107 
108 
109 RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in):
110  BufferList(res_in->buffer_size),
111  res(res_in)
112 {}
113 
114 
115 /// begin a new result
116 RangeSearchPartialResult::QueryResult &
118 {
119  QueryResult qres = {qno, 0, this};
120  queries.push_back (qres);
121  return queries.back();
122 }
123 
124 
125 void RangeSearchPartialResult::finalize ()
126 {
127  set_lims ();
128 #pragma omp barrier
129 
130 #pragma omp single
131  res->do_allocation ();
132 
133 #pragma omp barrier
134  set_result ();
135 }
136 
137 
138 /// called by range_search before do_allocation
140 {
141  for (int i = 0; i < queries.size(); i++) {
142  QueryResult & qres = queries[i];
143  res->lims[qres.qno] = qres.nres;
144  }
145 }
146 
147 /// called by range_search after do_allocation
149 {
150  size_t ofs = 0;
151  for (int i = 0; i < queries.size(); i++) {
152  QueryResult & qres = queries[i];
153 
154  copy_range (ofs, qres.nres,
155  res->labels + res->lims[qres.qno],
156  res->distances + res->lims[qres.qno]);
157  if (incremental) {
158  res->lims[qres.qno] += qres.nres;
159  }
160  ofs += qres.nres;
161  }
162 }
163 
164 
165 /***********************************************************************
166  * IDSelectorRange
167  ***********************************************************************/
168 
169 IDSelectorRange::IDSelectorRange (idx_t imin, idx_t imax):
170  imin (imin), imax (imax)
171 {
172 }
173 
174 bool IDSelectorRange::is_member (idx_t id) const
175 {
176  return id >= imin && id < imax;
177 }
178 
179 
180 /***********************************************************************
181  * IDSelectorBatch
182  ***********************************************************************/
183 
184 IDSelectorBatch::IDSelectorBatch (long n, const idx_t *indices)
185 {
186  nbits = 0;
187  while (n > (1L << nbits)) nbits++;
188  nbits += 5;
189  // for n = 1M, nbits = 25 is optimal, see P56659518
190 
191  mask = (1L << nbits) - 1;
192  bloom.resize (1UL << (nbits - 3), 0);
193  for (long i = 0; i < n; i++) {
194  long id = indices[i];
195  set.insert(id);
196  id &= mask;
197  bloom[id >> 3] |= 1 << (id & 7);
198  }
199 }
200 
201 bool IDSelectorBatch::is_member (idx_t i) const
202 {
203  long im = i & mask;
204  if(!(bloom[im>>3] & (1 << (im & 7)))) {
205  return 0;
206  }
207  return set.count(i);
208 }
209 
210 
211 
212 
213 
214 
215 
216 }
size_t nq
nb of queries
result structure for a single query
void copy_range(size_t ofs, size_t n, idx_t *dest_ids, float *dest_dis)
void set_result(bool incremental=false)
called by range_search after do_allocation
void set_lims()
called by range_search before do_allocation
RangeSearchResult(idx_t nq, bool alloc_lims=true)
lims must be allocated on input to range_search.
size_t wp
write pointer in the last buffer.
float * distances
corresponding distances (not sorted)
QueryResult & new_result(idx_t qno)
begin a new result
size_t buffer_size
size of the result buffers used
size_t * lims
size (nq + 1)
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]