Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/MetaIndexes.cpp
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #include "MetaIndexes.h"
11 
12 #include <cstdio>
13 
14 #include "FaissAssert.h"
15 #include "Heap.h"
16 #include "AuxIndexStructures.h"
17 #include "WorkerThread.h"
18 
19 
20 namespace faiss {
21 
22 namespace {
23 
24 typedef Index::idx_t idx_t;
25 
26 } // namespace
27 
28 /*****************************************************
29  * IndexIDMap implementation
30  *******************************************************/
31 
32 IndexIDMap::IndexIDMap (Index *index):
33  index (index),
34  own_fields (false)
35 {
36  FAISS_THROW_IF_NOT_MSG (index->ntotal == 0, "index must be empty on input");
37  is_trained = index->is_trained;
38  metric_type = index->metric_type;
39  verbose = index->verbose;
40  d = index->d;
41 }
42 
43 void IndexIDMap::add (idx_t, const float *)
44 {
45  FAISS_THROW_MSG ("add does not make sense with IndexIDMap, "
46  "use add_with_ids");
47 }
48 
49 
50 void IndexIDMap::train (idx_t n, const float *x)
51 {
52  index->train (n, x);
53  is_trained = index->is_trained;
54 }
55 
57 {
58  index->reset ();
59  id_map.clear();
60  ntotal = 0;
61 }
62 
63 
64 void IndexIDMap::add_with_ids (idx_t n, const float * x, const long *xids)
65 {
66  index->add (n, x);
67  for (idx_t i = 0; i < n; i++)
68  id_map.push_back (xids[i]);
69  ntotal = index->ntotal;
70 }
71 
72 
73 void IndexIDMap::search (idx_t n, const float *x, idx_t k,
74  float *distances, idx_t *labels) const
75 {
76  index->search (n, x, k, distances, labels);
77  idx_t *li = labels;
78 #pragma omp parallel for
79  for (idx_t i = 0; i < n * k; i++) {
80  li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
81  }
82 }
83 
84 
85 void IndexIDMap::range_search (idx_t n, const float *x, float radius,
86  RangeSearchResult *result) const
87 {
88  index->range_search(n, x, radius, result);
89 #pragma omp parallel for
90  for (idx_t i = 0; i < result->lims[result->nq]; i++) {
91  result->labels[i] = result->labels[i] < 0 ?
92  result->labels[i] : id_map[result->labels[i]];
93  }
94 }
95 
96 namespace {
97 
98 struct IDTranslatedSelector: IDSelector {
99  const std::vector <long> & id_map;
100  const IDSelector & sel;
101  IDTranslatedSelector (const std::vector <long> & id_map,
102  const IDSelector & sel):
103  id_map (id_map), sel (sel)
104  {}
105  bool is_member(idx_t id) const override {
106  return sel.is_member(id_map[id]);
107  }
108 };
109 
110 }
111 
113 {
114  // remove in sub-index first
115  IDTranslatedSelector sel2 (id_map, sel);
116  long nremove = index->remove_ids (sel2);
117 
118  long j = 0;
119  for (idx_t i = 0; i < ntotal; i++) {
120  if (sel.is_member (id_map[i])) {
121  // remove
122  } else {
123  id_map[j] = id_map[i];
124  j++;
125  }
126  }
127  FAISS_ASSERT (j == index->ntotal);
128  ntotal = j;
129  id_map.resize(ntotal);
130  return nremove;
131 }
132 
133 
134 
135 
136 IndexIDMap::~IndexIDMap ()
137 {
138  if (own_fields) delete index;
139 }
140 
141 /*****************************************************
142  * IndexIDMap2 implementation
143  *******************************************************/
144 
145 IndexIDMap2::IndexIDMap2 (Index *index): IndexIDMap (index)
146 {}
147 
148 void IndexIDMap2::add_with_ids(idx_t n, const float* x, const long* xids)
149 {
150  size_t prev_ntotal = ntotal;
151  IndexIDMap::add_with_ids (n, x, xids);
152  for (size_t i = prev_ntotal; i < ntotal; i++) {
153  rev_map [id_map [i]] = i;
154  }
155 }
156 
158 {
159  rev_map.clear ();
160  for (size_t i = 0; i < ntotal; i++) {
161  rev_map [id_map [i]] = i;
162  }
163 }
164 
165 
167 {
168  // This is quite inefficient
169  long nremove = IndexIDMap::remove_ids (sel);
171  return nremove;
172 }
173 
174 void IndexIDMap2::reconstruct (idx_t key, float * recons) const
175 {
176  try {
177  index->reconstruct (rev_map.at (key), recons);
178  } catch (const std::out_of_range& e) {
179  FAISS_THROW_FMT ("key %ld not found", key);
180  }
181 }
182 
183 /*****************************************************
184  * IndexSplitVectors implementation
185  *******************************************************/
186 
187 
189  Index (d), own_fields (false),
190  threaded (threaded), sum_d (0)
191 {
192 
193 }
194 
195 void IndexSplitVectors::add_sub_index (Index *index)
196 {
197  sub_indexes.push_back (index);
198  sync_with_sub_indexes ();
199 }
200 
201 void IndexSplitVectors::sync_with_sub_indexes ()
202 {
203  if (sub_indexes.empty()) return;
204  Index * index0 = sub_indexes[0];
205  sum_d = index0->d;
206  metric_type = index0->metric_type;
207  is_trained = index0->is_trained;
208  ntotal = index0->ntotal;
209  for (int i = 1; i < sub_indexes.size(); i++) {
210  Index * index = sub_indexes[i];
211  FAISS_THROW_IF_NOT (metric_type == index->metric_type);
212  FAISS_THROW_IF_NOT (ntotal == index->ntotal);
213  sum_d += index->d;
214  }
215 
216 }
217 
218 void IndexSplitVectors::add(idx_t /*n*/, const float* /*x*/) {
219  FAISS_THROW_MSG("not implemented");
220 }
221 
222 
223 
225  idx_t n, const float *x, idx_t k,
226  float *distances, idx_t *labels) const
227 {
228  FAISS_THROW_IF_NOT_MSG (k == 1,
229  "search implemented only for k=1");
230  FAISS_THROW_IF_NOT_MSG (sum_d == d,
231  "not enough indexes compared to # dimensions");
232 
233  long nshard = sub_indexes.size();
234  float *all_distances = new float [nshard * k * n];
235  idx_t *all_labels = new idx_t [nshard * k * n];
236  ScopeDeleter<float> del (all_distances);
237  ScopeDeleter<idx_t> del2 (all_labels);
238 
239  auto query_func = [n, x, k, distances, labels, all_distances, all_labels, this]
240  (int no) {
241  const IndexSplitVectors *index = this;
242  float *distances1 = no == 0 ? distances : all_distances + no * k * n;
243  idx_t *labels1 = no == 0 ? labels : all_labels + no * k * n;
244  if (index->verbose)
245  printf ("begin query shard %d on %ld points\n", no, n);
246  const Index * sub_index = index->sub_indexes[no];
247  long sub_d = sub_index->d, d = index->d;
248  idx_t ofs = 0;
249  for (int i = 0; i < no; i++) ofs += index->sub_indexes[i]->d;
250  float *sub_x = new float [sub_d * n];
251  ScopeDeleter<float> del1 (sub_x);
252  for (idx_t i = 0; i < n; i++)
253  memcpy (sub_x + i * sub_d, x + ofs + i * d, sub_d * sizeof (sub_x));
254  sub_index->search (n, sub_x, k, distances1, labels1);
255  if (index->verbose)
256  printf ("end query shard %d\n", no);
257  };
258 
259  if (!threaded) {
260  for (int i = 0; i < nshard; i++) {
261  query_func(i);
262  }
263  } else {
264  std::vector<std::unique_ptr<WorkerThread> > threads;
265  std::vector<std::future<bool>> v;
266 
267  for (int i = 0; i < nshard; i++) {
268  threads.emplace_back(new WorkerThread());
269  WorkerThread *wt = threads.back().get();
270  v.emplace_back(wt->add([i, query_func](){query_func(i); }));
271  }
272 
273  // Blocking wait for completion
274  for (auto& func : v) {
275  func.get();
276  }
277  }
278 
279  long factor = 1;
280  for (int i = 0; i < nshard; i++) {
281  if (i > 0) { // results of 0 are already in the table
282  const float *distances_i = all_distances + i * k * n;
283  const idx_t *labels_i = all_labels + i * k * n;
284  for (long j = 0; j < n; j++) {
285  if (labels[j] >= 0 && labels_i[j] >= 0) {
286  labels[j] += labels_i[j] * factor;
287  distances[j] += distances_i[j];
288  } else {
289  labels[j] = -1;
290  distances[j] = 0.0 / 0.0;
291  }
292  }
293  }
294  factor *= sub_indexes[i]->ntotal;
295  }
296 
297 }
298 
299 void IndexSplitVectors::train(idx_t /*n*/, const float* /*x*/) {
300  FAISS_THROW_MSG("not implemented");
301 }
302 
303 void IndexSplitVectors::reset ()
304 {
305  FAISS_THROW_MSG ("not implemented");
306 }
307 
308 
309 IndexSplitVectors::~IndexSplitVectors ()
310 {
311  if (own_fields) {
312  for (int s = 0; s < sub_indexes.size(); s++)
313  delete sub_indexes [s];
314  }
315 }
316 
317 
318 } // namespace faiss
size_t nq
nb of queries
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: MetaIndexes.cpp:73
void add_with_ids(idx_t n, const float *x, const long *xids) override
void add(idx_t n, const float *x) override
this will fail. Use add_with_ids
Definition: MetaIndexes.cpp:43
int d
vector dimension
Definition: Index.h:66
long idx_t
all indices are this type
Definition: Index.h:62
std::vector< long > id_map
! whether pointers are deleted in destructor
Definition: MetaIndexes.h:25
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: MetaIndexes.cpp:85
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
void construct_rev_map()
make the rev_map from scratch
bool verbose
verbosity level
Definition: Index.h:68
void add(idx_t n, const float *x) override
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void reset() override
removes all elements from the database.
Definition: MetaIndexes.cpp:56
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
void reconstruct(idx_t key, float *recons) const override
void train(idx_t n, const float *x) override
Definition: MetaIndexes.cpp:50
size_t * lims
size (nq + 1)
std::future< bool > add(std::function< void()> f)
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
virtual void reconstruct(idx_t key, float *recons) const
Definition: Index.cpp:54
void add_with_ids(idx_t n, const float *x, const long *xids) override
Definition: MetaIndexes.cpp:64
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]
IndexSplitVectors(idx_t d, bool threaded=false)
sum of dimensions seen so far