Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/IndexBinaryIVF.cpp
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // Copyright 2004-present Facebook. All Rights Reserved
9 // -*- c++ -*-
10 
11 #include "IndexBinaryIVF.h"
12 
13 #include <cstdio>
14 #include <memory>
15 
16 #include "hamming.h"
17 #include "utils.h"
18 
19 #include "AuxIndexStructures.h"
20 #include "FaissAssert.h"
21 #include "IndexFlat.h"
22 
23 
24 namespace faiss {
25 
26 IndexBinaryIVF::IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist)
27  : IndexBinary(d),
28  invlists(new ArrayInvertedLists(nlist, code_size)),
29  own_invlists(true),
30  nprobe(1),
31  max_codes(0),
32  maintain_direct_map(false),
33  quantizer(quantizer),
34  nlist(nlist),
35  own_fields(false),
36  clustering_index(nullptr)
37 {
38  FAISS_THROW_IF_NOT (d == quantizer->d);
39  is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
40 
41  cp.niter = 10;
42 }
43 
44 IndexBinaryIVF::IndexBinaryIVF()
45  : invlists(nullptr),
46  own_invlists(false),
47  nprobe(1),
48  max_codes(0),
49  maintain_direct_map(false),
50  quantizer(nullptr),
51  nlist(0),
52  own_fields(false),
53  clustering_index(nullptr)
54 {}
55 
56 void IndexBinaryIVF::add(idx_t n, const uint8_t *x) {
57  add_with_ids(n, x, nullptr);
58 }
59 
60 void IndexBinaryIVF::add_with_ids(idx_t n, const uint8_t *x, const long *xids) {
61  add_core(n, x, xids, nullptr);
62 }
63 
64 void IndexBinaryIVF::add_core(idx_t n, const uint8_t *x, const long *xids,
65  const long *precomputed_idx) {
66  FAISS_THROW_IF_NOT(is_trained);
67  assert(invlists);
68  FAISS_THROW_IF_NOT_MSG(!(maintain_direct_map && xids),
69  "cannot have direct map and add with ids");
70 
71  const long * idx;
72 
73  std::unique_ptr<long[]> scoped_idx;
74 
75  if (precomputed_idx) {
76  idx = precomputed_idx;
77  } else {
78  scoped_idx.reset(new long[n]);
79  quantizer->assign(n, x, scoped_idx.get());
80  idx = scoped_idx.get();
81  }
82 
83  long n_add = 0;
84  for (size_t i = 0; i < n; i++) {
85  long id = xids ? xids[i] : ntotal + i;
86  long list_no = idx[i];
87 
88  if (list_no < 0)
89  continue;
90  const uint8_t *xi = x + i * code_size;
91  size_t offset = invlists->add_entry(list_no, id, xi);
92 
94  direct_map.push_back(list_no << 32 | offset);
95  n_add++;
96  }
97  if (verbose) {
98  printf("IndexBinaryIVF::add_with_ids: added %ld / %ld vectors\n",
99  n_add, n);
100  }
101  ntotal += n_add;
102 }
103 
104 void IndexBinaryIVF::make_direct_map(bool new_maintain_direct_map) {
105  // nothing to do
106  if (new_maintain_direct_map == maintain_direct_map)
107  return;
108 
109  if (new_maintain_direct_map) {
110  direct_map.resize(ntotal, -1);
111  for (size_t key = 0; key < nlist; key++) {
112  size_t list_size = invlists->list_size(key);
113  const idx_t *idlist = invlists->get_ids(key);
114 
115  for (long ofs = 0; ofs < list_size; ofs++) {
116  FAISS_THROW_IF_NOT_MSG(0 <= idlist[ofs] && idlist[ofs] < ntotal,
117  "direct map supported only for seuquential ids");
118  direct_map[idlist[ofs]] = key << 32 | ofs;
119  }
120  }
121  } else {
122  direct_map.clear();
123  }
124  maintain_direct_map = new_maintain_direct_map;
125 }
126 
127 void IndexBinaryIVF::search(idx_t n, const uint8_t *x, idx_t k,
128  int32_t *distances, idx_t *labels) const {
129  std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
130  std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
131 
132  double t0 = getmillisecs();
133  quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
134  indexIVF_stats.quantization_time += getmillisecs() - t0;
135 
136  t0 = getmillisecs();
137  invlists->prefetch_lists(idx.get(), n * nprobe);
138 
139  search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
140  distances, labels, false);
141  indexIVF_stats.search_time += getmillisecs() - t0;
142 }
143 
144 void IndexBinaryIVF::reconstruct(idx_t key, uint8_t *recons) const {
145  FAISS_THROW_IF_NOT_MSG(direct_map.size() == ntotal,
146  "direct map is not initialized");
147  long list_no = direct_map[key] >> 32;
148  long offset = direct_map[key] & 0xffffffff;
149  reconstruct_from_offset(list_no, offset, recons);
150 }
151 
152 void IndexBinaryIVF::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
153  FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
154 
155  for (long list_no = 0; list_no < nlist; list_no++) {
156  size_t list_size = invlists->list_size(list_no);
157  const Index::idx_t *idlist = invlists->get_ids(list_no);
158 
159  for (long offset = 0; offset < list_size; offset++) {
160  long id = idlist[offset];
161  if (!(id >= i0 && id < i0 + ni)) {
162  continue;
163  }
164 
165  uint8_t *reconstructed = recons + (id - i0) * d;
166  reconstruct_from_offset(list_no, offset, reconstructed);
167  }
168  }
169 }
170 
172  int32_t *distances, idx_t *labels,
173  uint8_t *recons) const {
174  std::unique_ptr<idx_t[]> idx(new long[n * nprobe]);
175  std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
176 
177  quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
178 
179  invlists->prefetch_lists(idx.get(), n * nprobe);
180 
181  // search_preassigned() with `store_pairs` enabled to obtain the list_no
182  // and offset into `codes` for reconstruction
183  search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
184  distances, labels, /* store_pairs */true);
185  for (idx_t i = 0; i < n; ++i) {
186  for (idx_t j = 0; j < k; ++j) {
187  idx_t ij = i * k + j;
188  idx_t key = labels[ij];
189  uint8_t *reconstructed = recons + ij * d;
190  if (key < 0) {
191  // Fill with NaNs
192  memset(reconstructed, -1, sizeof(*reconstructed) * d);
193  } else {
194  int list_no = key >> 32;
195  int offset = key & 0xffffffff;
196 
197  // Update label to the actual id
198  labels[ij] = invlists->get_single_id(list_no, offset);
199 
200  reconstruct_from_offset(list_no, offset, reconstructed);
201  }
202  }
203  }
204 }
205 
206 void IndexBinaryIVF::reconstruct_from_offset(long list_no, long offset,
207  uint8_t *recons) const {
208  memcpy(recons, invlists->get_single_code(list_no, offset), code_size);
209 }
210 
212  direct_map.clear();
213  invlists->reset();
214  ntotal = 0;
215 }
216 
218  FAISS_THROW_IF_NOT_MSG(!maintain_direct_map,
219  "direct map remove not implemented");
220 
221  std::vector<long> toremove(nlist);
222 
223 #pragma omp parallel for
224  for (long i = 0; i < nlist; i++) {
225  long l0 = invlists->list_size (i), l = l0, j = 0;
226  const idx_t *idsi = invlists->get_ids(i);
227  while (j < l) {
228  if (sel.is_member(idsi[j])) {
229  l--;
230  invlists->update_entry(
231  i, j,
232  invlists->get_single_id(i, l),
233  invlists->get_single_code(i, l));
234  } else {
235  j++;
236  }
237  }
238  toremove[i] = l0 - l;
239  }
240  // this will not run well in parallel on ondisk because of possible shrinks
241  long nremove = 0;
242  for (long i = 0; i < nlist; i++) {
243  if (toremove[i] > 0) {
244  nremove += toremove[i];
245  invlists->resize(
246  i, invlists->list_size(i) - toremove[i]);
247  }
248  }
249  ntotal -= nremove;
250  return nremove;
251 }
252 
253 void IndexBinaryIVF::train(idx_t n, const uint8_t *x) {
254  if (verbose) {
255  printf("Training quantizer\n");
256  }
257 
258  if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
259  if (verbose) {
260  printf("IVF quantizer does not need training.\n");
261  }
262  } else {
263  if (verbose) {
264  printf("Training quantizer on %ld vectors in %dD\n", n, d);
265  }
266 
267  Clustering clus(d, nlist, cp);
268  quantizer->reset();
269 
270  std::unique_ptr<float[]> x_f(new float[n * d]);
271  binary_to_real(n * d, x, x_f.get());
272 
273  IndexFlatL2 index_tmp(d);
274 
275  if (clustering_index && verbose) {
276  printf("using clustering_index of dimension %d to do the clustering\n",
278  }
279 
280  clus.train(n, x_f.get(), clustering_index ? *clustering_index : index_tmp);
281 
282  std::unique_ptr<uint8_t[]> x_b(new uint8_t[clus.k * code_size]);
283  real_to_binary(d * clus.k, clus.centroids.data(), x_b.get());
284 
285  quantizer->add(clus.k, x_b.get());
286  quantizer->is_trained = true;
287  }
288 
289  is_trained = true;
290 }
291 
293  // minimal sanity checks
294  FAISS_THROW_IF_NOT(other.d == d);
295  FAISS_THROW_IF_NOT(other.nlist == nlist);
296  FAISS_THROW_IF_NOT(other.code_size == code_size);
297  FAISS_THROW_IF_NOT_MSG((!maintain_direct_map &&
298  !other.maintain_direct_map),
299  "direct map copy not implemented");
300  FAISS_THROW_IF_NOT_MSG(typeid (*this) == typeid (other),
301  "can only merge indexes of the same type");
302 
303  invlists->merge_from (other.invlists, add_id);
304 
305  ntotal += other.ntotal;
306  other.ntotal = 0;
307 }
308 
309 void IndexBinaryIVF::replace_invlists(InvertedLists *il, bool own) {
310  FAISS_THROW_IF_NOT(il->nlist == nlist &&
311  il->code_size == code_size);
312  if (own_invlists) {
313  delete invlists;
314  }
315  invlists = il;
316  own_invlists = own;
317 }
318 
319 
320 namespace {
321 
322 using idx_t = Index::idx_t;
323 
324 
325 template<class HammingComputer, bool store_pairs>
326 struct IVFBinaryScannerL2: BinaryInvertedListScanner {
327 
328  HammingComputer hc;
329  size_t code_size;
330 
331  IVFBinaryScannerL2 (size_t code_size): code_size (code_size)
332  {}
333 
334  void set_query (const uint8_t *query_vector) override {
335  hc.set (query_vector, code_size);
336  }
337 
338  idx_t list_no;
339  void set_list (idx_t list_no, uint8_t /* coarse_dis */) override {
340  this->list_no = list_no;
341  }
342 
343  uint32_t distance_to_code (const uint8_t *code) const override {
344  return hc.hamming (code);
345  }
346 
347  size_t scan_codes (size_t n,
348  const uint8_t *codes,
349  const idx_t *ids,
350  int32_t *simi, idx_t *idxi,
351  size_t k) const override
352  {
353  using C = CMax<int32_t, idx_t>;
354 
355  size_t nup = 0;
356  for (size_t j = 0; j < n; j++) {
357  uint32_t dis = hc.hamming (codes);
358  if (dis < simi[0]) {
359  heap_pop<C> (k, simi, idxi);
360  long id = store_pairs ? (list_no << 32 | j) : ids[j];
361  heap_push<C> (k, simi, idxi, dis, id);
362  nup++;
363  }
364  codes += code_size;
365  }
366  return nup;
367  }
368 
369 
370 };
371 
372 
373 template <bool store_pairs>
374 BinaryInvertedListScanner *select_IVFBinaryScannerL2 (size_t code_size) {
375 
376  switch (code_size) {
377 #define HANDLE_CS(cs) \
378  case cs: \
379  return new IVFBinaryScannerL2<HammingComputer ## cs, store_pairs> (cs);
380  HANDLE_CS(4);
381  HANDLE_CS(8);
382  HANDLE_CS(16);
383  HANDLE_CS(20);
384  HANDLE_CS(32);
385  HANDLE_CS(64);
386 #undef HANDLE_CS
387  default:
388  if (code_size % 8 == 0) {
389  return new IVFBinaryScannerL2<HammingComputerM8,
390  store_pairs> (code_size);
391  } else if (code_size % 4 == 0) {
392  return new IVFBinaryScannerL2<HammingComputerM4,
393  store_pairs> (code_size);
394  } else {
395  return new IVFBinaryScannerL2<HammingComputerDefault,
396  store_pairs> (code_size);
397  }
398  }
399 }
400 
401 
402 void search_knn_hamming_heap(const IndexBinaryIVF& ivf,
403  size_t n,
404  const uint8_t *x,
405  idx_t k,
406  const idx_t *keys,
407  const int32_t * coarse_dis,
408  int32_t *distances, idx_t *labels,
409  bool store_pairs,
410  const IVFSearchParameters *params)
411 {
412  long nprobe = params ? params->nprobe : ivf.nprobe;
413  long max_codes = params ? params->max_codes : ivf.max_codes;
414  MetricType metric_type = ivf.metric_type;
415 
416  // almost verbatim copy from IndexIVF::search_preassigned
417 
418  size_t nlistv = 0, ndis = 0, nheap = 0;
419  using HeapForIP = CMin<int32_t, idx_t>;
420  using HeapForL2 = CMax<int32_t, idx_t>;
421 
422 #pragma omp parallel if(n > 1) reduction(+: nlistv, ndis, nheap)
423  {
424  std::unique_ptr<BinaryInvertedListScanner> scanner
425  (ivf.get_InvertedListScanner (store_pairs));
426 
427 #pragma omp for
428  for (size_t i = 0; i < n; i++) {
429  const uint8_t *xi = x + i * ivf.code_size;
430  scanner->set_query(xi);
431 
432  const long * keysi = keys + i * nprobe;
433  int32_t * simi = distances + k * i;
434  long * idxi = labels + k * i;
435 
436  if (metric_type == METRIC_INNER_PRODUCT) {
437  heap_heapify<HeapForIP> (k, simi, idxi);
438  } else {
439  heap_heapify<HeapForL2> (k, simi, idxi);
440  }
441 
442  size_t nscan = 0;
443 
444  for (size_t ik = 0; ik < nprobe; ik++) {
445  long key = keysi[ik]; /* select the list */
446  if (key < 0) {
447  // not enough centroids for multiprobe
448  continue;
449  }
450  FAISS_THROW_IF_NOT_FMT
451  (key < (long) ivf.nlist,
452  "Invalid key=%ld at ik=%ld nlist=%ld\n",
453  key, ik, ivf.nlist);
454 
455  scanner->set_list (key, coarse_dis[i * nprobe + ik]);
456 
457  nlistv++;
458 
459  size_t list_size = ivf.invlists->list_size(key);
460  InvertedLists::ScopedCodes scodes (ivf.invlists, key);
461  std::unique_ptr<InvertedLists::ScopedIds> sids;
462  const Index::idx_t * ids = nullptr;
463 
464  if (!store_pairs) {
465  sids.reset (new InvertedLists::ScopedIds (ivf.invlists, key));
466  ids = sids->get();
467  }
468 
469  nheap += scanner->scan_codes (list_size, scodes.get(),
470  ids, simi, idxi, k);
471 
472  nscan += list_size;
473  if (max_codes && nscan >= max_codes)
474  break;
475  }
476 
477  ndis += nscan;
478  if (metric_type == METRIC_INNER_PRODUCT) {
479  heap_reorder<HeapForIP> (k, simi, idxi);
480  } else {
481  heap_reorder<HeapForL2> (k, simi, idxi);
482  }
483 
484  } // parallel for
485  } // parallel
486 
487  indexIVF_stats.nq += n;
488  indexIVF_stats.nlist += nlistv;
489  indexIVF_stats.ndis += ndis;
490  indexIVF_stats.nheap_updates += nheap;
491 
492 }
493 
494 template<class HammingComputer, bool store_pairs>
495 void search_knn_hamming_count(const IndexBinaryIVF& ivf,
496  size_t nx,
497  const uint8_t *x,
498  const long *keys,
499  int k,
500  int32_t *distances,
501  long *labels,
502  const IVFSearchParameters *params) {
503  const int nBuckets = ivf.d + 1;
504  std::vector<int> all_counters(nx * nBuckets, 0);
505  std::unique_ptr<long[]> all_ids_per_dis(new long[nx * nBuckets * k]);
506 
507  long nprobe = params ? params->nprobe : ivf.nprobe;
508  long max_codes = params ? params->max_codes : ivf.max_codes;
509 
510  std::vector<HCounterState<HammingComputer>> cs;
511  for (size_t i = 0; i < nx; ++i) {
512  cs.push_back(HCounterState<HammingComputer>(
513  all_counters.data() + i * nBuckets,
514  all_ids_per_dis.get() + i * nBuckets * k,
515  x + i * ivf.code_size,
516  ivf.d,
517  k
518  ));
519  }
520 
521  size_t nlistv = 0, ndis = 0;
522 
523 #pragma omp parallel for reduction(+: nlistv, ndis)
524  for (size_t i = 0; i < nx; i++) {
525  const long * keysi = keys + i * nprobe;
526  HCounterState<HammingComputer>& csi = cs[i];
527 
528  size_t nscan = 0;
529 
530  for (size_t ik = 0; ik < nprobe; ik++) {
531  long key = keysi[ik]; /* select the list */
532  if (key < 0) {
533  // not enough centroids for multiprobe
534  continue;
535  }
536  FAISS_THROW_IF_NOT_FMT (
537  key < (long) ivf.nlist,
538  "Invalid key=%ld at ik=%ld nlist=%ld\n",
539  key, ik, ivf.nlist);
540 
541  nlistv++;
542  size_t list_size = ivf.invlists->list_size(key);
543  InvertedLists::ScopedCodes scodes (ivf.invlists, key);
544  const uint8_t *list_vecs = scodes.get();
545  const Index::idx_t *ids = store_pairs
546  ? nullptr
547  : ivf.invlists->get_ids(key);
548 
549  for (size_t j = 0; j < list_size; j++) {
550  const uint8_t * yj = list_vecs + ivf.code_size * j;
551 
552  long id = store_pairs ? (key << 32 | j) : ids[j];
553  csi.update_counter(yj, id);
554  }
555  if (ids)
556  ivf.invlists->release_ids (key, ids);
557 
558  nscan += list_size;
559  if (max_codes && nscan >= max_codes)
560  break;
561  }
562  ndis += nscan;
563 
564  int nres = 0;
565  for (int b = 0; b < nBuckets && nres < k; b++) {
566  for (int l = 0; l < csi.counters[b] && nres < k; l++) {
567  labels[i * k + nres] = csi.ids_per_dis[b * k + l];
568  distances[i * k + nres] = b;
569  nres++;
570  }
571  }
572  while (nres < k) {
573  labels[i * k + nres] = -1;
574  distances[i * k + nres] = std::numeric_limits<int32_t>::max();
575  ++nres;
576  }
577  }
578 
579  indexIVF_stats.nq += nx;
580  indexIVF_stats.nlist += nlistv;
581  indexIVF_stats.ndis += ndis;
582 }
583 
584 
585 
586 template<bool store_pairs>
587 void search_knn_hamming_count_1 (
588  const IndexBinaryIVF& ivf,
589  size_t nx,
590  const uint8_t *x,
591  const long *keys,
592  int k,
593  int32_t *distances,
594  long *labels,
595  const IVFSearchParameters *params) {
596  switch (ivf.code_size) {
597 #define HANDLE_CS(cs) \
598  case cs: \
599  search_knn_hamming_count<HammingComputer ## cs, store_pairs>( \
600  ivf, nx, x, keys, k, distances, labels, params); \
601  break;
602  HANDLE_CS(4);
603  HANDLE_CS(8);
604  HANDLE_CS(16);
605  HANDLE_CS(20);
606  HANDLE_CS(32);
607  HANDLE_CS(64);
608 #undef HANDLE_CS
609  default:
610  if (ivf.code_size % 8 == 0) {
611  search_knn_hamming_count<HammingComputerM8, store_pairs>
612  (ivf, nx, x, keys, k, distances, labels, params);
613  } else if (ivf.code_size % 4 == 0) {
614  search_knn_hamming_count<HammingComputerM4, store_pairs>
615  (ivf, nx, x, keys, k, distances, labels, params);
616  } else {
617  search_knn_hamming_count<HammingComputerDefault, store_pairs>
618  (ivf, nx, x, keys, k, distances, labels, params);
619  }
620  break;
621  }
622 
623 }
624 
625 } // namespace
626 
627 BinaryInvertedListScanner *IndexBinaryIVF::get_InvertedListScanner
628  (bool store_pairs) const
629 {
630  if (store_pairs) {
631  return select_IVFBinaryScannerL2<true> (code_size);
632  } else {
633  return select_IVFBinaryScannerL2<false> (code_size);
634  }
635 }
636 
637 void IndexBinaryIVF::search_preassigned(idx_t n, const uint8_t *x, idx_t k,
638  const idx_t *idx,
639  const int32_t * coarse_dis,
640  int32_t *distances, idx_t *labels,
641  bool store_pairs,
642  const IVFSearchParameters *params
643  ) const {
644 
645  if (use_heap) {
646  search_knn_hamming_heap (*this, n, x, k, idx, coarse_dis,
647  distances, labels, store_pairs,
648  params);
649  } else {
650  if (store_pairs) {
651  search_knn_hamming_count_1<true>
652  (*this, n, x, idx, k, distances, labels, params);
653  } else {
654  search_knn_hamming_count_1<false>
655  (*this, n, x, idx, k, distances, labels, params);
656  }
657  }
658 }
659 
660 IndexBinaryIVF::~IndexBinaryIVF() {
661  if (own_invlists) {
662  delete invlists;
663  }
664 
665  if (own_fields) {
666  delete quantizer;
667  }
668 }
669 
670 
671 } // namespace faiss
virtual void search(idx_t n, const uint8_t *x, idx_t k, int32_t *distances, idx_t *labels) const =0
size_t nprobe
number of probes at query time
void add_core(idx_t n, const uint8_t *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
int niter
clustering iterations
Definition: Clustering.h:23
simple (default) implementation as an array of inverted lists
virtual void reconstruct_from_offset(long list_no, long offset, uint8_t *recons) const
virtual const idx_t * get_ids(size_t list_no) const =0
virtual void reset()=0
Removes all elements from the database.
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
ClusteringParameters cp
to override default clustering params
void search_preassigned(idx_t n, const uint8_t *x, idx_t k, const idx_t *assign, const int32_t *centroid_dis, int32_t *distances, idx_t *labels, bool store_pairs, const IVFSearchParameters *params=nullptr) const
size_t nlist
number of possible key values
virtual size_t list_size(size_t list_no) const =0
get the size of a list
bool verbose
verbosity level
Definition: IndexBinary.h:44
size_t k
nb of centroids
Definition: Clustering.h:59
bool is_trained
set if the Index does not require training, or if training is done already
Definition: IndexBinary.h:47
IndexBinary * quantizer
quantizer that maps vectors to inverted lists
int code_size
number of bytes per vector ( = d / 8 )
Definition: IndexBinary.h:42
bool own_fields
whether object owns the quantizer
virtual void merge_from(IndexBinaryIVF &other, idx_t add_id)
Index::idx_t idx_t
all indices are this type
Definition: IndexBinary.h:37
void make_direct_map(bool new_maintain_direct_map=true)
int d
vector dimension
Definition: IndexBinary.h:41
Index * clustering_index
to override index used during clustering
void merge_from(InvertedLists *oivf, size_t add_id)
move all entries from oivf (empty on output)
virtual idx_t get_single_id(size_t list_no, size_t offset) const
int d
vector dimension
Definition: Index.h:66
long idx_t
all indices are this type
Definition: Index.h:62
size_t code_size
code size per vector in bytes
Definition: InvertedLists.h:35
virtual const uint8_t * get_single_code(size_t list_no, size_t offset) const
void train(idx_t n, const uint8_t *x) override
Trains the quantizer.
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k=1)
Definition: IndexBinary.cpp:28
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code)
add one entry to an inverted list
void reset() override
Removes all elements from the database.
double getmillisecs()
ms elapsed since some arbitrary epoch
Definition: utils.cpp:69
std::vector< float > centroids
centroids (k * d)
Definition: Clustering.h:62
void real_to_binary(size_t d, const float *x_in, uint8_t *x_out)
Definition: utils.cpp:1570
void add_with_ids(idx_t n, const uint8_t *x, const long *xids) override
void add(idx_t n, const uint8_t *x) override
size_t nlist
number of possible key values
Definition: InvertedLists.h:34
long remove_ids(const IDSelector &sel) override
Dataset manipulation functions.
idx_t ntotal
total nb of indexed vectors
Definition: IndexBinary.h:43
virtual void add(idx_t n, const uint8_t *x)=0
virtual void search(idx_t n, const uint8_t *x, idx_t k, int32_t *distances, idx_t *labels) const override
virtual void train(idx_t n, const float *x, faiss::Index &index)
Index is used during the assignment stage.
Definition: Clustering.cpp:77
void binary_to_real(size_t d, const uint8_t *x_in, float *x_out)
Definition: utils.cpp:1564
virtual void prefetch_lists(const idx_t *list_nos, int nlist) const
void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k, int32_t *distances, idx_t *labels, uint8_t *recons) const override
void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override
void reconstruct(idx_t key, uint8_t *recons) const override
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:44
InvertedLists * invlists
Access to the actual data.