Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/IndexBinaryIVF.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved
10 // -*- c++ -*-
11 
12 #include "IndexBinaryIVF.h"
13 
14 #include <cstdio>
15 #include <memory>
16 
17 #include "hamming.h"
18 #include "utils.h"
19 
20 #include "AuxIndexStructures.h"
21 #include "FaissAssert.h"
22 #include "IndexFlat.h"
23 
24 
25 namespace faiss {
26 
27 IndexBinaryIVF::IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist)
28  : IndexBinary(d),
29  invlists(new ArrayInvertedLists(nlist, code_size)),
30  own_invlists(true),
31  nprobe(1),
32  max_codes(0),
33  maintain_direct_map(false),
34  quantizer(quantizer),
35  nlist(nlist),
36  own_fields(false),
37  clustering_index(nullptr)
38 {
39  FAISS_THROW_IF_NOT (d == quantizer->d);
40  is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
41 
42  cp.niter = 10;
43 }
44 
45 IndexBinaryIVF::IndexBinaryIVF()
46  : invlists(nullptr),
47  own_invlists(false),
48  nprobe(1),
49  max_codes(0),
50  maintain_direct_map(false),
51  quantizer(nullptr),
52  nlist(0),
53  own_fields(false),
54  clustering_index(nullptr)
55 {}
56 
57 void IndexBinaryIVF::add(idx_t n, const uint8_t *x) {
58  add_with_ids(n, x, nullptr);
59 }
60 
61 void IndexBinaryIVF::add_with_ids(idx_t n, const uint8_t *x, const long *xids) {
62  add_core(n, x, xids, nullptr);
63 }
64 
65 void IndexBinaryIVF::add_core(idx_t n, const uint8_t *x, const long *xids,
66  const long *precomputed_idx) {
67  FAISS_THROW_IF_NOT(is_trained);
68  assert(invlists);
69  FAISS_THROW_IF_NOT_MSG(!(maintain_direct_map && xids),
70  "cannot have direct map and add with ids");
71 
72  const long * idx;
73 
74  std::unique_ptr<long[]> scoped_idx;
75 
76  if (precomputed_idx) {
77  idx = precomputed_idx;
78  } else {
79  scoped_idx.reset(new long[n]);
80  quantizer->assign(n, x, scoped_idx.get());
81  idx = scoped_idx.get();
82  }
83 
84  long n_add = 0;
85  for (size_t i = 0; i < n; i++) {
86  long id = xids ? xids[i] : ntotal + i;
87  long list_no = idx[i];
88 
89  if (list_no < 0)
90  continue;
91  const uint8_t *xi = x + i * code_size;
92  size_t offset = invlists->add_entry(list_no, id, xi);
93 
95  direct_map.push_back(list_no << 32 | offset);
96  n_add++;
97  }
98  if (verbose) {
99  printf("IndexBinaryIVF::add_with_ids: added %ld / %ld vectors\n",
100  n_add, n);
101  }
102  ntotal += n_add;
103 }
104 
105 void IndexBinaryIVF::make_direct_map(bool new_maintain_direct_map) {
106  // nothing to do
107  if (new_maintain_direct_map == maintain_direct_map)
108  return;
109 
110  if (new_maintain_direct_map) {
111  direct_map.resize(ntotal, -1);
112  for (size_t key = 0; key < nlist; key++) {
113  size_t list_size = invlists->list_size(key);
114  const idx_t *idlist = invlists->get_ids(key);
115 
116  for (long ofs = 0; ofs < list_size; ofs++) {
117  FAISS_THROW_IF_NOT_MSG(0 <= idlist[ofs] && idlist[ofs] < ntotal,
118  "direct map supported only for seuquential ids");
119  direct_map[idlist[ofs]] = key << 32 | ofs;
120  }
121  }
122  } else {
123  direct_map.clear();
124  }
125  maintain_direct_map = new_maintain_direct_map;
126 }
127 
128 void IndexBinaryIVF::search(idx_t n, const uint8_t *x, idx_t k,
129  int32_t *distances, idx_t *labels) const {
130  std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
131  std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
132 
133  quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
134 
135  invlists->prefetch_lists(idx.get(), n * nprobe);
136 
137  search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
138  distances, labels, false);
139 }
140 
141 void IndexBinaryIVF::reconstruct(idx_t key, uint8_t *recons) const {
142  FAISS_THROW_IF_NOT_MSG(direct_map.size() == ntotal,
143  "direct map is not initialized");
144  long list_no = direct_map[key] >> 32;
145  long offset = direct_map[key] & 0xffffffff;
146  reconstruct_from_offset(list_no, offset, recons);
147 }
148 
149 void IndexBinaryIVF::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
150  FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
151 
152  for (long list_no = 0; list_no < nlist; list_no++) {
153  size_t list_size = invlists->list_size(list_no);
154  const Index::idx_t *idlist = invlists->get_ids(list_no);
155 
156  for (long offset = 0; offset < list_size; offset++) {
157  long id = idlist[offset];
158  if (!(id >= i0 && id < i0 + ni)) {
159  continue;
160  }
161 
162  uint8_t *reconstructed = recons + (id - i0) * d;
163  reconstruct_from_offset(list_no, offset, reconstructed);
164  }
165  }
166 }
167 
169  int32_t *distances, idx_t *labels,
170  uint8_t *recons) const {
171  std::unique_ptr<idx_t[]> idx(new long[n * nprobe]);
172  std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
173 
174  quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
175 
176  invlists->prefetch_lists(idx.get(), n * nprobe);
177 
178  // search_preassigned() with `store_pairs` enabled to obtain the list_no
179  // and offset into `codes` for reconstruction
180  search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
181  distances, labels, /* store_pairs */true);
182  for (idx_t i = 0; i < n; ++i) {
183  for (idx_t j = 0; j < k; ++j) {
184  idx_t ij = i * k + j;
185  idx_t key = labels[ij];
186  uint8_t *reconstructed = recons + ij * d;
187  if (key < 0) {
188  // Fill with NaNs
189  memset(reconstructed, -1, sizeof(*reconstructed) * d);
190  } else {
191  int list_no = key >> 32;
192  int offset = key & 0xffffffff;
193 
194  // Update label to the actual id
195  labels[ij] = invlists->get_single_id(list_no, offset);
196 
197  reconstruct_from_offset(list_no, offset, reconstructed);
198  }
199  }
200  }
201 }
202 
203 void IndexBinaryIVF::reconstruct_from_offset(long list_no, long offset,
204  uint8_t *recons) const {
205  memcpy(recons, invlists->get_single_code(list_no, offset), code_size);
206 }
207 
209  direct_map.clear();
210  invlists->reset();
211  ntotal = 0;
212 }
213 
215  FAISS_THROW_IF_NOT_MSG(!maintain_direct_map,
216  "direct map remove not implemented");
217 
218  std::vector<long> toremove(nlist);
219 
220 #pragma omp parallel for
221  for (long i = 0; i < nlist; i++) {
222  long l0 = invlists->list_size (i), l = l0, j = 0;
223  const idx_t *idsi = invlists->get_ids(i);
224  while (j < l) {
225  if (sel.is_member(idsi[j])) {
226  l--;
227  invlists->update_entry(
228  i, j,
229  invlists->get_single_id(i, l),
230  invlists->get_single_code(i, l));
231  } else {
232  j++;
233  }
234  }
235  toremove[i] = l0 - l;
236  }
237  // this will not run well in parallel on ondisk because of possible shrinks
238  long nremove = 0;
239  for (long i = 0; i < nlist; i++) {
240  if (toremove[i] > 0) {
241  nremove += toremove[i];
242  invlists->resize(
243  i, invlists->list_size(i) - toremove[i]);
244  }
245  }
246  ntotal -= nremove;
247  return nremove;
248 }
249 
250 void IndexBinaryIVF::train(idx_t n, const uint8_t *x) {
251  if (verbose)
252  printf("Training level-1 quantizer\n");
253 
254  train_q1(n, x, verbose);
255 
256  is_trained = true;
257 }
258 
260  std::vector<int> hist(nlist);
261 
262  for (int i = 0; i < nlist; i++) {
263  hist[i] = invlists->list_size(i);
264  }
265 
266  return faiss::imbalance_factor(nlist, hist.data());
267 }
268 
270  std::vector<int> sizes(40);
271  for (int i = 0; i < nlist; i++) {
272  for (int j = 0; j < sizes.size(); j++) {
273  if ((invlists->list_size(i) >> j) == 0) {
274  sizes[j]++;
275  break;
276  }
277  }
278  }
279  for (int i = 0; i < sizes.size(); i++) {
280  if (sizes[i]) {
281  printf("list size in < %d: %d instances\n", 1 << i, sizes[i]);
282  }
283  }
284 }
285 
287  // minimal sanity checks
288  FAISS_THROW_IF_NOT(other.d == d);
289  FAISS_THROW_IF_NOT(other.nlist == nlist);
290  FAISS_THROW_IF_NOT(other.code_size == code_size);
291  FAISS_THROW_IF_NOT_MSG((!maintain_direct_map &&
292  !other.maintain_direct_map),
293  "direct map copy not implemented");
294  FAISS_THROW_IF_NOT_MSG(typeid (*this) == typeid (other),
295  "can only merge indexes of the same type");
296 
297  invlists->merge_from (other.invlists, add_id);
298 
299  ntotal += other.ntotal;
300  other.ntotal = 0;
301 }
302 
303 void IndexBinaryIVF::replace_invlists(InvertedLists *il, bool own) {
304  FAISS_THROW_IF_NOT(il->nlist == nlist &&
305  il->code_size == code_size);
306  if (own_invlists) {
307  delete invlists;
308  }
309  invlists = il;
310  own_invlists = own;
311 }
312 
313 
314 void IndexBinaryIVF::train_q1(size_t n, const uint8_t *x, bool verbose) {
315  if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
316  if (verbose)
317  printf("IVF quantizer does not need training.\n");
318  } else {
319  if (verbose)
320  printf("Training level-1 quantizer on %ld vectors in %dD\n", n, d);
321 
322  Clustering clus(d, nlist, cp);
323  quantizer->reset();
324 
325  std::unique_ptr<float[]> x_f(new float[n * d]);
326  binary_to_real(n * d, x, x_f.get());
327 
328  IndexFlatL2 index_tmp(d);
329 
330  if (clustering_index && verbose) {
331  printf("using clustering_index of dimension %d to do the clustering\n",
333  }
334 
335  clus.train(n, x_f.get(), clustering_index ? *clustering_index : index_tmp);
336 
337  std::unique_ptr<uint8_t[]> x_b(new uint8_t[clus.k * code_size]);
338  real_to_binary(d * clus.k, clus.centroids.data(), x_b.get());
339 
340  quantizer->add(clus.k, x_b.get());
341  quantizer->is_trained = true;
342  }
343 }
344 
345 
346 namespace {
347 
348 using idx_t = Index::idx_t;
349 
350 
351 template<class HammingComputer, bool store_pairs>
352 struct IVFBinaryScannerL2: BinaryInvertedListScanner {
353 
354  HammingComputer hc;
355  size_t code_size;
356 
357  IVFBinaryScannerL2 (size_t code_size): code_size (code_size)
358  {}
359 
360  void set_query (const uint8_t *query_vector) override {
361  hc.set (query_vector, code_size);
362  }
363 
364  idx_t list_no;
365  void set_list (idx_t list_no, uint8_t /* coarse_dis */) override {
366  this->list_no = list_no;
367  }
368 
369  uint32_t distance_to_code (const uint8_t *code) const override {
370  return hc.hamming (code);
371  }
372 
373  size_t scan_codes (size_t n,
374  const uint8_t *codes,
375  const idx_t *ids,
376  int32_t *simi, idx_t *idxi,
377  size_t k) const override
378  {
379  using C = CMax<int32_t, idx_t>;
380 
381  size_t nup = 0;
382  for (size_t j = 0; j < n; j++) {
383  uint32_t dis = hc.hamming (codes);
384  if (dis < simi[0]) {
385  heap_pop<C> (k, simi, idxi);
386  long id = store_pairs ? (list_no << 32 | j) : ids[j];
387  heap_push<C> (k, simi, idxi, dis, id);
388  nup++;
389  }
390  codes += code_size;
391  }
392  return nup;
393  }
394 
395 
396 };
397 
398 
399 template <bool store_pairs>
400 BinaryInvertedListScanner *select_IVFBinaryScannerL2 (size_t code_size) {
401 
402  switch (code_size) {
403 #define HANDLE_CS(cs) \
404  case cs: \
405  return new IVFBinaryScannerL2<HammingComputer ## cs, store_pairs> (cs);
406  HANDLE_CS(4);
407  HANDLE_CS(8);
408  HANDLE_CS(16);
409  HANDLE_CS(20);
410  HANDLE_CS(32);
411  HANDLE_CS(64);
412 #undef HANDLE_CS
413  default:
414  if (code_size % 8 == 0) {
415  return new IVFBinaryScannerL2<HammingComputerM8,
416  store_pairs> (code_size);
417  } else if (code_size % 4 == 0) {
418  return new IVFBinaryScannerL2<HammingComputerM4,
419  store_pairs> (code_size);
420  } else {
421  return new IVFBinaryScannerL2<HammingComputerDefault,
422  store_pairs> (code_size);
423  }
424  }
425 }
426 
427 
// Heap-based k-NN search over preassigned inverted lists.
//
// For each of the n queries in x (code_size bytes each), scans the nprobe
// lists named in keys[i * nprobe ...] with a scanner obtained from
// ivf.get_InvertedListScanner(store_pairs) and collects the results in the
// k-element heap made of distances[k * i ...] / labels[k * i ...].
//
// nprobe and max_codes come from params when it is non-null, otherwise
// from ivf. When store_pairs is set, the id arrays of the lists are not
// fetched (nullptr is passed to scan_codes).
//
// Adds n, the number of visited lists, the number of scanned codes and the
// number of heap updates to indexIVF_stats.
//
// NOTE(review): FAISS_THROW_IF_NOT_FMT is used inside the OpenMP parallel
// region — confirm how an exception raised there is expected to propagate.
428 void search_knn_hamming_heap(const IndexBinaryIVF& ivf,
429  size_t n,
430  const uint8_t *x,
431  idx_t k,
432  const idx_t *keys,
433  const int32_t * coarse_dis,
434  int32_t *distances, idx_t *labels,
435  bool store_pairs,
436  const IVFSearchParameters *params)
437 {
438  long nprobe = params ? params->nprobe : ivf.nprobe;
439  long max_codes = params ? params->max_codes : ivf.max_codes;
440  MetricType metric_type = ivf.metric_type;
441 
442  // almost verbatim copy from IndexIVF::search_preassigned
443 
// Counters accumulated across threads by the OpenMP reduction below.
444  size_t nlistv = 0, ndis = 0, nheap = 0;
445  using HeapForIP = CMin<int32_t, idx_t>;
446  using HeapForL2 = CMax<int32_t, idx_t>;
447 
448 #pragma omp parallel if(n > 1) reduction(+: nlistv, ndis, nheap)
449  {
// One scanner per thread, freed automatically at the end of the region.
450  std::unique_ptr<BinaryInvertedListScanner> scanner
451  (ivf.get_InvertedListScanner (store_pairs));
452 
453 #pragma omp for
454  for (size_t i = 0; i < n; i++) {
455  const uint8_t *xi = x + i * ivf.code_size;
456  scanner->set_query(xi);
457 
// Slices of the inputs/outputs that belong to query i.
458  const long * keysi = keys + i * nprobe;
459  int32_t * simi = distances + k * i;
460  long * idxi = labels + k * i;
461 
// Prepare the result arrays as a k-element heap (CMin for inner product,
// CMax otherwise).
462  if (metric_type == METRIC_INNER_PRODUCT) {
463  heap_heapify<HeapForIP> (k, simi, idxi);
464  } else {
465  heap_heapify<HeapForL2> (k, simi, idxi);
466  }
467 
// Number of codes scanned so far for this query.
468  size_t nscan = 0;
469 
470  for (size_t ik = 0; ik < nprobe; ik++) {
471  long key = keysi[ik]; /* select the list */
472  if (key < 0) {
473  // not enough centroids for multiprobe
474  continue;
475  }
476  FAISS_THROW_IF_NOT_FMT
477  (key < (long) ivf.nlist,
478  "Invalid key=%ld at ik=%ld nlist=%ld\n",
479  key, ik, ivf.nlist);
480 
481  scanner->set_list (key, coarse_dis[i * nprobe + ik]);
482 
483  nlistv++;
484 
485  size_t list_size = ivf.invlists->list_size(key);
486  InvertedLists::ScopedCodes scodes (ivf.invlists, key);
487  const Index::idx_t * ids = store_pairs ? nullptr :
488  ivf.invlists->get_ids (key);
489 
490  nheap += scanner->scan_codes (list_size, scodes.get(),
491  ids, simi, idxi, k);
492 
// ids is null when store_pairs is set, so there is nothing to release then.
493  if (ids) {
494  ivf.invlists->release_ids (ids);
495  }
496 
// Stop probing further lists once max_codes codes were scanned
// (max_codes == 0 disables the limit).
497  nscan += list_size;
498  if (max_codes && nscan >= max_codes)
499  break;
500  }
501 
502  ndis += nscan;
// Reorder the heap content with the comparator matching the metric.
503  if (metric_type == METRIC_INNER_PRODUCT) {
504  heap_reorder<HeapForIP> (k, simi, idxi);
505  } else {
506  heap_reorder<HeapForL2> (k, simi, idxi);
507  }
508 
509  } // parallel for
510  } // parallel
511 
512  indexIVF_stats.nq += n;
513  indexIVF_stats.nlist += nlistv;
514  indexIVF_stats.ndis += ndis;
515  indexIVF_stats.nheap_updates += nheap;
516 
517 }
518 
// Counting-based k-NN search over preassigned inverted lists.
//
// Instead of a heap, each query gets ivf.d + 1 buckets indexed by distance
// (the bucket index is what gets written to `distances`), with room for k
// ids per bucket. Every code of the probed lists is handed to the query's
// HCounterState via update_counter(); the first k ids found when walking
// the buckets in increasing order are the results. Missing results are
// padded with label -1 and distance INT32_MAX.
//
// nprobe and max_codes come from params when it is non-null, otherwise
// from ivf. With store_pairs set, labels are `key << 32 | j` (list number
// and offset) and the id arrays are not fetched.
//
// Adds nx, the number of visited lists and the number of scanned codes to
// indexIVF_stats.
//
// NOTE(review): FAISS_THROW_IF_NOT_FMT is used inside the OpenMP parallel
// loop — confirm how an exception raised there is expected to propagate.
519 template<class HammingComputer, bool store_pairs>
520 void search_knn_hamming_count(const IndexBinaryIVF& ivf,
521  size_t nx,
522  const uint8_t *x,
523  const long *keys,
524  int k,
525  int32_t *distances,
526  long *labels,
527  const IVFSearchParameters *params) {
// Backing storage shared by all per-query counter states:
// nBuckets counters and nBuckets * k id slots per query.
528  const int nBuckets = ivf.d + 1;
529  std::vector<int> all_counters(nx * nBuckets, 0);
530  std::unique_ptr<long[]> all_ids_per_dis(new long[nx * nBuckets * k]);
531 
532  long nprobe = params ? params->nprobe : ivf.nprobe;
533  long max_codes = params ? params->max_codes : ivf.max_codes;
534 
// One counter state per query, each pointing at its own slice of the
// buffers above and at its own query code.
535  std::vector<HCounterState<HammingComputer>> cs;
536  for (size_t i = 0; i < nx; ++i) {
537  cs.push_back(HCounterState<HammingComputer>(
538  all_counters.data() + i * nBuckets,
539  all_ids_per_dis.get() + i * nBuckets * k,
540  x + i * ivf.code_size,
541  ivf.d,
542  k
543  ));
544  }
545 
// Counters accumulated across threads by the OpenMP reduction below.
546  size_t nlistv = 0, ndis = 0;
547 
548 #pragma omp parallel for reduction(+: nlistv, ndis)
549  for (size_t i = 0; i < nx; i++) {
550  const long * keysi = keys + i * nprobe;
551  HCounterState<HammingComputer>& csi = cs[i];
552 
// Number of codes scanned so far for this query.
553  size_t nscan = 0;
554 
555  for (size_t ik = 0; ik < nprobe; ik++) {
556  long key = keysi[ik]; /* select the list */
557  if (key < 0) {
558  // not enough centroids for multiprobe
559  continue;
560  }
561  FAISS_THROW_IF_NOT_FMT (
562  key < (long) ivf.nlist,
563  "Invalid key=%ld at ik=%ld nlist=%ld\n",
564  key, ik, ivf.nlist);
565 
566  nlistv++;
567  size_t list_size = ivf.invlists->list_size(key);
568  InvertedLists::ScopedCodes scodes (ivf.invlists, key);
569  const uint8_t *list_vecs = scodes.get();
570  const Index::idx_t *ids = store_pairs
571  ? nullptr
572  : ivf.invlists->get_ids(key);
573 
// Feed every code of the list, with its label, to the counter state.
574  for (size_t j = 0; j < list_size; j++) {
575  const uint8_t * yj = list_vecs + ivf.code_size * j;
576 
577  long id = store_pairs ? (key << 32 | j) : ids[j];
578  csi.update_counter(yj, id);
579  }
// ids is null when store_pairs is set, so there is nothing to release then.
580  if (ids)
581  ivf.invlists->release_ids (ids);
582 
// Stop probing further lists once max_codes codes were scanned
// (max_codes == 0 disables the limit).
583  nscan += list_size;
584  if (max_codes && nscan >= max_codes)
585  break;
586  }
587  ndis += nscan;
588 
// Emit up to k results by walking the buckets in increasing order;
// the bucket index b is reported as the distance.
589  int nres = 0;
590  for (int b = 0; b < nBuckets && nres < k; b++) {
591  for (int l = 0; l < csi.counters[b] && nres < k; l++) {
592  labels[i * k + nres] = csi.ids_per_dis[b * k + l];
593  distances[i * k + nres] = b;
594  nres++;
595  }
596  }
// Pad the remaining slots when fewer than k results were found.
597  while (nres < k) {
598  labels[i * k + nres] = -1;
599  distances[i * k + nres] = std::numeric_limits<int32_t>::max();
600  ++nres;
601  }
602  }
603 
604  indexIVF_stats.nq += nx;
605  indexIVF_stats.nlist += nlistv;
606  indexIVF_stats.ndis += ndis;
607 }
608 
609 
610 
611 template<bool store_pairs>
612 void search_knn_hamming_count_1 (
613  const IndexBinaryIVF& ivf,
614  size_t nx,
615  const uint8_t *x,
616  const long *keys,
617  int k,
618  int32_t *distances,
619  long *labels,
620  const IVFSearchParameters *params) {
621  switch (ivf.code_size) {
622 #define HANDLE_CS(cs) \
623  case cs: \
624  search_knn_hamming_count<HammingComputer ## cs, store_pairs>( \
625  ivf, nx, x, keys, k, distances, labels, params); \
626  break;
627  HANDLE_CS(4);
628  HANDLE_CS(8);
629  HANDLE_CS(16);
630  HANDLE_CS(20);
631  HANDLE_CS(32);
632  HANDLE_CS(64);
633 #undef HANDLE_CS
634  default:
635  if (ivf.code_size % 8 == 0) {
636  search_knn_hamming_count<HammingComputerM8, store_pairs>
637  (ivf, nx, x, keys, k, distances, labels, params);
638  } else if (ivf.code_size % 4 == 0) {
639  search_knn_hamming_count<HammingComputerM4, store_pairs>
640  (ivf, nx, x, keys, k, distances, labels, params);
641  } else {
642  search_knn_hamming_count<HammingComputerDefault, store_pairs>
643  (ivf, nx, x, keys, k, distances, labels, params);
644  }
645  break;
646  }
647 
648 }
649 
650 } // namespace
651 
652 BinaryInvertedListScanner *IndexBinaryIVF::get_InvertedListScanner
653  (bool store_pairs) const
654 {
655  if (store_pairs) {
656  return select_IVFBinaryScannerL2<true> (code_size);
657  } else {
658  return select_IVFBinaryScannerL2<false> (code_size);
659  }
660 }
661 
662 void IndexBinaryIVF::search_preassigned(idx_t n, const uint8_t *x, idx_t k,
663  const idx_t *idx,
664  const int32_t * coarse_dis,
665  int32_t *distances, idx_t *labels,
666  bool store_pairs,
667  const IVFSearchParameters *params
668  ) const {
669 
670  if (use_heap) {
671  search_knn_hamming_heap (*this, n, x, k, idx, coarse_dis,
672  distances, labels, store_pairs,
673  params);
674  } else {
675  if (store_pairs) {
676  search_knn_hamming_count_1<true>
677  (*this, n, x, idx, k, distances, labels, params);
678  } else {
679  search_knn_hamming_count_1<false>
680  (*this, n, x, idx, k, distances, labels, params);
681  }
682  }
683 }
684 
685 IndexBinaryIVF::~IndexBinaryIVF() {
686  if (own_invlists) {
687  delete invlists;
688  }
689 
690  if (own_fields) {
691  delete quantizer;
692  }
693 }
694 
695 
696 } // namespace faiss
virtual void search(idx_t n, const uint8_t *x, idx_t k, int32_t *distances, idx_t *labels) const =0
size_t nprobe
number of probes at query time
void add_core(idx_t n, const uint8_t *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
int niter
clustering iterations
Definition: Clustering.h:24
simple (default) implementation as an array of inverted lists
virtual void reconstruct_from_offset(long list_no, long offset, uint8_t *recons) const
virtual const idx_t * get_ids(size_t list_no) const =0
virtual void reset()=0
Removes all elements from the database.
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
ClusteringParameters cp
to override default clustering params
void search_preassigned(idx_t n, const uint8_t *x, idx_t k, const idx_t *assign, const int32_t *centroid_dis, int32_t *distances, idx_t *labels, bool store_pairs, const IVFSearchParameters *params=nullptr) const
size_t nlist
number of possible key values
virtual size_t list_size(size_t list_no) const =0
get the size of a list
bool verbose
verbosity level
Definition: IndexBinary.h:43
size_t k
nb of centroids
Definition: Clustering.h:59
double imbalance_factor(int n, int k, const long *assign)
a balanced assignment has a IF of 1
Definition: utils.cpp:1292
bool is_trained
set if the Index does not require training, or if training is done already
Definition: IndexBinary.h:46
IndexBinary * quantizer
quantizer that maps vectors to inverted lists
int code_size
number of bytes per vector ( = d / 8 )
Definition: IndexBinary.h:41
bool own_fields
whether object owns the quantizer
virtual void merge_from(IndexBinaryIVF &other, idx_t add_id)
void make_direct_map(bool new_maintain_direct_map=true)
int d
vector dimension
Definition: IndexBinary.h:40
Index * clustering_index
to override index used during clustering
void merge_from(InvertedLists *oivf, size_t add_id)
move all entries from oivf (empty on output)
virtual idx_t get_single_id(size_t list_no, size_t offset) const
int d
vector dimension
Definition: Index.h:66
size_t code_size
code size per vector in bytes
Definition: InvertedLists.h:36
void train_q1(size_t n, const uint8_t *x, bool verbose)
Trains the quantizer and calls train_residual to train sub-quantizers.
virtual const uint8_t * get_single_code(size_t list_no, size_t offset) const
double imbalance_factor() const
1 = perfectly balanced, >1: imbalanced
void train(idx_t n, const uint8_t *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k=1)
Definition: IndexBinary.cpp:29
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code)
add one entry to an inverted list
void reset() override
Removes all elements from the database.
long idx_t
all indices are this type
Definition: Index.h:64
std::vector< float > centroids
centroids (k * d)
Definition: Clustering.h:62
virtual void prefetch_lists(const long *list_nos, int nlist) const
void real_to_binary(size_t d, const float *x_in, uint8_t *x_out)
Definition: utils.cpp:1558
void print_stats() const
display some stats about the inverted lists
void add_with_ids(idx_t n, const uint8_t *x, const long *xids) override
void add(idx_t n, const uint8_t *x) override
Quantizes x and calls add_with_key.
size_t nlist
number of possible key values
Definition: InvertedLists.h:35
long remove_ids(const IDSelector &sel) override
Dataset manipulation functions.
idx_t ntotal
total nb of indexed vectors
Definition: IndexBinary.h:42
long idx_t
all indices are this type
Definition: IndexBinary.h:38
virtual void add(idx_t n, const uint8_t *x)=0
virtual void search(idx_t n, const uint8_t *x, idx_t k, int32_t *distances, idx_t *labels) const override
virtual void train(idx_t n, const float *x, faiss::Index &index)
Index is used during the assignment stage.
Definition: Clustering.cpp:64
void binary_to_real(size_t d, const uint8_t *x_in, float *x_out)
Definition: utils.cpp:1552
void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k, int32_t *distances, idx_t *labels, uint8_t *recons) const override
void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override
void reconstruct(idx_t key, uint8_t *recons) const override
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:45
InvertedLists * invlists
Access to the actual data.