Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVF.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the CC-by-NC license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 /* Copyright 2004-present Facebook. All Rights Reserved.
10  Inverted list structure.
11 */
12 
13 #include "IndexIVF.h"
14 
15 #include <cstdio>
16 
17 #include "utils.h"
18 #include "hamming.h"
19 
20 #include "FaissAssert.h"
21 #include "IndexFlat.h"
22 #include "AuxIndexStructures.h"
23 
24 namespace faiss {
25 
26 /*****************************************
27  * IndexIVF implementation
28  ******************************************/
29 
30 
31 IndexIVF::IndexIVF (Index * quantizer, size_t d, size_t nlist,
32  MetricType metric):
33  Index (d, metric),
34  nlist (nlist),
35  nprobe (1),
36  quantizer (quantizer),
37  quantizer_trains_alone (false),
38  own_fields (false),
39  ids (nlist),
40  maintain_direct_map (false)
41 {
42  FAISS_THROW_IF_NOT (d == quantizer->d);
43  is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
44  // Spherical by default if the metric is inner_product
45  if (metric_type == METRIC_INNER_PRODUCT) {
46  cp.spherical = true;
47  }
48  // here we set a low # iterations because this is typically used
49  // for large clusterings (nb this is not used for the MultiIndex,
50  // for which quantizer_trains_alone = true)
51  cp.niter = 10;
52  cp.verbose = verbose;
53 
54 }
55 
56 IndexIVF::IndexIVF ():
57  nlist (0), nprobe (1), quantizer (nullptr),
58  quantizer_trains_alone (false), own_fields (false),
59  maintain_direct_map (false)
60 {}
61 
62 
63 void IndexIVF::add (idx_t n, const float * x)
64 {
65  add_with_ids (n, x, nullptr);
66 }
67 
68 void IndexIVF::make_direct_map (bool new_maintain_direct_map)
69 {
70  // nothing to do
71  if (new_maintain_direct_map == maintain_direct_map)
72  return;
73 
74  if (new_maintain_direct_map) {
75  direct_map.resize (ntotal, -1);
76  for (size_t key = 0; key < nlist; key++) {
77  const std::vector<long> & idlist = ids[key];
78 
79  for (long ofs = 0; ofs < idlist.size(); ofs++) {
80  FAISS_THROW_IF_NOT_MSG (
81  0 <= idlist [ofs] && idlist[ofs] < ntotal,
82  "direct map supported only for seuquential ids");
83  direct_map [idlist [ofs]] = key << 32 | ofs;
84  }
85  }
86  } else {
87  direct_map.clear ();
88  }
89  maintain_direct_map = new_maintain_direct_map;
90 }
91 
92 
94 {
95  ntotal = 0;
96  direct_map.clear();
97  for (size_t i = 0; i < ids.size(); i++)
98  ids[i].clear();
99 }
100 
101 
102 void IndexIVF::train (idx_t n, const float *x)
103 {
104  if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
105  if (verbose)
106  printf ("IVF quantizer does not need training.\n");
107  } else if (quantizer_trains_alone) {
108  if (verbose)
109  printf ("IVF quantizer trains alone...\n");
110  quantizer->train (n, x);
111  FAISS_THROW_IF_NOT_MSG (quantizer->ntotal == nlist,
112  "nlist not consistent with quantizer size");
113  } else {
114  if (verbose)
115  printf ("Training IVF quantizer on %ld vectors in %dD\n",
116  n, d);
117 
118  Clustering clus (d, nlist, cp);
119 
120  quantizer->reset();
121  clus.train (n, x, *quantizer);
122  quantizer->is_trained = true;
123  }
124  if (verbose)
125  printf ("Training IVF residual\n");
126 
127  train_residual (n, x);
128  is_trained = true;
129 }
130 
131 void IndexIVF::train_residual (idx_t n, const float *x)
132 {
133  if (verbose)
134  printf ("IndexIVF: no residual training\n");
135  // does nothing by default
136 }
137 
138 
139 
141 {
142  std::vector<int> hist (nlist);
143  for (int i = 0; i < nlist; i++) {
144  hist[i] = ids[i].size();
145  }
146  return faiss::imbalance_factor (nlist, hist.data());
147 }
148 
150 {
151  std::vector<int> sizes(40);
152  for (int i = 0; i < nlist; i++) {
153  for (int j = 0; j < sizes.size(); j++) {
154  if ((ids[i].size() >> j) == 0) {
155  sizes[j]++;
156  break;
157  }
158  }
159  }
160  for (int i = 0; i < sizes.size(); i++) {
161  if (sizes[i]) {
162  printf ("list size in < %d: %d instances\n",
163  1 << i, sizes[i]);
164  }
165  }
166 
167 }
168 
169 void IndexIVF::merge_from (IndexIVF &other, idx_t add_id)
170 {
171  // minimal sanity checks
172  FAISS_THROW_IF_NOT (other.d == d);
173  FAISS_THROW_IF_NOT (other.nlist == nlist);
174  FAISS_THROW_IF_NOT_MSG ((!maintain_direct_map &&
175  !other.maintain_direct_map),
176  "direct map copy not implemented");
177  FAISS_THROW_IF_NOT_MSG (typeid (*this) == typeid (other),
178  "can only merge indexes of the same type");
179  for (long i = 0; i < nlist; i++) {
180  std::vector<idx_t> & src = other.ids[i];
181  std::vector<idx_t> & dest = ids[i];
182  for (long j = 0; j < src.size(); j++)
183  dest.push_back (src[j] + add_id);
184  src.clear();
185  }
186  merge_from_residuals (other);
187  ntotal += other.ntotal;
188  other.ntotal = 0;
189 }
190 
191 
192 
193 IndexIVF::~IndexIVF()
194 {
195  if (own_fields) delete quantizer;
196 }
197 
198 
199 
200 /*****************************************
201  * IndexIVFFlat implementation
202  ******************************************/
203 
204 IndexIVFFlat::IndexIVFFlat (Index * quantizer,
205  size_t d, size_t nlist, MetricType metric):
206  IndexIVF (quantizer, d, nlist, metric)
207 {
208  vecs.resize (nlist);
209 }
210 
211 
212 
213 
214 
215 
216 void IndexIVFFlat::add_with_ids (idx_t n, const float * x, const long *xids)
217 {
218  add_core (n, x, xids, nullptr);
219 }
220 
221 void IndexIVFFlat::add_core (idx_t n, const float * x, const long *xids,
222  const long *precomputed_idx)
223 
224 {
225  FAISS_THROW_IF_NOT (is_trained);
226  FAISS_THROW_IF_NOT_MSG (!(maintain_direct_map && xids),
227  "cannot have direct map and add with ids");
228  const long * idx;
229  ScopeDeleter<long> del;
230 
231  if (precomputed_idx) {
232  idx = precomputed_idx;
233  } else {
234  long * idx0 = new long [n];
235  quantizer->assign (n, x, idx0);
236  idx = idx0;
237  del.set (idx);
238  }
239  long n_add = 0;
240  for (size_t i = 0; i < n; i++) {
241  long id = xids ? xids[i] : ntotal + i;
242  long list_no = idx [i];
243  if (list_no < 0)
244  continue;
245  assert (list_no < nlist);
246 
247  ids[list_no].push_back (id);
248  const float *xi = x + i * d;
249  /* store the vectors */
250  for (size_t j = 0 ; j < d ; j++)
251  vecs[list_no].push_back (xi [j]);
252 
254  direct_map.push_back (list_no << 32 | (ids[list_no].size() - 1));
255  n_add++;
256  }
257  if (verbose) {
258  printf("IndexIVFFlat::add_core: added %ld / %ld vectors\n",
259  n_add, n);
260  }
261  ntotal += n_add;
262 }
263 
264 void IndexIVFFlatStats::reset()
265 {
266  memset ((void*)this, 0, sizeof (*this));
267 }
268 
269 
270 IndexIVFFlatStats indexIVFFlat_stats;
271 
273  size_t nx,
274  const float * x,
275  const long * __restrict keys,
276  float_minheap_array_t * res) const
277 {
278 
279  const size_t k = res->k;
280  size_t nlistv = 0, ndis = 0;
281 
282 #pragma omp parallel for reduction(+: nlistv, ndis)
283  for (size_t i = 0; i < nx; i++) {
284  const float * xi = x + i * d;
285  const long * keysi = keys + i * nprobe;
286  float * __restrict simi = res->get_val (i);
287  long * __restrict idxi = res->get_ids (i);
288  minheap_heapify (k, simi, idxi);
289 
290  for (size_t ik = 0; ik < nprobe; ik++) {
291  long key = keysi[ik]; /* select the list */
292  if (key < 0) {
293  // not enough centroids for multiprobe
294  continue;
295  }
296  if (key >= (long) nlist) {
297  fprintf (stderr, "Invalid key=%ld at ik=%ld nlist=%ld\n",
298  key, ik, nlist);
299  throw;
300  }
301  nlistv++;
302  const size_t list_size = ids[key].size();
303  const float * list_vecs = vecs[key].data();
304 
305  for (size_t j = 0; j < list_size; j++) {
306  const float * yj = list_vecs + d * j;
307  float ip = fvec_inner_product (xi, yj, d);
308  if (ip > simi[0]) {
309  minheap_pop (k, simi, idxi);
310  minheap_push (k, simi, idxi, ip, ids[key][j]);
311  }
312  }
313  ndis += list_size;
314  }
315  minheap_reorder (k, simi, idxi);
316  }
317  indexIVFFlat_stats.nq += nx;
318  indexIVFFlat_stats.nlist += nlistv;
319  indexIVFFlat_stats.ndis += ndis;
320 }
321 
322 
324  size_t nx,
325  const float * x,
326  const long * __restrict keys,
327  float_maxheap_array_t * res) const
328 {
329  const size_t k = res->k;
330  size_t nlistv = 0, ndis = 0;
331 
332 #pragma omp parallel for reduction(+: nlistv, ndis)
333  for (size_t i = 0; i < nx; i++) {
334  const float * xi = x + i * d;
335  const long * keysi = keys + i * nprobe;
336  float * __restrict disi = res->get_val (i);
337  long * __restrict idxi = res->get_ids (i);
338  maxheap_heapify (k, disi, idxi);
339 
340  for (size_t ik = 0; ik < nprobe; ik++) {
341  long key = keysi[ik]; /* select the list */
342  if (key < 0) {
343  // not enough centroids for multiprobe
344  continue;
345  }
346  if (key >= (long) nlist) {
347  fprintf (stderr, "Invalid key=%ld at ik=%ld nlist=%ld\n",
348  key, ik, nlist);
349  throw;
350  }
351  nlistv++;
352  const size_t list_size = ids[key].size();
353  const float * list_vecs = vecs[key].data();
354 
355  for (size_t j = 0; j < list_size; j++) {
356  const float * yj = list_vecs + d * j;
357  float disij = fvec_L2sqr (xi, yj, d);
358  if (disij < disi[0]) {
359  maxheap_pop (k, disi, idxi);
360  maxheap_push (k, disi, idxi, disij, ids[key][j]);
361  }
362  }
363  ndis += list_size;
364  }
365  maxheap_reorder (k, disi, idxi);
366  }
367  indexIVFFlat_stats.nq += nx;
368  indexIVFFlat_stats.nlist += nlistv;
369  indexIVFFlat_stats.ndis += ndis;
370 }
371 
372 
373 void IndexIVFFlat::search (idx_t n, const float *x, idx_t k,
374  float *distances, idx_t *labels) const
375 {
376  idx_t * idx = new idx_t [n * nprobe];
377  ScopeDeleter <idx_t> del (idx);
378  quantizer->assign (n, x, idx, nprobe);
379  search_preassigned (n, x, k, idx, distances, labels);
380 }
381 
382 
383 void IndexIVFFlat::search_preassigned (idx_t n, const float *x, idx_t k,
384  const idx_t *idx,
385  float *distances, idx_t *labels) const
386 {
387  if (metric_type == METRIC_INNER_PRODUCT) {
388  float_minheap_array_t res = {
389  size_t(n), size_t(k), labels, distances};
390  search_knn_inner_product (n, x, idx, &res);
391 
392  } else if (metric_type == METRIC_L2) {
393  float_maxheap_array_t res = {
394  size_t(n), size_t(k), labels, distances};
395  search_knn_L2sqr (n, x, idx, &res);
396  }
397 
398 }
399 
400 
401 void IndexIVFFlat::range_search (idx_t nx, const float *x, float radius,
402  RangeSearchResult *result) const
403 {
404  idx_t * keys = new idx_t [nx * nprobe];
405  ScopeDeleter<idx_t> del (keys);
406  quantizer->assign (nx, x, keys, nprobe);
407 
408 #pragma omp parallel
409  {
410  RangeSearchPartialResult pres(result);
411 
412  for (size_t i = 0; i < nx; i++) {
413  const float * xi = x + i * d;
414  const long * keysi = keys + i * nprobe;
415 
417  pres.new_result (i);
418 
419  for (size_t ik = 0; ik < nprobe; ik++) {
420  long key = keysi[ik]; /* select the list */
421  if (key < 0 || key >= (long) nlist) {
422  fprintf (stderr, "Invalid key=%ld at ik=%ld nlist=%ld\n",
423  key, ik, nlist);
424  throw;
425  }
426 
427  const size_t list_size = ids[key].size();
428  const float * list_vecs = vecs[key].data();
429 
430  for (size_t j = 0; j < list_size; j++) {
431  const float * yj = list_vecs + d * j;
432  if (metric_type == METRIC_L2) {
433  float disij = fvec_L2sqr (xi, yj, d);
434  if (disij < radius) {
435  qres.add (disij, ids[key][j]);
436  }
437  } else if (metric_type == METRIC_INNER_PRODUCT) {
438  float disij = fvec_inner_product(xi, yj, d);
439  if (disij > radius) {
440  qres.add (disij, ids[key][j]);
441  }
442  }
443  }
444  }
445  }
446 
447  pres.finalize ();
448  }
449 }
450 
452 {
453  IndexIVFFlat &other = dynamic_cast<IndexIVFFlat &> (other_in);
454  for (int i = 0; i < nlist; i++) {
455  std::vector<float> & src = other.vecs[i];
456  std::vector<float> & dest = vecs[i];
457  for (int j = 0; j < src.size(); j++)
458  dest.push_back (src[j]);
459  src.clear();
460  }
461 }
462 
463 void IndexIVFFlat::copy_subset_to (IndexIVFFlat & other, int subset_type,
464  long a1, long a2) const
465 {
466  FAISS_THROW_IF_NOT (nlist == other.nlist);
467  FAISS_THROW_IF_NOT (!other.maintain_direct_map);
468 
469  for (long list_no = 0; list_no < nlist; list_no++) {
470  const std::vector<idx_t> & ids_in = ids[list_no];
471  std::vector<idx_t> & ids_out = other.ids[list_no];
472  const std::vector<float> & vecs_in = vecs[list_no];
473  std::vector<float> & vecs_out = other.vecs[list_no];
474 
475  for (long i = 0; i < ids_in.size(); i++) {
476  idx_t id = ids_in[i];
477  if (subset_type == 0 && a1 <= id && id < a2) {
478  ids_out.push_back (id);
479  vecs_out.insert (vecs_out.end(),
480  vecs_in.begin() + i * d,
481  vecs_in.begin() + (i + 1) * d);
482  other.ntotal++;
483  }
484  }
485  }
486 }
487 
/** Replace the vectors stored under existing ids.
 *
 * Requires a direct map and a trained index. For each id, the old entry
 * is removed from its inverted list and the new vector is appended to the
 * list chosen by the quantizer; the direct map is updated accordingly.
 *
 * @param n        number of vectors to update
 * @param new_ids  ids to update, each must be in [0, ntotal)
 * @param x        replacement vectors, n * d floats
 */
488 void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
489 {
490  FAISS_THROW_IF_NOT (maintain_direct_map);
491  FAISS_THROW_IF_NOT (is_trained);
// destination list of every new vector, computed in one batch
492  std::vector<idx_t> assign (n);
493  quantizer->assign (n, x, assign.data());
494 
495  for (int i = 0; i < n; i++) {
496  idx_t id = new_ids[i];
497  FAISS_THROW_IF_NOT_MSG (0 <= id && id < ntotal,
498  "id to update out of range");
499  { // remove old one
// direct map entry: offset in the low 32 bits, list number above
500  long dm = direct_map[id];
501  long ofs = dm & 0xffffffff;
502  long il = dm >> 32;
503  size_t l = ids[il].size();
// unless the entry is already last, fill its slot with the last entry
// of the list (id and vector) and repoint that entry's direct map
504  if (ofs != l - 1) {
505  long id2 = ids[il].back();
506  ids[il][ofs] = id2;
507  direct_map[id2] = (il << 32) | ofs;
508  memcpy (vecs[il].data() + ofs * d,
509  vecs[il].data() + (l - 1) * d,
510  d * sizeof(vecs[il][0]));
511  }
// the last slot is now redundant: shrink ids and vectors by one entry
512  ids[il].pop_back();
513  vecs[il].resize((l - 1) * d);
514  }
515  { // insert new one
516  long il = assign[i];
// the new entry goes at the end of its list, i.e. at offset l
517  size_t l = ids[il].size();
518  long dm = (il << 32) | l;
519  direct_map[id] = dm;
520  ids[il].push_back (id);
521  vecs[il].resize((l + 1) * d);
522  memcpy (vecs[il].data() + l * d,
523  x + i * d,
524  d * sizeof(vecs[il][0]));
525  }
526  }
527 
528 }
529 
530 
531 
532 
534 {
535  IndexIVF::reset();
536  for (size_t key = 0; key < nlist; key++) {
537  vecs[key].clear();
538  }
539 }
540 
542 {
543  FAISS_THROW_IF_NOT_MSG (!maintain_direct_map,
544  "direct map remove not implemented");
545  long nremove = 0;
546 #pragma omp parallel for reduction(+: nremove)
547  for (long i = 0; i < nlist; i++) {
548  std::vector<idx_t> & idsi = ids[i];
549  float *vecsi = vecs[i].data();
550 
551  long l = idsi.size(), j = 0;
552  while (j < l) {
553  if (sel.is_member (idsi[j])) {
554  l--;
555  idsi [j] = idsi [l];
556  memmove (vecsi + j * d,
557  vecsi + l * d, d * sizeof (float));
558  } else {
559  j++;
560  }
561  }
562  if (l < idsi.size()) {
563  nremove += idsi.size() - l;
564  idsi.resize (l);
565  vecs[i].resize (l * d);
566  }
567  }
568  ntotal -= nremove;
569  return nremove;
570 }
571 
572 
573 void IndexIVFFlat::reconstruct (idx_t key, float * recons) const
574 {
575  FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal,
576  "direct map is not initialized");
577  int list_no = direct_map[key] >> 32;
578  int ofs = direct_map[key] & 0xffffffff;
579  memcpy (recons, &vecs[list_no][ofs * d], d * sizeof(recons[0]));
580 }
581 
582 
583 
584 
585 /*****************************************
586  * IndexIVFFlatIPBounds implementation
587  ******************************************/
588 
589 IndexIVFFlatIPBounds::IndexIVFFlatIPBounds (
590  Index * quantizer, size_t d, size_t nlist,
591  size_t fsize):
592  IndexIVFFlat(quantizer, d, nlist, METRIC_INNER_PRODUCT), fsize(fsize)
593 {
594  part_norms.resize(nlist);
595 }
596 
597 
598 
599 void IndexIVFFlatIPBounds::add_core (idx_t n, const float * x, const long *xids,
600  const long *precomputed_idx) {
601 
602  FAISS_THROW_IF_NOT (is_trained);
603  const long * idx;
604  ScopeDeleter<long> del;
605 
606  if (precomputed_idx) {
607  idx = precomputed_idx;
608  } else {
609  long * idx0 = new long [n];
610  quantizer->assign (n, x, idx0);
611  idx = idx0;
612  del.set (idx);
613  }
614  IndexIVFFlat::add_core(n, x, xids, idx);
615 
616  // compute
617  const float * xi = x + fsize;
618  for (size_t i = 0; i < n; i++) {
619  float norm = std::sqrt (fvec_norm_L2sqr (xi, d - fsize));
620  part_norms[idx[i]].push_back(norm);
621  xi += d;
622  }
623 
624 
625 }
626 
627 namespace {
628 
629 void search_bounds_knn_inner_product (
630  const IndexIVFFlatIPBounds & ivf,
631  const float *x,
632  const long *keys,
634  const float *qnorms)
635 {
636 
637  size_t k = res->k, nx = res->nh, nprobe = ivf.nprobe;
638  size_t d = ivf.d;
639  int fsize = ivf.fsize;
640 
641  size_t nlistv = 0, ndis = 0, npartial = 0;
642 
643 #pragma omp parallel for reduction(+: nlistv, ndis, npartial)
644  for (size_t i = 0; i < nx; i++) {
645  const float * xi = x + i * d;
646  const long * keysi = keys + i * nprobe;
647  float qnorm = qnorms[i];
648  float * __restrict simi = res->get_val (i);
649  long * __restrict idxi = res->get_ids (i);
650  minheap_heapify (k, simi, idxi);
651 
652  for (size_t ik = 0; ik < nprobe; ik++) {
653  long key = keysi[ik]; /* select the list */
654  if (key < 0) {
655  // not enough centroids for multiprobe
656  continue;
657  }
658  assert (key < (long) ivf.nlist);
659  nlistv++;
660 
661  const size_t list_size = ivf.ids[key].size();
662  const float * yj = ivf.vecs[key].data();
663  const float * bnorms = ivf.part_norms[key].data();
664 
665  for (size_t j = 0; j < list_size; j++) {
666  float ip_part = fvec_inner_product (xi, yj, fsize);
667  float bound = ip_part + bnorms[j] * qnorm;
668 
669  if (bound > simi[0]) {
670  float ip = ip_part + fvec_inner_product (
671  xi + fsize, yj + fsize, d - fsize);
672  if (ip > simi[0]) {
673  minheap_pop (k, simi, idxi);
674  minheap_push (k, simi, idxi, ip, ivf.ids[key][j]);
675  }
676  ndis ++;
677  }
678  yj += d;
679  }
680  npartial += list_size;
681  }
682  minheap_reorder (k, simi, idxi);
683  }
684  indexIVFFlat_stats.nq += nx;
685  indexIVFFlat_stats.nlist += nlistv;
686  indexIVFFlat_stats.ndis += ndis;
687  indexIVFFlat_stats.npartial += npartial;
688 }
689 
690 
691 }
692 
693 
695  idx_t n, const float *x, idx_t k,
696  float *distances, idx_t *labels) const
697 {
698  // compute query remainder norms and distances
699  idx_t * idx = new idx_t [n * nprobe];
700  ScopeDeleter<idx_t> del (idx);
701  quantizer->assign (n, x, idx, nprobe);
702 
703  float * qnorms = new float [n];
704  ScopeDeleter <float> del2 (qnorms);
705 
706 #pragma omp parallel for
707  for (size_t i = 0; i < n; i++) {
708  qnorms[i] = std::sqrt (fvec_norm_L2sqr (
709  x + i * d + fsize, d - fsize));
710  }
711 
712  float_minheap_array_t res = {
713  size_t(n), size_t(k), labels, distances};
714 
715  search_bounds_knn_inner_product (*this, x, idx, &res, qnorms);
716 
717 }
718 
719 } // namespace faiss
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, float *distances, idx_t *labels) const
perform search, without computing the assignment to the quantizer
Definition: IndexIVF.cpp:383
int niter
clustering iterations
Definition: Clustering.h:25
result structure for a single query
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
Definition: utils.cpp:481
void search_knn_L2sqr(size_t nx, const float *x, const long *keys, float_maxheap_array_t *res) const
Implementation of the search for the L2 metric.
Definition: IndexIVF.cpp:323
T * get_val(size_t key)
Return the list of values for a heap.
Definition: Heap.h:360
double imbalance_factor() const
1 = perfectly balanced, >1: imbalanced
Definition: IndexIVF.cpp:140
virtual void reset()=0
removes all elements from the database.
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:47
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:573
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:23
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:50
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:401
void copy_subset_to(IndexIVFFlat &other, int subset_type, long a1, long a2) const
Definition: IndexIVF.cpp:463
void merge_from_residuals(IndexIVF &other) override
Definition: IndexIVF.cpp:451
virtual void add_with_ids(idx_t n, const float *x, const long *xids)
Definition: Index.cpp:31
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:131
size_t k
allocated size per heap
Definition: Heap.h:355
double imbalance_factor(int n, int k, const long *assign)
a balanced assignment has a IF of 1
Definition: utils.cpp:1593
long remove_ids(const IDSelector &sel) override
Definition: IndexIVF.cpp:541
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:55
int d
vector dimension
Definition: Index.h:64
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:49
void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:102
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:53
void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
Definition: IndexIVF.cpp:216
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:51
long idx_t
all indices are this type
Definition: Index.h:62
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:533
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
bool verbose
verbosity level
Definition: Index.h:66
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:93
QueryResult & new_result(idx_t qno)
begin a new result
void update_vectors(int nv, idx_t *idx, const float *v)
Definition: IndexIVF.cpp:488
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:694
std::vector< std::vector< float > > part_norms
norm of remainder (dimensions fsize:d)
Definition: IndexIVF.h:213
float fvec_norm_L2sqr(const float *x, size_t d)
Definition: utils.cpp:538
size_t fsize
nb of dimensions of pre-filter
Definition: IndexIVF.h:210
the entries in the buffers are split per query
virtual void merge_from_residuals(IndexIVF &other)=0
void make_direct_map(bool new_maintain_direct_map=true)
Definition: IndexIVF.cpp:68
TI * get_ids(size_t key)
Corresponding identifiers.
Definition: Heap.h:363
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
void print_stats() const
display some stats about the inverted lists
Definition: IndexIVF.cpp:149
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx) override
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:599
size_t nh
number of heaps
Definition: Heap.h:354
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
void add(idx_t n, const float *x) override
Quantizes x and calls add_with_key.
Definition: IndexIVF.cpp:63
virtual void train(idx_t n, const float *x, faiss::Index &index)
Index is used during the assignment stage.
Definition: Clustering.cpp:66
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
void search_knn_inner_product(size_t nx, const float *x, const long *keys, float_minheap_array_t *res) const
Implementation of the search for the inner product metric.
Definition: IndexIVF.cpp:272
virtual void train(idx_t n, const float *x)
Definition: Index.h:89
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:58
bool spherical
do we want normalized centroids?
Definition: Clustering.h:29
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:169
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:43
std::vector< std::vector< float > > vecs
Definition: IndexIVF.h:135
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:221
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:373