Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVF.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 /* Copyright 2004-present Facebook. All Rights Reserved.
10  Inverted list structure.
11 */
12 
13 #include "IndexIVF.h"
14 
15 #include <cstdio>
16 
17 #include "utils.h"
18 #include "hamming.h"
19 
20 #include "FaissAssert.h"
21 #include "IndexFlat.h"
22 #include "AuxIndexStructures.h"
23 
24 namespace faiss {
25 
26 /*****************************************
27  * IndexIVF implementation
28  ******************************************/
29 
30 
31 IndexIVF::IndexIVF (Index * quantizer, size_t d, size_t nlist,
32  MetricType metric):
33  Index (d, metric),
34  nlist (nlist),
35  nprobe (1),
36  quantizer (quantizer),
37  quantizer_trains_alone (0),
38  own_fields (false),
39  clustering_index (nullptr),
40  ids (nlist),
41  maintain_direct_map (false)
42 {
43  FAISS_THROW_IF_NOT (d == quantizer->d);
44  is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
45  // Spherical by default if the metric is inner_product
46  if (metric_type == METRIC_INNER_PRODUCT) {
47  cp.spherical = true;
48  }
49  // here we set a low # iterations because this is typically used
50  // for large clusterings (nb this is not used for the MultiIndex,
51  // for which quantizer_trains_alone = true)
52  cp.niter = 10;
53  cp.verbose = verbose;
54  code_size = 0; // let sub-classes set this
55  codes.resize(nlist);
56 }
57 
58 IndexIVF::IndexIVF ():
59  nlist (0), nprobe (1), quantizer (nullptr),
60  quantizer_trains_alone (0), own_fields (false),
61  clustering_index (nullptr),
62  maintain_direct_map (false)
63 {}
64 
65 
66 void IndexIVF::add (idx_t n, const float * x)
67 {
68  add_with_ids (n, x, nullptr);
69 }
70 
71 void IndexIVF::make_direct_map (bool new_maintain_direct_map)
72 {
73  // nothing to do
74  if (new_maintain_direct_map == maintain_direct_map)
75  return;
76 
77  if (new_maintain_direct_map) {
78  direct_map.resize (ntotal, -1);
79  for (size_t key = 0; key < nlist; key++) {
80  const std::vector<long> & idlist = ids[key];
81 
82  for (long ofs = 0; ofs < idlist.size(); ofs++) {
83  FAISS_THROW_IF_NOT_MSG (
84  0 <= idlist [ofs] && idlist[ofs] < ntotal,
85  "direct map supported only for seuquential ids");
86  direct_map [idlist [ofs]] = key << 32 | ofs;
87  }
88  }
89  } else {
90  direct_map.clear ();
91  }
92  maintain_direct_map = new_maintain_direct_map;
93 }
94 
95 
96 void IndexIVF::search (idx_t n, const float *x, idx_t k,
97  float *distances, idx_t *labels) const
98 {
99  long * idx = new long [n * nprobe];
100  ScopeDeleter<long> del (idx);
101  float * coarse_dis = new float [n * nprobe];
102  ScopeDeleter<float> del2 (coarse_dis);
103 
104  quantizer->search (n, x, nprobe, coarse_dis, idx);
105 
106  search_preassigned (n, x, k, idx, coarse_dis,
107  distances, labels, false);
108 
109 }
110 
111 
113 {
114  ntotal = 0;
115  direct_map.clear();
116  for (size_t i = 0; i < ids.size(); i++) {
117  ids[i].clear();
118  codes[i].clear();
119  }
120 }
121 
122 
124 {
125  FAISS_THROW_IF_NOT_MSG (!maintain_direct_map,
126  "direct map remove not implemented");
127  long nremove = 0;
128 #pragma omp parallel for reduction(+: nremove)
129  for (long i = 0; i < nlist; i++) {
130  std::vector<idx_t> & idsi = ids[i];
131  uint8_t * codesi = codes[i].data();
132 
133  long l = idsi.size(), j = 0;
134  while (j < l) {
135  if (sel.is_member (idsi[j])) {
136  l--;
137  idsi [j] = idsi [l];
138  memmove (codesi + j * code_size,
139  codesi + l * code_size, code_size);
140  } else {
141  j++;
142  }
143  }
144  if (l < idsi.size()) {
145  nremove += idsi.size() - l;
146  idsi.resize (l);
147  codes[i].resize (l * code_size);
148  }
149  }
150  ntotal -= nremove;
151  return nremove;
152 }
153 
154 
155 
156 
157 void IndexIVF::train (idx_t n, const float *x)
158 {
159  if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
160  if (verbose)
161  printf ("IVF quantizer does not need training.\n");
162  } else if (quantizer_trains_alone == 1) {
163  if (verbose)
164  printf ("IVF quantizer trains alone...\n");
165  quantizer->train (n, x);
167  FAISS_THROW_IF_NOT_MSG (quantizer->ntotal == nlist,
168  "nlist not consistent with quantizer size");
169  } else if (quantizer_trains_alone == 0) {
170  if (verbose)
171  printf ("Training IVF quantizer on %ld vectors in %dD\n",
172  n, d);
173 
174  Clustering clus (d, nlist, cp);
175  quantizer->reset();
176  if (clustering_index) {
177  clus.train (n, x, *clustering_index);
178  quantizer->add (nlist, clus.centroids.data());
179  } else {
180  clus.train (n, x, *quantizer);
181  }
182  quantizer->is_trained = true;
183  } else if (quantizer_trains_alone == 2) {
184  if (verbose)
185  printf (
186  "Training L2 quantizer on %ld vectors in %dD%s\n",
187  n, d,
188  clustering_index ? "(user provided index)" : "");
189  FAISS_THROW_IF_NOT (metric_type == METRIC_L2);
190  Clustering clus (d, nlist, cp);
191  if (!clustering_index) {
192  IndexFlatL2 assigner (d);
193  clus.train(n, x, assigner);
194  } else {
195  clus.train(n, x, *clustering_index);
196  }
197  if (verbose)
198  printf ("Adding centroids to quantizer\n");
199  quantizer->add (nlist, clus.centroids.data());
200  }
201  if (verbose)
202  printf ("Training IVF residual\n");
203 
204  train_residual (n, x);
205  is_trained = true;
206 }
207 
208 void IndexIVF::train_residual(idx_t /*n*/, const float* /*x*/) {
209  if (verbose)
210  printf("IndexIVF: no residual training\n");
211  // does nothing by default
212 }
213 
214 
215 
217 {
218  std::vector<int> hist (nlist);
219  for (int i = 0; i < nlist; i++) {
220  hist[i] = ids[i].size();
221  }
222  return faiss::imbalance_factor (nlist, hist.data());
223 }
224 
226 {
227  std::vector<int> sizes(40);
228  for (int i = 0; i < nlist; i++) {
229  for (int j = 0; j < sizes.size(); j++) {
230  if ((ids[i].size() >> j) == 0) {
231  sizes[j]++;
232  break;
233  }
234  }
235  }
236  for (int i = 0; i < sizes.size(); i++) {
237  if (sizes[i]) {
238  printf ("list size in < %d: %d instances\n",
239  1 << i, sizes[i]);
240  }
241  }
242 
243 }
244 
245 void IndexIVF::merge_from (IndexIVF &other, idx_t add_id)
246 {
247  // minimal sanity checks
248  FAISS_THROW_IF_NOT (other.d == d);
249  FAISS_THROW_IF_NOT (other.nlist == nlist);
250  FAISS_THROW_IF_NOT_MSG ((!maintain_direct_map &&
251  !other.maintain_direct_map),
252  "direct map copy not implemented");
253  FAISS_THROW_IF_NOT_MSG (typeid (*this) == typeid (other),
254  "can only merge indexes of the same type");
255  for (long i = 0; i < nlist; i++) {
256  std::vector<idx_t> & src = other.ids[i];
257  std::vector<idx_t> & dest = ids[i];
258  for (long j = 0; j < src.size(); j++)
259  dest.push_back (src[j] + add_id);
260  src.clear();
261  codes[i].insert (codes[i].end(),
262  other.codes[i].begin(),
263  other.codes[i].end());
264  other.codes[i].clear();
265  }
266 
267  ntotal += other.ntotal;
268  other.ntotal = 0;
269 }
270 
271 
272 void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
273  long a1, long a2) const
274 {
275  FAISS_THROW_IF_NOT (nlist == other.nlist);
276  FAISS_THROW_IF_NOT (!other.maintain_direct_map);
277  FAISS_THROW_IF_NOT_FMT (
278  subset_type == 0 || subset_type == 1 || subset_type == 2,
279  "subset type %d not implemented", subset_type);
280 
281  size_t accu_n = 0;
282  size_t accu_a1 = 0;
283  size_t accu_a2 = 0;
284 
285  for (long list_no = 0; list_no < nlist; list_no++) {
286  const std::vector<idx_t> & ids_in = ids[list_no];
287  std::vector<idx_t> & ids_out = other.ids[list_no];
288  const std::vector<uint8_t> & codes_in = codes[list_no];
289  std::vector<uint8_t> & codes_out = other.codes[list_no];
290  size_t n = ids_in.size();
291 
292  if (subset_type == 0) {
293  for (long i = 0; i < n; i++) {
294  idx_t id = ids_in[i];
295  if (a1 <= id && id < a2) {
296  ids_out.push_back (id);
297  codes_out.insert (codes_out.end(),
298  codes_in.begin() + i * code_size,
299  codes_in.begin() + (i + 1) * code_size);
300  other.ntotal++;
301  }
302  }
303  } else if (subset_type == 1) {
304  for (long i = 0; i < n; i++) {
305  idx_t id = ids_in[i];
306  if (id % a1 == a2) {
307  ids_out.push_back (id);
308  codes_out.insert (codes_out.end(),
309  codes_in.begin() + i * code_size,
310  codes_in.begin() + (i + 1) * code_size);
311  other.ntotal++;
312  }
313  }
314  } else if (subset_type == 2) {
315  // see what is allocated to a1 and to a2
316  size_t next_accu_n = accu_n + n;
317  size_t next_accu_a1 = next_accu_n * a1 / ntotal;
318  size_t i1 = next_accu_a1 - accu_a1;
319  size_t next_accu_a2 = next_accu_n * a2 / ntotal;
320  size_t i2 = next_accu_a2 - accu_a2;
321  ids_out.insert(ids_out.end(),
322  ids_in.begin() + i1,
323  ids_in.begin() + i2);
324  codes_out.insert (codes_out.end(),
325  codes_in.begin() + i1 * code_size,
326  codes_in.begin() + i2 * code_size);
327  other.ntotal += i2 - i1;
328  accu_a1 = next_accu_a1;
329  accu_a2 = next_accu_a2;
330  }
331  accu_n += n;
332  }
333  FAISS_ASSERT(accu_n == ntotal);
334 }
335 
336 
337 
338 IndexIVF::~IndexIVF()
339 {
340  if (own_fields) delete quantizer;
341 }
342 
343 
344 
345 /*****************************************
346  * IndexIVFFlat implementation
347  ******************************************/
348 
349 IndexIVFFlat::IndexIVFFlat (Index * quantizer,
350  size_t d, size_t nlist, MetricType metric):
351  IndexIVF (quantizer, d, nlist, metric)
352 {
353  code_size = sizeof(float) * d;
354 }
355 
356 
357 
358 
359 
360 
361 void IndexIVFFlat::add_with_ids (idx_t n, const float * x, const long *xids)
362 {
363  add_core (n, x, xids, nullptr);
364 }
365 
366 void IndexIVFFlat::add_core (idx_t n, const float * x, const long *xids,
367  const long *precomputed_idx)
368 
369 {
370  FAISS_THROW_IF_NOT (is_trained);
371  FAISS_THROW_IF_NOT_MSG (!(maintain_direct_map && xids),
372  "cannot have direct map and add with ids");
373  const long * idx;
374  ScopeDeleter<long> del;
375 
376  if (precomputed_idx) {
377  idx = precomputed_idx;
378  } else {
379  long * idx0 = new long [n];
380  quantizer->assign (n, x, idx0);
381  idx = idx0;
382  del.set (idx);
383  }
384  long n_add = 0;
385  for (size_t i = 0; i < n; i++) {
386  long id = xids ? xids[i] : ntotal + i;
387  long list_no = idx [i];
388  if (list_no < 0)
389  continue;
390  assert (list_no < nlist);
391 
392  ids[list_no].push_back (id);
393  const float *xi = x + i * d;
394  /* store the vectors */
395  size_t ofs = codes[list_no].size();
396  codes[list_no].resize(ofs + code_size);
397  memcpy(codes[list_no].data() + ofs,
398  xi, code_size);
399 
401  direct_map.push_back (list_no << 32 | (ids[list_no].size() - 1));
402  n_add++;
403  }
404  if (verbose) {
405  printf("IndexIVFFlat::add_core: added %ld / %ld vectors\n",
406  n_add, n);
407  }
408  ntotal += n_add;
409 }
410 
411 void IndexIVFFlatStats::reset()
412 {
413  memset ((void*)this, 0, sizeof (*this));
414 }
415 
416 
// Global statistics object; the k-NN search helpers in this file add to
// its nq, nlist and ndis counters after each call.
IndexIVFFlatStats indexIVFFlat_stats;
418 
419 namespace {
420 
421 void search_knn_inner_product (const IndexIVFFlat & ivf,
422  size_t nx,
423  const float * x,
424  const long * keys,
425  float_minheap_array_t * res,
426  bool store_pairs)
427 {
428 
429  const size_t k = res->k;
430  size_t nlistv = 0, ndis = 0;
431  size_t d = ivf.d;
432 
433 #pragma omp parallel for reduction(+: nlistv, ndis)
434  for (size_t i = 0; i < nx; i++) {
435  const float * xi = x + i * d;
436  const long * keysi = keys + i * ivf.nprobe;
437  float * __restrict simi = res->get_val (i);
438  long * __restrict idxi = res->get_ids (i);
439  minheap_heapify (k, simi, idxi);
440 
441  for (size_t ik = 0; ik < ivf.nprobe; ik++) {
442  long key = keysi[ik]; /* select the list */
443  if (key < 0) {
444  // not enough centroids for multiprobe
445  continue;
446  }
447  FAISS_THROW_IF_NOT_FMT (
448  key < (long) ivf.nlist,
449  "Invalid key=%ld at ik=%ld nlist=%ld\n",
450  key, ik, ivf.nlist);
451 
452  nlistv++;
453  const size_t list_size = ivf.ids[key].size();
454  const float * list_vecs = (const float*)(ivf.codes[key].data());
455 
456  for (size_t j = 0; j < list_size; j++) {
457  const float * yj = list_vecs + d * j;
458  float ip = fvec_inner_product (xi, yj, d);
459  if (ip > simi[0]) {
460  minheap_pop (k, simi, idxi);
461  long id = store_pairs ? (key << 32 | j) : ivf.ids[key][j];
462  minheap_push (k, simi, idxi, ip, id);
463  }
464  }
465  ndis += list_size;
466  }
467  minheap_reorder (k, simi, idxi);
468  }
469  indexIVFFlat_stats.nq += nx;
470  indexIVFFlat_stats.nlist += nlistv;
471  indexIVFFlat_stats.ndis += ndis;
472 }
473 
474 
475 void search_knn_L2sqr (const IndexIVFFlat &ivf,
476  size_t nx,
477  const float * x,
478  const long * keys,
479  float_maxheap_array_t * res,
480  bool store_pairs)
481 {
482  const size_t k = res->k;
483  size_t nlistv = 0, ndis = 0;
484  size_t d = ivf.d;
485 #pragma omp parallel for reduction(+: nlistv, ndis)
486  for (size_t i = 0; i < nx; i++) {
487  const float * xi = x + i * d;
488  const long * keysi = keys + i * ivf.nprobe;
489  float * __restrict disi = res->get_val (i);
490  long * __restrict idxi = res->get_ids (i);
491  maxheap_heapify (k, disi, idxi);
492 
493  for (size_t ik = 0; ik < ivf.nprobe; ik++) {
494  long key = keysi[ik]; /* select the list */
495  if (key < 0) {
496  // not enough centroids for multiprobe
497  continue;
498  }
499  FAISS_THROW_IF_NOT_FMT (
500  key < (long) ivf.nlist,
501  "Invalid key=%ld at ik=%ld nlist=%ld\n",
502  key, ik, ivf.nlist);
503 
504  nlistv++;
505  const size_t list_size = ivf.ids[key].size();
506  const float * list_vecs = (const float*)(ivf.codes[key].data());
507 
508  for (size_t j = 0; j < list_size; j++) {
509  const float * yj = list_vecs + d * j;
510  float disij = fvec_L2sqr (xi, yj, d);
511  if (disij < disi[0]) {
512  maxheap_pop (k, disi, idxi);
513  long id = store_pairs ? (key << 32 | j) : ivf.ids[key][j];
514  maxheap_push (k, disi, idxi, disij, id);
515  }
516  }
517  ndis += list_size;
518  }
519  maxheap_reorder (k, disi, idxi);
520  }
521  indexIVFFlat_stats.nq += nx;
522  indexIVFFlat_stats.nlist += nlistv;
523  indexIVFFlat_stats.ndis += ndis;
524 }
525 
526 
527 } // anonymous namespace
528 
529 void IndexIVFFlat::search_preassigned (idx_t n, const float *x, idx_t k,
530  const idx_t *idx,
531  const float * /* coarse_dis */,
532  float *distances, idx_t *labels,
533  bool store_pairs) const
534 {
535  if (metric_type == METRIC_INNER_PRODUCT) {
536  float_minheap_array_t res = {
537  size_t(n), size_t(k), labels, distances};
538  search_knn_inner_product (*this, n, x, idx, &res, store_pairs);
539 
540  } else if (metric_type == METRIC_L2) {
541  float_maxheap_array_t res = {
542  size_t(n), size_t(k), labels, distances};
543  search_knn_L2sqr (*this, n, x, idx, &res, store_pairs);
544  }
545 }
546 
547 
548 void IndexIVFFlat::range_search (idx_t nx, const float *x, float radius,
549  RangeSearchResult *result) const
550 {
551  idx_t * keys = new idx_t [nx * nprobe];
552  ScopeDeleter<idx_t> del (keys);
553  quantizer->assign (nx, x, keys, nprobe);
554 
555 #pragma omp parallel
556  {
557  RangeSearchPartialResult pres(result);
558 
559  for (size_t i = 0; i < nx; i++) {
560  const float * xi = x + i * d;
561  const long * keysi = keys + i * nprobe;
562 
564  pres.new_result (i);
565 
566  for (size_t ik = 0; ik < nprobe; ik++) {
567  long key = keysi[ik]; /* select the list */
568  if (key < 0 || key >= (long) nlist) {
569  fprintf (stderr, "Invalid key=%ld at ik=%ld nlist=%ld\n",
570  key, ik, nlist);
571  throw;
572  }
573 
574  const size_t list_size = ids[key].size();
575  const float * list_vecs = (const float *)(codes[key].data());
576 
577  for (size_t j = 0; j < list_size; j++) {
578  const float * yj = list_vecs + d * j;
579  if (metric_type == METRIC_L2) {
580  float disij = fvec_L2sqr (xi, yj, d);
581  if (disij < radius) {
582  qres.add (disij, ids[key][j]);
583  }
584  } else if (metric_type == METRIC_INNER_PRODUCT) {
585  float disij = fvec_inner_product(xi, yj, d);
586  if (disij > radius) {
587  qres.add (disij, ids[key][j]);
588  }
589  }
590  }
591  }
592  }
593 
594  pres.finalize ();
595  }
596 }
597 
/** Replaces the vectors stored under the given ids.
 *
 * Each id is first removed from its current list (the last entry of that
 * list is moved into the freed slot), then appended to the list chosen by
 * the quantizer for the new vector. Requires the direct map.
 *
 * @param n        number of vectors to update
 * @param new_ids  ids to update, each in [0, ntotal)
 * @param x        new vectors, n * d floats
 */
void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
{
    FAISS_THROW_IF_NOT (maintain_direct_map);
    FAISS_THROW_IF_NOT (is_trained);
    std::vector<idx_t> assign (n);
    quantizer->assign (n, x, assign.data());

    for (int i = 0; i < n; i++) {
        idx_t id = new_ids[i];
        FAISS_THROW_IF_NOT_MSG (0 <= id && id < ntotal,
                                "id to update out of range");
        { // remove old one
            // direct map entry: list number in the high 32 bits,
            // offset inside the list in the low 32 bits
            long dm = direct_map[id];
            long ofs = dm & 0xffffffff;
            long il = dm >> 32;
            size_t l = ids[il].size();
            if (ofs != l - 1) {
                // not the last entry: move the last one into the hole
                // and update its direct map entry
                long id2 = ids[il].back();
                ids[il][ofs] = id2;
                direct_map[id2] = (il << 32) | ofs;
                float * vecs = (float*)codes[il].data();
                memcpy (vecs + ofs * d,
                        vecs + (l - 1) * d,
                        d * sizeof(float));
            }
            ids[il].pop_back();
            codes[il].resize((l - 1) * code_size);
        }
        { // insert new one
            // NOTE(review): assign[i] is used as a list number without
            // checking that it is non-negative; confirm the quantizer
            // cannot return -1 here
            long il = assign[i];
            size_t l = ids[il].size();
            long dm = (il << 32) | l;
            direct_map[id] = dm;
            ids[il].push_back (id);
            codes[il].resize((l + 1) * code_size);
            float * vecs = (float*)codes[il].data();
            memcpy (vecs + l * d,
                    x + i * d,
                    d * sizeof(float));
        }
    }

}
641 
642 
643 
644 
645 
646 void IndexIVFFlat::reconstruct (idx_t key, float * recons) const
647 {
648  FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal,
649  "direct map is not initialized");
650  int list_no = direct_map[key] >> 32;
651  int ofs = direct_map[key] & 0xffffffff;
652  memcpy (recons, &codes[list_no][ofs * code_size], d * sizeof(recons[0]));
653 }
654 
655 
656 
657 
658 } // namespace faiss
virtual void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const =0
int niter
clustering iterations
Definition: Clustering.h:25
result structure for a single query
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
Definition: utils.cpp:574
double imbalance_factor() const
1 = perfectly balanced, >1: imbalanced
Definition: IndexIVF.cpp:216
virtual void reset()=0
removes all elements from the database.
virtual void copy_subset_to(IndexIVF &other, int subset_type, long a1, long a2) const
Definition: IndexIVF.cpp:272
char quantizer_trains_alone
Definition: IndexIVF.h:56
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:47
virtual void train(idx_t, const float *)
Definition: Index.h:89
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:646
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:23
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:548
virtual void add_with_ids(idx_t n, const float *x, const long *xids)
Definition: Index.cpp:30
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:208
double imbalance_factor(int n, int k, const long *assign)
a balanced assignment has a IF of 1
Definition: utils.cpp:1625
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:62
int d
vector dimension
Definition: Index.h:64
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:49
void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:157
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:59
void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
Definition: IndexIVF.cpp:361
Index * clustering_index
to override index used during clustering
Definition: IndexIVF.h:60
virtual void add(idx_t n, const float *x)=0
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:57
long idx_t
all indices are this type
Definition: Index.h:62
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
bool verbose
verbosity level
Definition: Index.h:66
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:112
std::vector< float > centroids
centroids (k * d)
Definition: Clustering.h:63
QueryResult & new_result(idx_t qno)
begin a new result
void update_vectors(int nv, idx_t *idx, const float *v)
Definition: IndexIVF.cpp:598
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
the entries in the buffers are split per query
void make_direct_map(bool new_maintain_direct_map=true)
Definition: IndexIVF.cpp:71
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
void print_stats() const
display some stats about the inverted lists
Definition: IndexIVF.cpp:225
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
void add(idx_t n, const float *x) override
Quantizes x and calls add_with_ids.
Definition: IndexIVF.cpp:66
virtual void train(idx_t n, const float *x, faiss::Index &index)
Index is used during the assignment stage.
Definition: Clustering.cpp:67
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
long remove_ids(const IDSelector &sel) override
Dataset manipulation functions.
Definition: IndexIVF.cpp:123
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:68
bool spherical
do we want normalized centroids?
Definition: Clustering.h:29
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:96
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const override
Definition: IndexIVF.cpp:529
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:245
size_t code_size
code size per vector in bytes
Definition: IndexIVF.h:64
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:43
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:366