Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/IndexIVF.cpp
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #include "IndexIVF.h"
11 
12 
13 #include <omp.h>
14 
15 #include <cstdio>
16 #include <memory>
17 
18 #include "utils.h"
19 #include "hamming.h"
20 
21 #include "FaissAssert.h"
22 #include "IndexFlat.h"
23 #include "AuxIndexStructures.h"
24 
25 namespace faiss {
26 
27 using ScopedIds = InvertedLists::ScopedIds;
28 using ScopedCodes = InvertedLists::ScopedCodes;
29 
30 /*****************************************
31  * Level1Quantizer implementation
32  ******************************************/
33 
34 
35 Level1Quantizer::Level1Quantizer (Index * quantizer, size_t nlist):
36  quantizer (quantizer),
37  nlist (nlist),
38  quantizer_trains_alone (0),
39  own_fields (false),
40  clustering_index (nullptr)
41 {
42  // here we set a low # iterations because this is typically used
43  // for large clusterings (nb this is not used for the MultiIndex,
44  // for which quantizer_trains_alone = true)
45  cp.niter = 10;
46 }
47 
48 Level1Quantizer::Level1Quantizer ():
49  quantizer (nullptr),
50  nlist (0),
51  quantizer_trains_alone (0), own_fields (false),
52  clustering_index (nullptr)
53 {}
54 
55 Level1Quantizer::~Level1Quantizer ()
56 {
57  if (own_fields) delete quantizer;
58 }
59 
60 void Level1Quantizer::train_q1 (size_t n, const float *x, bool verbose, MetricType metric_type)
61 {
62  size_t d = quantizer->d;
63  if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
64  if (verbose)
65  printf ("IVF quantizer does not need training.\n");
66  } else if (quantizer_trains_alone == 1) {
67  if (verbose)
68  printf ("IVF quantizer trains alone...\n");
69  quantizer->train (n, x);
70  quantizer->verbose = verbose;
71  FAISS_THROW_IF_NOT_MSG (quantizer->ntotal == nlist,
72  "nlist not consistent with quantizer size");
73  } else if (quantizer_trains_alone == 0) {
74  if (verbose)
75  printf ("Training level-1 quantizer on %ld vectors in %ldD\n",
76  n, d);
77 
78  Clustering clus (d, nlist, cp);
79  quantizer->reset();
80  if (clustering_index) {
81  clus.train (n, x, *clustering_index);
82  quantizer->add (nlist, clus.centroids.data());
83  } else {
84  clus.train (n, x, *quantizer);
85  }
86  quantizer->is_trained = true;
87  } else if (quantizer_trains_alone == 2) {
88  if (verbose)
89  printf (
90  "Training L2 quantizer on %ld vectors in %ldD%s\n",
91  n, d,
92  clustering_index ? "(user provided index)" : "");
93  FAISS_THROW_IF_NOT (metric_type == METRIC_L2);
94  Clustering clus (d, nlist, cp);
95  if (!clustering_index) {
96  IndexFlatL2 assigner (d);
97  clus.train(n, x, assigner);
98  } else {
99  clus.train(n, x, *clustering_index);
100  }
101  if (verbose)
102  printf ("Adding centroids to quantizer\n");
103  quantizer->add (nlist, clus.centroids.data());
104  }
105 }
106 
107 
108 
109 /*****************************************
110  * IndexIVF implementation
111  ******************************************/
112 
113 
114 IndexIVF::IndexIVF (Index * quantizer, size_t d,
115  size_t nlist, size_t code_size,
116  MetricType metric):
117  Index (d, metric),
118  Level1Quantizer (quantizer, nlist),
119  invlists (new ArrayInvertedLists (nlist, code_size)),
120  own_invlists (true),
121  code_size (code_size),
122  nprobe (1),
123  max_codes (0),
124  parallel_mode (0),
125  maintain_direct_map (false)
126 {
127  FAISS_THROW_IF_NOT (d == quantizer->d);
128  is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
129  // Spherical by default if the metric is inner_product
130  if (metric_type == METRIC_INNER_PRODUCT) {
131  cp.spherical = true;
132  }
133 
134 }
135 
136 IndexIVF::IndexIVF ():
137  invlists (nullptr), own_invlists (false),
138  code_size (0),
139  nprobe (1), max_codes (0), parallel_mode (0),
140  maintain_direct_map (false)
141 {}
142 
143 void IndexIVF::add (idx_t n, const float * x)
144 {
145  add_with_ids (n, x, nullptr);
146 }
147 
148 
149 void IndexIVF::add_with_ids (idx_t n, const float * x, const long *xids)
150 {
151  // do some blocking to avoid excessive allocs
152  idx_t bs = 65536;
153  if (n > bs) {
154  for (idx_t i0 = 0; i0 < n; i0 += bs) {
155  idx_t i1 = std::min (n, i0 + bs);
156  if (verbose) {
157  printf(" IndexIVF::add_with_ids %ld:%ld\n", i0, i1);
158  }
159  add_with_ids (i1 - i0, x + i0 * d,
160  xids ? xids + i0 : nullptr);
161  }
162  return;
163  }
164 
165  FAISS_THROW_IF_NOT (is_trained);
166  std::unique_ptr<idx_t []> idx(new idx_t[n]);
167  quantizer->assign (n, x, idx.get());
168  size_t nadd = 0, nminus1 = 0;
169 
170  for (size_t i = 0; i < n; i++) {
171  if (idx[i] < 0) nminus1++;
172  }
173 
174  std::unique_ptr<uint8_t []> flat_codes(new uint8_t [n * code_size]);
175  encode_vectors (n, x, idx.get(), flat_codes.get());
176 
177 #pragma omp parallel reduction(+: nadd)
178  {
179  int nt = omp_get_num_threads();
180  int rank = omp_get_thread_num();
181 
182  // each thread takes care of a subset of lists
183  for (size_t i = 0; i < n; i++) {
184  long list_no = idx [i];
185  if (list_no >= 0 && list_no % nt == rank) {
186  long id = xids ? xids[i] : ntotal + i;
187  invlists->add_entry (list_no, id,
188  flat_codes.get() + i * code_size);
189  nadd++;
190  }
191  }
192  }
193 
194  if (verbose) {
195  printf(" added %ld / %ld vectors (%ld -1s)\n", nadd, n, nminus1);
196  }
197 
198  ntotal += n;
199 }
200 
201 
202 void IndexIVF::make_direct_map (bool new_maintain_direct_map)
203 {
204  // nothing to do
205  if (new_maintain_direct_map == maintain_direct_map)
206  return;
207 
208  if (new_maintain_direct_map) {
209  direct_map.resize (ntotal, -1);
210  for (size_t key = 0; key < nlist; key++) {
211  size_t list_size = invlists->list_size (key);
212  ScopedIds idlist (invlists, key);
213 
214  for (long ofs = 0; ofs < list_size; ofs++) {
215  FAISS_THROW_IF_NOT_MSG (
216  0 <= idlist [ofs] && idlist[ofs] < ntotal,
217  "direct map supported only for seuquential ids");
218  direct_map [idlist [ofs]] = key << 32 | ofs;
219  }
220  }
221  } else {
222  direct_map.clear ();
223  }
224  maintain_direct_map = new_maintain_direct_map;
225 }
226 
227 
228 void IndexIVF::search (idx_t n, const float *x, idx_t k,
229  float *distances, idx_t *labels) const
230 {
231  long * idx = new long [n * nprobe];
232  ScopeDeleter<long> del (idx);
233  float * coarse_dis = new float [n * nprobe];
234  ScopeDeleter<float> del2 (coarse_dis);
235 
236  double t0 = getmillisecs();
237  quantizer->search (n, x, nprobe, coarse_dis, idx);
238  indexIVF_stats.quantization_time += getmillisecs() - t0;
239 
240  t0 = getmillisecs();
241  invlists->prefetch_lists (idx, n * nprobe);
242 
243  search_preassigned (n, x, k, idx, coarse_dis,
244  distances, labels, false);
245  indexIVF_stats.search_time += getmillisecs() - t0;
246 }
247 
248 
249 
250 void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k,
251  const idx_t *keys,
252  const float *coarse_dis ,
253  float *distances, idx_t *labels,
254  bool store_pairs,
255  const IVFSearchParameters *params) const
256 {
257  long nprobe = params ? params->nprobe : this->nprobe;
258  long max_codes = params ? params->max_codes : this->max_codes;
259 
260  size_t nlistv = 0, ndis = 0, nheap = 0;
261 
262  using HeapForIP = CMin<float, idx_t>;
263  using HeapForL2 = CMax<float, idx_t>;
264 
265  idx_t check_period = InterruptCallback::get_period_hint
266  (nprobe * ntotal * d / nlist);
267 
268  for (idx_t i0 = 0; i0 < n; i0 += check_period) {
269  idx_t i1 = std::min(i0 + check_period, n);
270 
271 #pragma omp parallel reduction(+: nlistv, ndis, nheap)
272  {
273  InvertedListScanner *scanner = get_InvertedListScanner(store_pairs);
275 
276  /*****************************************************
277  * Depending on parallel_mode, there are two possible ways
278  * to organize the search. Here we define local functions
279  * that are in common between the two
280  ******************************************************/
281 
282  // intialize + reorder a result heap
283 
284  auto init_result = [&](float *simi, idx_t *idxi) {
285  if (metric_type == METRIC_INNER_PRODUCT) {
286  heap_heapify<HeapForIP> (k, simi, idxi);
287  } else {
288  heap_heapify<HeapForL2> (k, simi, idxi);
289  }
290  };
291 
292  auto reorder_result = [&] (float *simi, idx_t *idxi) {
293  if (metric_type == METRIC_INNER_PRODUCT) {
294  heap_reorder<HeapForIP> (k, simi, idxi);
295  } else {
296  heap_reorder<HeapForL2> (k, simi, idxi);
297  }
298  };
299 
300  // single list scan using the current scanner (with query
301  // set porperly) and storing results in simi and idxi
302  auto scan_one_list = [&] (idx_t key, float coarse_dis_i,
303  float *simi, idx_t *idxi) {
304 
305  if (key < 0) {
306  // not enough centroids for multiprobe
307  return (size_t)0;
308  }
309  FAISS_THROW_IF_NOT_FMT (key < (idx_t) nlist,
310  "Invalid key=%ld nlist=%ld\n",
311  key, nlist);
312 
313  size_t list_size = invlists->list_size(key);
314 
315  // don't waste time on empty lists
316  if (list_size == 0) {
317  return (size_t)0;
318  }
319 
320  scanner->set_list (key, coarse_dis_i);
321 
322  nlistv++;
323 
324  InvertedLists::ScopedCodes scodes (invlists, key);
325 
326  std::unique_ptr<InvertedLists::ScopedIds> sids;
327  const Index::idx_t * ids = nullptr;
328 
329  if (!store_pairs) {
330  sids.reset (new InvertedLists::ScopedIds (invlists, key));
331  ids = sids->get();
332  }
333 
334  nheap += scanner->scan_codes (list_size, scodes.get(),
335  ids, simi, idxi, k);
336 
337  return list_size;
338  };
339 
340  /****************************************************
341  * Actual loops, depending on parallel_mode
342  ****************************************************/
343 
344  if (parallel_mode == 0) {
345 
346 #pragma omp for
347  for (size_t i = i0; i < i1; i++) {
348  // loop over queries
349  scanner->set_query (x + i * d);
350  float * simi = distances + i * k;
351  idx_t * idxi = labels + i * k;
352 
353  init_result (simi, idxi);
354 
355  long nscan = 0;
356 
357  // loop over probes
358  for (size_t ik = 0; ik < nprobe; ik++) {
359 
360  nscan += scan_one_list (
361  keys [i * nprobe + ik],
362  coarse_dis[i * nprobe + ik],
363  simi, idxi
364  );
365 
366  if (max_codes && nscan >= max_codes) {
367  break;
368  }
369  }
370 
371  ndis += nscan;
372  reorder_result (simi, idxi);
373  } // parallel for
374  } else if (parallel_mode == 1) {
375  std::vector <idx_t> local_idx (k);
376  std::vector <float> local_dis (k);
377 
378  for (size_t i = i0; i < i1; i++) {
379  scanner->set_query (x + i * d);
380  init_result (local_dis.data(), local_idx.data());
381 
382 #pragma omp for schedule(dynamic)
383  for (size_t ik = 0; ik < nprobe; ik++) {
384  ndis += scan_one_list (
385  keys [i * nprobe + ik],
386  coarse_dis[i * nprobe + ik],
387  local_dis.data(), local_idx.data()
388  );
389 
390  // can't do the test on max_codes
391  }
392  // merge thread-local results
393 
394  float * simi = distances + i * k;
395  idx_t * idxi = labels + i * k;
396 #pragma omp single
397  init_result (simi, idxi);
398 
399 #pragma omp barrier
400 #pragma omp critical
401  {
402  if (metric_type == METRIC_INNER_PRODUCT) {
403  heap_addn<HeapForIP>
404  (k, simi, idxi,
405  local_dis.data(), local_idx.data(), k);
406  } else {
407  heap_addn<HeapForL2>
408  (k, simi, idxi,
409  local_dis.data(), local_idx.data(), k);
410  }
411  }
412 #pragma omp barrier
413 #pragma omp single
414  reorder_result (simi, idxi);
415  }
416  } else {
417  FAISS_THROW_FMT ("parallel_mode %d not supported\n",
418  parallel_mode);
419  }
420  } // loop over blocks
422  } // loop over blocks
423 
424  indexIVF_stats.nq += n;
425  indexIVF_stats.nlist += nlistv;
426  indexIVF_stats.ndis += ndis;
427  indexIVF_stats.nheap_updates += nheap;
428 
429 }
430 
431 
432 
433 
434 void IndexIVF::range_search (idx_t nx, const float *x, float radius,
435  RangeSearchResult *result) const
436 {
437  std::unique_ptr<idx_t[]> keys (new idx_t[nx * nprobe]);
438  std::unique_ptr<float []> coarse_dis (new float[nx * nprobe]);
439 
440  double t0 = getmillisecs();
441  quantizer->search (nx, x, nprobe, coarse_dis.get (), keys.get ());
442  indexIVF_stats.quantization_time += getmillisecs() - t0;
443 
444  t0 = getmillisecs();
445  invlists->prefetch_lists (keys.get(), nx * nprobe);
446 
447  range_search_preassigned (nx, x, radius, keys.get (), coarse_dis.get (),
448  result);
449 
450  indexIVF_stats.search_time += getmillisecs() - t0;
451 }
452 
453 void IndexIVF::range_search_preassigned (
454  idx_t nx, const float *x, float radius,
455  const idx_t *keys, const float *coarse_dis,
456  RangeSearchResult *result) const
457 {
458 
459  size_t nlistv = 0, ndis = 0;
460  bool store_pairs = false;
461 
462  std::vector<RangeSearchPartialResult *> all_pres (omp_get_max_threads());
463 
464 #pragma omp parallel reduction(+: nlistv, ndis)
465  {
466  RangeSearchPartialResult pres(result);
467  std::unique_ptr<InvertedListScanner> scanner
468  (get_InvertedListScanner(store_pairs));
469  FAISS_THROW_IF_NOT (scanner.get ());
470  all_pres[omp_get_thread_num()] = &pres;
471 
472  // prepare the list scanning function
473 
474  auto scan_list_func = [&](size_t i, size_t ik, RangeQueryResult &qres) {
475 
476  idx_t key = keys[i * nprobe + ik]; /* select the list */
477  if (key < 0) return;
478  FAISS_THROW_IF_NOT_FMT (
479  key < (idx_t) nlist,
480  "Invalid key=%ld at ik=%ld nlist=%ld\n",
481  key, ik, nlist);
482  const size_t list_size = invlists->list_size(key);
483 
484  if (list_size == 0) return;
485 
486  InvertedLists::ScopedCodes scodes (invlists, key);
488 
489  scanner->set_list (key, coarse_dis[i * nprobe + ik]);
490  nlistv++;
491  ndis += list_size;
492  scanner->scan_codes_range (list_size, scodes.get(),
493  ids.get(), radius, qres);
494  };
495 
496  if (parallel_mode == 0) {
497 
498 #pragma omp for
499  for (size_t i = 0; i < nx; i++) {
500  scanner->set_query (x + i * d);
501 
502  RangeQueryResult & qres = pres.new_result (i);
503 
504  for (size_t ik = 0; ik < nprobe; ik++) {
505  scan_list_func (i, ik, qres);
506  }
507 
508  }
509 
510  } else if (parallel_mode == 1) {
511 
512  for (size_t i = 0; i < nx; i++) {
513  scanner->set_query (x + i * d);
514 
515  RangeQueryResult & qres = pres.new_result (i);
516 
517 #pragma omp for schedule(dynamic)
518  for (size_t ik = 0; ik < nprobe; ik++) {
519  scan_list_func (i, ik, qres);
520  }
521  }
522  } else if (parallel_mode == 2) {
523  std::vector<RangeQueryResult *> all_qres (nx);
524  RangeQueryResult *qres = nullptr;
525 
526 #pragma omp for schedule(dynamic)
527  for (size_t iik = 0; iik < nx * nprobe; iik++) {
528  size_t i = iik / nprobe;
529  size_t ik = iik % nprobe;
530  if (qres == nullptr || qres->qno != i) {
531  FAISS_ASSERT (!qres || i > qres->qno);
532  qres = &pres.new_result (i);
533  scanner->set_query (x + i * d);
534  }
535  scan_list_func (i, ik, *qres);
536  }
537  } else {
538  FAISS_THROW_FMT ("parallel_mode %d not supported\n", parallel_mode);
539  }
540  if (parallel_mode == 0) {
541  pres.finalize ();
542  } else {
543 #pragma omp barrier
544 #pragma omp single
545  RangeSearchPartialResult::merge (all_pres, false);
546 #pragma omp barrier
547 
548  }
549  }
550  indexIVF_stats.nq += nx;
551  indexIVF_stats.nlist += nlistv;
552  indexIVF_stats.ndis += ndis;
553 }
554 
555 
557  bool /*store_pairs*/) const
558 {
559  return nullptr;
560 }
561 
562 void IndexIVF::reconstruct (idx_t key, float* recons) const
563 {
564  FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal,
565  "direct map is not initialized");
566  FAISS_THROW_IF_NOT_MSG (key >= 0 && key < direct_map.size(),
567  "invalid key");
568  idx_t list_no = direct_map[key] >> 32;
569  idx_t offset = direct_map[key] & 0xffffffff;
570  reconstruct_from_offset (list_no, offset, recons);
571 }
572 
573 
574 void IndexIVF::reconstruct_n (idx_t i0, idx_t ni, float* recons) const
575 {
576  FAISS_THROW_IF_NOT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
577 
578  for (idx_t list_no = 0; list_no < nlist; list_no++) {
579  size_t list_size = invlists->list_size (list_no);
580  ScopedIds idlist (invlists, list_no);
581 
582  for (idx_t offset = 0; offset < list_size; offset++) {
583  idx_t id = idlist[offset];
584  if (!(id >= i0 && id < i0 + ni)) {
585  continue;
586  }
587 
588  float* reconstructed = recons + (id - i0) * d;
589  reconstruct_from_offset (list_no, offset, reconstructed);
590  }
591  }
592 }
593 
594 
595 void IndexIVF::search_and_reconstruct (idx_t n, const float *x, idx_t k,
596  float *distances, idx_t *labels,
597  float *recons) const
598 {
599  idx_t * idx = new idx_t [n * nprobe];
600  ScopeDeleter<idx_t> del (idx);
601  float * coarse_dis = new float [n * nprobe];
602  ScopeDeleter<float> del2 (coarse_dis);
603 
604  quantizer->search (n, x, nprobe, coarse_dis, idx);
605 
606  invlists->prefetch_lists (idx, n * nprobe);
607 
608  // search_preassigned() with `store_pairs` enabled to obtain the list_no
609  // and offset into `codes` for reconstruction
610  search_preassigned (n, x, k, idx, coarse_dis,
611  distances, labels, true /* store_pairs */);
612  for (idx_t i = 0; i < n; ++i) {
613  for (idx_t j = 0; j < k; ++j) {
614  idx_t ij = i * k + j;
615  idx_t key = labels[ij];
616  float* reconstructed = recons + ij * d;
617  if (key < 0) {
618  // Fill with NaNs
619  memset(reconstructed, -1, sizeof(*reconstructed) * d);
620  } else {
621  int list_no = key >> 32;
622  int offset = key & 0xffffffff;
623 
624  // Update label to the actual id
625  labels[ij] = invlists->get_single_id (list_no, offset);
626 
627  reconstruct_from_offset (list_no, offset, reconstructed);
628  }
629  }
630  }
631 }
632 
634  idx_t /*list_no*/,
635  idx_t /*offset*/,
636  float* /*recons*/) const {
637  FAISS_THROW_MSG ("reconstruct_from_offset not implemented");
638 }
639 
641 {
642  direct_map.clear ();
643  invlists->reset ();
644  ntotal = 0;
645 }
646 
647 
649 {
650  FAISS_THROW_IF_NOT_MSG (!maintain_direct_map,
651  "direct map remove not implemented");
652 
653  std::vector<idx_t> toremove(nlist);
654 
655 #pragma omp parallel for
656  for (idx_t i = 0; i < nlist; i++) {
657  idx_t l0 = invlists->list_size (i), l = l0, j = 0;
658  ScopedIds idsi (invlists, i);
659  while (j < l) {
660  if (sel.is_member (idsi[j])) {
661  l--;
662  invlists->update_entry (
663  i, j,
664  invlists->get_single_id (i, l),
665  ScopedCodes (invlists, i, l).get());
666  } else {
667  j++;
668  }
669  }
670  toremove[i] = l0 - l;
671  }
672  // this will not run well in parallel on ondisk because of possible shrinks
673  idx_t nremove = 0;
674  for (idx_t i = 0; i < nlist; i++) {
675  if (toremove[i] > 0) {
676  nremove += toremove[i];
677  invlists->resize(
678  i, invlists->list_size(i) - toremove[i]);
679  }
680  }
681  ntotal -= nremove;
682  return nremove;
683 }
684 
685 
686 
687 
688 void IndexIVF::train (idx_t n, const float *x)
689 {
690  if (verbose)
691  printf ("Training level-1 quantizer\n");
692 
693  train_q1 (n, x, verbose, metric_type);
694 
695  if (verbose)
696  printf ("Training IVF residual\n");
697 
698  train_residual (n, x);
699  is_trained = true;
700 
701 }
702 
703 void IndexIVF::train_residual(idx_t /*n*/, const float* /*x*/) {
704  if (verbose)
705  printf("IndexIVF: no residual training\n");
706  // does nothing by default
707 }
708 
709 
711 {
712  // minimal sanity checks
713  FAISS_THROW_IF_NOT (other.d == d);
714  FAISS_THROW_IF_NOT (other.nlist == nlist);
715  FAISS_THROW_IF_NOT (other.code_size == code_size);
716  FAISS_THROW_IF_NOT_MSG (typeid (*this) == typeid (other),
717  "can only merge indexes of the same type");
718 }
719 
720 
721 void IndexIVF::merge_from (IndexIVF &other, idx_t add_id)
722 {
724  FAISS_THROW_IF_NOT_MSG ((!maintain_direct_map &&
725  !other.maintain_direct_map),
726  "direct map copy not implemented");
727 
728  invlists->merge_from (other.invlists, add_id);
729 
730  ntotal += other.ntotal;
731  other.ntotal = 0;
732 }
733 
734 
736 {
737  //FAISS_THROW_IF_NOT (ntotal == 0);
738  FAISS_THROW_IF_NOT (il->nlist == nlist &&
739  il->code_size == code_size);
740  if (own_invlists) {
741  delete invlists;
742  }
743  invlists = il;
744  own_invlists = own;
745 }
746 
747 
748 void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
749  idx_t a1, idx_t a2) const
750 {
751 
752  FAISS_THROW_IF_NOT (nlist == other.nlist);
753  FAISS_THROW_IF_NOT (code_size == other.code_size);
754  FAISS_THROW_IF_NOT (!other.maintain_direct_map);
755  FAISS_THROW_IF_NOT_FMT (
756  subset_type == 0 || subset_type == 1 || subset_type == 2,
757  "subset type %d not implemented", subset_type);
758 
759  size_t accu_n = 0;
760  size_t accu_a1 = 0;
761  size_t accu_a2 = 0;
762 
763  InvertedLists *oivf = other.invlists;
764 
765  for (idx_t list_no = 0; list_no < nlist; list_no++) {
766  size_t n = invlists->list_size (list_no);
767  ScopedIds ids_in (invlists, list_no);
768 
769  if (subset_type == 0) {
770  for (idx_t i = 0; i < n; i++) {
771  idx_t id = ids_in[i];
772  if (a1 <= id && id < a2) {
773  oivf->add_entry (list_no,
774  invlists->get_single_id (list_no, i),
775  ScopedCodes (invlists, list_no, i).get());
776  other.ntotal++;
777  }
778  }
779  } else if (subset_type == 1) {
780  for (idx_t i = 0; i < n; i++) {
781  idx_t id = ids_in[i];
782  if (id % a1 == a2) {
783  oivf->add_entry (list_no,
784  invlists->get_single_id (list_no, i),
785  ScopedCodes (invlists, list_no, i).get());
786  other.ntotal++;
787  }
788  }
789  } else if (subset_type == 2) {
790  // see what is allocated to a1 and to a2
791  size_t next_accu_n = accu_n + n;
792  size_t next_accu_a1 = next_accu_n * a1 / ntotal;
793  size_t i1 = next_accu_a1 - accu_a1;
794  size_t next_accu_a2 = next_accu_n * a2 / ntotal;
795  size_t i2 = next_accu_a2 - accu_a2;
796 
797  for (idx_t i = i1; i < i2; i++) {
798  oivf->add_entry (list_no,
799  invlists->get_single_id (list_no, i),
800  ScopedCodes (invlists, list_no, i).get());
801  }
802 
803  other.ntotal += i2 - i1;
804  accu_a1 = next_accu_a1;
805  accu_a2 = next_accu_a2;
806  }
807  accu_n += n;
808  }
809  FAISS_ASSERT(accu_n == ntotal);
810 
811 }
812 
813 
814 IndexIVF::~IndexIVF()
815 {
816  if (own_invlists) {
817  delete invlists;
818  }
819 }
820 
821 
822 void IndexIVFStats::reset()
823 {
824  memset ((void*)this, 0, sizeof (*this));
825 }
826 
827 
828 IndexIVFStats indexIVF_stats;
829 
831  const uint8_t *,
832  const idx_t *,
833  float ,
834  RangeQueryResult &) const
835 {
836  FAISS_THROW_MSG ("scan_codes_range not implemented");
837 }
838 
839 
840 
841 } // namespace faiss
virtual void encode_vectors(idx_t n, const float *x, const idx_t *list_nos, uint8_t *codes) const =0
virtual void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs, const IVFSearchParameters *params=nullptr) const
Definition: IndexIVF.cpp:250
result structure for a single query
simple (default) implementation as an array of inverted lists
void check_compatible_for_merge(const IndexIVF &other) const
Definition: IndexIVF.cpp:710
void search_and_reconstruct(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, float *recons) const override
Definition: IndexIVF.cpp:595
virtual void reset()=0
removes all elements from the database.
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:97
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:34
idx_t remove_ids(const IDSelector &sel) override
Dataset manipulation functions.
Definition: IndexIVF.cpp:648
virtual size_t list_size(size_t list_no) const =0
get the size of a list
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:562
virtual void train(idx_t n, const float *x)
Definition: Index.cpp:23
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:703
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:434
void merge_from(InvertedLists *oivf, size_t add_id)
move all entries from oivf (empty on output)
virtual idx_t get_single_id(size_t list_no, size_t offset) const
int d
vector dimension
Definition: Index.h:66
long idx_t
all indices are this type
Definition: Index.h:62
size_t code_size
code size per vector in bytes
Definition: InvertedLists.h:35
virtual void copy_subset_to(IndexIVF &other, int subset_type, idx_t a1, idx_t a2) const
Definition: IndexIVF.cpp:748
void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:688
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexIVF.cpp:574
virtual void add(idx_t n, const float *x)=0
virtual void set_list(idx_t list_no, float coarse_dis)=0
following codes come from this inverted list
void add_with_ids(idx_t n, const float *x, const idx_t *xids) override
default implementation that calls encode_vectors
Definition: IndexIVF.cpp:149
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code)
add one entry to an inverted list
virtual InvertedListScanner * get_InvertedListScanner(bool store_pairs=false) const
get a scanner for this index (store_pairs means ignore labels)
Definition: IndexIVF.cpp:556
virtual size_t scan_codes(size_t n, const uint8_t *codes, const idx_t *ids, float *distances, idx_t *labels, size_t k) const =0
void replace_invlists(InvertedLists *il, bool own=false)
replace the inverted lists, old one is deallocated if own_invlists
Definition: IndexIVF.cpp:735
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:43
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
bool verbose
verbosity level
Definition: Index.h:68
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:640
double getmillisecs()
ms elapsed since some arbitrary epoch
Definition: utils.cpp:69
std::vector< float > centroids
centroids (k * d)
Definition: Clustering.h:62
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
Index * clustering_index
to override index used during clustering
Definition: IndexIVF.h:44
void train_q1(size_t n, const float *x, bool verbose, MetricType metric_type)
Trains the level-1 (coarse) quantizer; how it is trained depends on quantizer_trains_alone.
Definition: IndexIVF.cpp:60
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:61
the entries in the buffers are split per query
size_t nlist
number of possible key values
Definition: InvertedLists.h:34
static void merge(std::vector< RangeSearchPartialResult * > &partial_results, bool do_delete=true)
void make_direct_map(bool new_maintain_direct_map=true)
Definition: IndexIVF.cpp:202
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
InvertedLists * invlists
Access to the actual data.
Definition: IndexIVF.h:92
virtual void reconstruct_from_offset(idx_t list_no, idx_t offset, float *recons) const
Definition: IndexIVF.cpp:633
static size_t get_period_hint(size_t flops)
void add(idx_t n, const float *x) override
Calls add_with_ids with NULL ids.
Definition: IndexIVF.cpp:143
virtual void train(idx_t n, const float *x, faiss::Index &index)
Index is used during the assignment stage.
Definition: Clustering.cpp:77
size_t max_codes
max nb of codes to visit to do a query
Definition: IndexIVF.h:62
virtual void prefetch_lists(const idx_t *list_nos, int nlist) const
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:32
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
size_t max_codes
max nb of codes to visit to do a query
Definition: IndexIVF.h:98
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:109
bool spherical
do we want normalized centroids?
Definition: Clustering.h:27
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:41
virtual void set_query(const float *query_vector)=0
from now on we handle this query.
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:228
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:721
size_t nlist
number of possible key values
Definition: IndexIVF.h:33
size_t code_size
code size per vector in bytes
Definition: IndexIVF.h:95
virtual void scan_codes_range(size_t n, const uint8_t *codes, const idx_t *ids, float radius, RangeQueryResult &result) const
Definition: IndexIVF.cpp:830
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:44