Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVF.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 /* Copyright 2004-present Facebook. All Rights Reserved.
10  Inverted list structure.
11 */
12 
13 #include "IndexIVF.h"
14 
15 #include <cstdio>
16 
17 #include "utils.h"
18 #include "hamming.h"
19 
20 #include "FaissAssert.h"
21 #include "IndexFlat.h"
22 #include "AuxIndexStructures.h"
23 
24 namespace faiss {
25 
26 /*****************************************
27  * Level1Quantizer implementation
28  ******************************************/
29 
30 
31 Level1Quantizer::Level1Quantizer (Index * quantizer, size_t nlist):
32  quantizer (quantizer),
33  nlist (nlist),
34  quantizer_trains_alone (0),
35  own_fields (false),
36  clustering_index (nullptr)
37 {
38  cp.niter = 10;
39 }
40 
41 Level1Quantizer::Level1Quantizer ():
42  quantizer (nullptr),
43  nlist (0),
44  quantizer_trains_alone (0), own_fields (false),
45  clustering_index (nullptr)
46 {}
47 
48 Level1Quantizer::~Level1Quantizer ()
49 {
50  if (own_fields) delete quantizer;
51 }
52 
53 void Level1Quantizer::train_q1 (size_t n, const float *x, bool verbose, MetricType metric_type)
54 {
55  size_t d = quantizer->d;
56  if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
57  if (verbose)
58  printf ("IVF quantizer does not need training.\n");
59  } else if (quantizer_trains_alone == 1) {
60  if (verbose)
61  printf ("IVF quantizer trains alone...\n");
62  quantizer->train (n, x);
63  quantizer->verbose = verbose;
64  FAISS_THROW_IF_NOT_MSG (quantizer->ntotal == nlist,
65  "nlist not consistent with quantizer size");
66  } else if (quantizer_trains_alone == 0) {
67  if (verbose)
68  printf ("Training level-1 quantizer on %ld vectors in %ldD\n",
69  n, d);
70 
71  Clustering clus (d, nlist, cp);
72  quantizer->reset();
73  if (clustering_index) {
74  clus.train (n, x, *clustering_index);
75  quantizer->add (nlist, clus.centroids.data());
76  } else {
77  clus.train (n, x, *quantizer);
78  }
79  quantizer->is_trained = true;
80  } else if (quantizer_trains_alone == 2) {
81  if (verbose)
82  printf (
83  "Training L2 quantizer on %ld vectors in %ldD%s\n",
84  n, d,
85  clustering_index ? "(user provided index)" : "");
86  FAISS_THROW_IF_NOT (metric_type == METRIC_L2);
87  Clustering clus (d, nlist, cp);
88  if (!clustering_index) {
89  IndexFlatL2 assigner (d);
90  clus.train(n, x, assigner);
91  } else {
92  clus.train(n, x, *clustering_index);
93  }
94  if (verbose)
95  printf ("Adding centroids to quantizer\n");
96  quantizer->add (nlist, clus.centroids.data());
97  }
98 }
99 
100 
101 /*****************************************
102  * IndexIVF implementation
103  ******************************************/
104 
105 
106 IndexIVF::IndexIVF (Index * quantizer, size_t d, size_t nlist,
107  MetricType metric):
108  Index (d, metric),
109  Level1Quantizer (quantizer, nlist),
110  nprobe (1),
111  max_codes (0),
112  maintain_direct_map (false)
113 {
114  FAISS_THROW_IF_NOT (d == quantizer->d);
115  is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
116  // Spherical by default if the metric is inner_product
117  if (metric_type == METRIC_INNER_PRODUCT) {
118  cp.spherical = true;
119  }
120  // here we set a low # iterations because this is typically used
121  // for large clusterings (nb this is not used for the MultiIndex,
122  // for which quantizer_trains_alone = true)
123  code_size = 0; // let sub-classes set this
124  ids.resize (nlist);
125  codes.resize (nlist);
126 }
127 
128 IndexIVF::IndexIVF ():
129  nprobe (1), max_codes (0),
130  maintain_direct_map (false)
131 {}
132 
133 
134 void IndexIVF::add (idx_t n, const float * x)
135 {
136  add_with_ids (n, x, nullptr);
137 }
138 
139 void IndexIVF::make_direct_map (bool new_maintain_direct_map)
140 {
141  // nothing to do
142  if (new_maintain_direct_map == maintain_direct_map)
143  return;
144 
145  if (new_maintain_direct_map) {
146  direct_map.resize (ntotal, -1);
147  for (size_t key = 0; key < nlist; key++) {
148  const std::vector<long> & idlist = ids[key];
149 
150  for (long ofs = 0; ofs < idlist.size(); ofs++) {
151  FAISS_THROW_IF_NOT_MSG (
152  0 <= idlist [ofs] && idlist[ofs] < ntotal,
153  "direct map supported only for seuquential ids");
154  direct_map [idlist [ofs]] = key << 32 | ofs;
155  }
156  }
157  } else {
158  direct_map.clear ();
159  }
160  maintain_direct_map = new_maintain_direct_map;
161 }
162 
163 
164 void IndexIVF::search (idx_t n, const float *x, idx_t k,
165  float *distances, idx_t *labels) const
166 {
167  long * idx = new long [n * nprobe];
168  ScopeDeleter<long> del (idx);
169  float * coarse_dis = new float [n * nprobe];
170  ScopeDeleter<float> del2 (coarse_dis);
171 
172  quantizer->search (n, x, nprobe, coarse_dis, idx);
173 
174  search_preassigned (n, x, k, idx, coarse_dis,
175  distances, labels, false);
176 
177 }
178 
179 
180 void IndexIVF::reconstruct (idx_t key, float* recons) const
181 {
182  FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal,
183  "direct map is not initialized");
184  long list_no = direct_map[key] >> 32;
185  long offset = direct_map[key] & 0xffffffff;
186  reconstruct_from_offset (list_no, offset, recons);
187 }
188 
189 
190 void IndexIVF::reconstruct_n (idx_t i0, idx_t ni, float* recons) const
191 {
192  FAISS_THROW_IF_NOT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
193 
194  for (long list_no = 0; list_no < nlist; list_no++) {
195  const std::vector<long>& idlist = ids[list_no];
196 
197  for (long offset = 0; offset < idlist.size(); offset++) {
198  long id = idlist[offset];
199  if (!(id >= i0 && id < i0 + ni)) {
200  continue;
201  }
202 
203  float* reconstructed = recons + (id - i0) * d;
204  reconstruct_from_offset (list_no, offset, reconstructed);
205  }
206  }
207 }
208 
209 
210 void IndexIVF::search_and_reconstruct (idx_t n, const float *x, idx_t k,
211  float *distances, idx_t *labels,
212  float *recons) const
213 {
214  long * idx = new long [n * nprobe];
215  ScopeDeleter<long> del (idx);
216  float * coarse_dis = new float [n * nprobe];
217  ScopeDeleter<float> del2 (coarse_dis);
218 
219  quantizer->search (n, x, nprobe, coarse_dis, idx);
220 
221  // search_preassigned() with `store_pairs` enabled to obtain the list_no
222  // and offset into `codes` for reconstruction
223  search_preassigned (n, x, k, idx, coarse_dis,
224  distances, labels, true /* store_pairs */);
225  for (idx_t i = 0; i < n; ++i) {
226  for (idx_t j = 0; j < k; ++j) {
227  idx_t ij = i * k + j;
228  idx_t key = labels[ij];
229  float* reconstructed = recons + ij * d;
230  if (key < 0) {
231  // Fill with NaNs
232  memset(reconstructed, -1, sizeof(*reconstructed) * d);
233  } else {
234  int list_no = key >> 32;
235  int offset = key & 0xffffffff;
236 
237  // Update label to the actual id
238  labels[ij] = ids[list_no][offset];
239 
240  reconstruct_from_offset (list_no, offset, reconstructed);
241  }
242  }
243  }
244 }
245 
246 void IndexIVF::reconstruct_from_offset (long list_no, long offset,
247  float* recons) const
248 {
249  FAISS_THROW_MSG ("reconstruct_from_offset not implemented");
250 }
251 
253 {
254  ntotal = 0;
255  direct_map.clear();
256  for (size_t i = 0; i < ids.size(); i++) {
257  ids[i].clear();
258  codes[i].clear();
259  }
260 }
261 
262 
264 {
265  FAISS_THROW_IF_NOT_MSG (!maintain_direct_map,
266  "direct map remove not implemented");
267  long nremove = 0;
268 #pragma omp parallel for reduction(+: nremove)
269  for (long i = 0; i < nlist; i++) {
270  std::vector<idx_t> & idsi = ids[i];
271  uint8_t * codesi = codes[i].data();
272 
273  long l = idsi.size(), j = 0;
274  while (j < l) {
275  if (sel.is_member (idsi[j])) {
276  l--;
277  idsi [j] = idsi [l];
278  memmove (codesi + j * code_size,
279  codesi + l * code_size, code_size);
280  } else {
281  j++;
282  }
283  }
284  if (l < idsi.size()) {
285  nremove += idsi.size() - l;
286  idsi.resize (l);
287  codes[i].resize (l * code_size);
288  }
289  }
290  ntotal -= nremove;
291  return nremove;
292 }
293 
294 
295 
296 
297 void IndexIVF::train (idx_t n, const float *x)
298 {
299  if (verbose)
300  printf ("Training level-1 quantizer\n");
301 
302  train_q1 (n, x, verbose, metric_type);
303 
304  if (verbose)
305  printf ("Training IVF residual\n");
306 
307  train_residual (n, x);
308  is_trained = true;
309 }
310 
311 void IndexIVF::train_residual(idx_t /*n*/, const float* /*x*/) {
312  if (verbose)
313  printf("IndexIVF: no residual training\n");
314  // does nothing by default
315 }
316 
317 
318 
320 {
321  std::vector<int> hist (nlist);
322  for (int i = 0; i < nlist; i++) {
323  hist[i] = ids[i].size();
324  }
325  return faiss::imbalance_factor (nlist, hist.data());
326 }
327 
329 {
330  std::vector<int> sizes(40);
331  for (int i = 0; i < nlist; i++) {
332  for (int j = 0; j < sizes.size(); j++) {
333  if ((ids[i].size() >> j) == 0) {
334  sizes[j]++;
335  break;
336  }
337  }
338  }
339  for (int i = 0; i < sizes.size(); i++) {
340  if (sizes[i]) {
341  printf ("list size in < %d: %d instances\n",
342  1 << i, sizes[i]);
343  }
344  }
345 
346 }
347 
348 void IndexIVF::merge_from (IndexIVF &other, idx_t add_id)
349 {
350  // minimal sanity checks
351  FAISS_THROW_IF_NOT (other.d == d);
352  FAISS_THROW_IF_NOT (other.nlist == nlist);
353  FAISS_THROW_IF_NOT_MSG ((!maintain_direct_map &&
354  !other.maintain_direct_map),
355  "direct map copy not implemented");
356  FAISS_THROW_IF_NOT_MSG (typeid (*this) == typeid (other),
357  "can only merge indexes of the same type");
358  for (long i = 0; i < nlist; i++) {
359  std::vector<idx_t> & src = other.ids[i];
360  std::vector<idx_t> & dest = ids[i];
361  for (long j = 0; j < src.size(); j++)
362  dest.push_back (src[j] + add_id);
363  src.clear();
364  codes[i].insert (codes[i].end(),
365  other.codes[i].begin(),
366  other.codes[i].end());
367  other.codes[i].clear();
368  }
369 
370  ntotal += other.ntotal;
371  other.ntotal = 0;
372 }
373 
374 
375 void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
376  long a1, long a2) const
377 {
378  FAISS_THROW_IF_NOT (nlist == other.nlist);
379  FAISS_THROW_IF_NOT (!other.maintain_direct_map);
380  FAISS_THROW_IF_NOT_FMT (
381  subset_type == 0 || subset_type == 1 || subset_type == 2,
382  "subset type %d not implemented", subset_type);
383 
384  size_t accu_n = 0;
385  size_t accu_a1 = 0;
386  size_t accu_a2 = 0;
387 
388  for (long list_no = 0; list_no < nlist; list_no++) {
389  const std::vector<idx_t> & ids_in = ids[list_no];
390  std::vector<idx_t> & ids_out = other.ids[list_no];
391  const std::vector<uint8_t> & codes_in = codes[list_no];
392  std::vector<uint8_t> & codes_out = other.codes[list_no];
393  size_t n = ids_in.size();
394 
395  if (subset_type == 0) {
396  for (long i = 0; i < n; i++) {
397  idx_t id = ids_in[i];
398  if (a1 <= id && id < a2) {
399  ids_out.push_back (id);
400  codes_out.insert (codes_out.end(),
401  codes_in.begin() + i * code_size,
402  codes_in.begin() + (i + 1) * code_size);
403  other.ntotal++;
404  }
405  }
406  } else if (subset_type == 1) {
407  for (long i = 0; i < n; i++) {
408  idx_t id = ids_in[i];
409  if (id % a1 == a2) {
410  ids_out.push_back (id);
411  codes_out.insert (codes_out.end(),
412  codes_in.begin() + i * code_size,
413  codes_in.begin() + (i + 1) * code_size);
414  other.ntotal++;
415  }
416  }
417  } else if (subset_type == 2) {
418  // see what is allocated to a1 and to a2
419  size_t next_accu_n = accu_n + n;
420  size_t next_accu_a1 = next_accu_n * a1 / ntotal;
421  size_t i1 = next_accu_a1 - accu_a1;
422  size_t next_accu_a2 = next_accu_n * a2 / ntotal;
423  size_t i2 = next_accu_a2 - accu_a2;
424  ids_out.insert(ids_out.end(),
425  ids_in.begin() + i1,
426  ids_in.begin() + i2);
427  codes_out.insert (codes_out.end(),
428  codes_in.begin() + i1 * code_size,
429  codes_in.begin() + i2 * code_size);
430  other.ntotal += i2 - i1;
431  accu_a1 = next_accu_a1;
432  accu_a2 = next_accu_a2;
433  }
434  accu_n += n;
435  }
436  FAISS_ASSERT(accu_n == ntotal);
437 }
438 
439 
440 
441 IndexIVF::~IndexIVF()
442 {
443 }
444 
445 
446 
447 /*****************************************
448  * IndexIVFFlat implementation
449  ******************************************/
450 
451 IndexIVFFlat::IndexIVFFlat (Index * quantizer,
452  size_t d, size_t nlist, MetricType metric):
453  IndexIVF (quantizer, d, nlist, metric)
454 {
455  code_size = sizeof(float) * d;
456 }
457 
458 
459 
460 
461 
462 
463 void IndexIVFFlat::add_with_ids (idx_t n, const float * x, const long *xids)
464 {
465  add_core (n, x, xids, nullptr);
466 }
467 
468 void IndexIVFFlat::add_core (idx_t n, const float * x, const long *xids,
469  const long *precomputed_idx)
470 
471 {
472  FAISS_THROW_IF_NOT (is_trained);
473  FAISS_THROW_IF_NOT_MSG (!(maintain_direct_map && xids),
474  "cannot have direct map and add with ids");
475  const long * idx;
476  ScopeDeleter<long> del;
477 
478  if (precomputed_idx) {
479  idx = precomputed_idx;
480  } else {
481  long * idx0 = new long [n];
482  quantizer->assign (n, x, idx0);
483  idx = idx0;
484  del.set (idx);
485  }
486  long n_add = 0;
487  for (size_t i = 0; i < n; i++) {
488  long id = xids ? xids[i] : ntotal + i;
489  long list_no = idx [i];
490  if (list_no < 0)
491  continue;
492  assert (list_no < nlist);
493 
494  ids[list_no].push_back (id);
495  const float *xi = x + i * d;
496  /* store the vectors */
497  size_t ofs = codes[list_no].size();
498  codes[list_no].resize(ofs + code_size);
499  memcpy(codes[list_no].data() + ofs,
500  xi, code_size);
501 
503  direct_map.push_back (list_no << 32 | (ids[list_no].size() - 1));
504  n_add++;
505  }
506  if (verbose) {
507  printf("IndexIVFFlat::add_core: added %ld / %ld vectors\n",
508  n_add, n);
509  }
510  ntotal += n_add;
511 }
512 
513 void IndexIVFStats::reset()
514 {
515  memset ((void*)this, 0, sizeof (*this));
516 }
517 
518 
519 IndexIVFStats indexIVF_stats;
520 
521 namespace {
522 
523 void search_knn_inner_product (const IndexIVFFlat & ivf,
524  size_t nx,
525  const float * x,
526  const long * keys,
527  float_minheap_array_t * res,
528  bool store_pairs)
529 {
530 
531  const size_t k = res->k;
532  size_t nlistv = 0, ndis = 0;
533  size_t d = ivf.d;
534 
535 #pragma omp parallel for reduction(+: nlistv, ndis)
536  for (size_t i = 0; i < nx; i++) {
537  const float * xi = x + i * d;
538  const long * keysi = keys + i * ivf.nprobe;
539  float * __restrict simi = res->get_val (i);
540  long * __restrict idxi = res->get_ids (i);
541  minheap_heapify (k, simi, idxi);
542  size_t nscan = 0;
543 
544  for (size_t ik = 0; ik < ivf.nprobe; ik++) {
545  long key = keysi[ik]; /* select the list */
546  if (key < 0) {
547  // not enough centroids for multiprobe
548  continue;
549  }
550  FAISS_THROW_IF_NOT_FMT (
551  key < (long) ivf.nlist,
552  "Invalid key=%ld at ik=%ld nlist=%ld\n",
553  key, ik, ivf.nlist);
554 
555  nlistv++;
556  const size_t list_size = ivf.ids[key].size();
557  const float * list_vecs = (const float*)(ivf.codes[key].data());
558 
559  for (size_t j = 0; j < list_size; j++) {
560  const float * yj = list_vecs + d * j;
561  float ip = fvec_inner_product (xi, yj, d);
562  if (ip > simi[0]) {
563  minheap_pop (k, simi, idxi);
564  long id = store_pairs ? (key << 32 | j) : ivf.ids[key][j];
565  minheap_push (k, simi, idxi, ip, id);
566  }
567  }
568  nscan += list_size;
569  if (ivf.max_codes && nscan >= ivf.max_codes)
570  break;
571  }
572  ndis += nscan;
573  minheap_reorder (k, simi, idxi);
574  }
575  indexIVF_stats.nq += nx;
576  indexIVF_stats.nlist += nlistv;
577  indexIVF_stats.ndis += ndis;
578 }
579 
580 
581 void search_knn_L2sqr (const IndexIVFFlat &ivf,
582  size_t nx,
583  const float * x,
584  const long * keys,
585  float_maxheap_array_t * res,
586  bool store_pairs)
587 {
588  const size_t k = res->k;
589  size_t nlistv = 0, ndis = 0;
590  size_t d = ivf.d;
591 #pragma omp parallel for reduction(+: nlistv, ndis)
592  for (size_t i = 0; i < nx; i++) {
593  const float * xi = x + i * d;
594  const long * keysi = keys + i * ivf.nprobe;
595  float * __restrict disi = res->get_val (i);
596  long * __restrict idxi = res->get_ids (i);
597  maxheap_heapify (k, disi, idxi);
598 
599  size_t nscan = 0;
600 
601  for (size_t ik = 0; ik < ivf.nprobe; ik++) {
602  long key = keysi[ik]; /* select the list */
603  if (key < 0) {
604  // not enough centroids for multiprobe
605  continue;
606  }
607  FAISS_THROW_IF_NOT_FMT (
608  key < (long) ivf.nlist,
609  "Invalid key=%ld at ik=%ld nlist=%ld\n",
610  key, ik, ivf.nlist);
611 
612  nlistv++;
613  const size_t list_size = ivf.ids[key].size();
614  const float * list_vecs = (const float*)(ivf.codes[key].data());
615 
616  for (size_t j = 0; j < list_size; j++) {
617  const float * yj = list_vecs + d * j;
618  float disij = fvec_L2sqr (xi, yj, d);
619  if (disij < disi[0]) {
620  maxheap_pop (k, disi, idxi);
621  long id = store_pairs ? (key << 32 | j) : ivf.ids[key][j];
622  maxheap_push (k, disi, idxi, disij, id);
623  }
624  }
625  nscan += list_size;
626  if (ivf.max_codes && nscan >= ivf.max_codes)
627  break;
628  }
629  ndis += nscan;
630  maxheap_reorder (k, disi, idxi);
631  }
632  indexIVF_stats.nq += nx;
633  indexIVF_stats.nlist += nlistv;
634  indexIVF_stats.ndis += ndis;
635 }
636 
637 
638 } // anonymous namespace
639 
640 void IndexIVFFlat::search_preassigned (idx_t n, const float *x, idx_t k,
641  const idx_t *idx,
642  const float * /* coarse_dis */,
643  float *distances, idx_t *labels,
644  bool store_pairs) const
645 {
646  if (metric_type == METRIC_INNER_PRODUCT) {
647  float_minheap_array_t res = {
648  size_t(n), size_t(k), labels, distances};
649  search_knn_inner_product (*this, n, x, idx, &res, store_pairs);
650 
651  } else if (metric_type == METRIC_L2) {
652  float_maxheap_array_t res = {
653  size_t(n), size_t(k), labels, distances};
654  search_knn_L2sqr (*this, n, x, idx, &res, store_pairs);
655  }
656 }
657 
658 
659 void IndexIVFFlat::range_search (idx_t nx, const float *x, float radius,
660  RangeSearchResult *result) const
661 {
662  idx_t * keys = new idx_t [nx * nprobe];
663  ScopeDeleter<idx_t> del (keys);
664  quantizer->assign (nx, x, keys, nprobe);
665 
666 #pragma omp parallel
667  {
668  RangeSearchPartialResult pres(result);
669 
670  for (size_t i = 0; i < nx; i++) {
671  const float * xi = x + i * d;
672  const long * keysi = keys + i * nprobe;
673 
675  pres.new_result (i);
676 
677  for (size_t ik = 0; ik < nprobe; ik++) {
678  long key = keysi[ik]; /* select the list */
679  if (key < 0 || key >= (long) nlist) {
680  fprintf (stderr, "Invalid key=%ld at ik=%ld nlist=%ld\n",
681  key, ik, nlist);
682  throw;
683  }
684 
685  const size_t list_size = ids[key].size();
686  const float * list_vecs = (const float *)(codes[key].data());
687 
688  for (size_t j = 0; j < list_size; j++) {
689  const float * yj = list_vecs + d * j;
690  if (metric_type == METRIC_L2) {
691  float disij = fvec_L2sqr (xi, yj, d);
692  if (disij < radius) {
693  qres.add (disij, ids[key][j]);
694  }
695  } else if (metric_type == METRIC_INNER_PRODUCT) {
696  float disij = fvec_inner_product(xi, yj, d);
697  if (disij > radius) {
698  qres.add (disij, ids[key][j]);
699  }
700  }
701  }
702  }
703  }
704 
705  pres.finalize ();
706  }
707 }
708 
709 void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
710 {
711  FAISS_THROW_IF_NOT (maintain_direct_map);
712  FAISS_THROW_IF_NOT (is_trained);
713  std::vector<idx_t> assign (n);
714  quantizer->assign (n, x, assign.data());
715 
716  for (int i = 0; i < n; i++) {
717  idx_t id = new_ids[i];
718  FAISS_THROW_IF_NOT_MSG (0 <= id && id < ntotal,
719  "id to update out of range");
720  { // remove old one
721  long dm = direct_map[id];
722  long ofs = dm & 0xffffffff;
723  long il = dm >> 32;
724  size_t l = ids[il].size();
725  if (ofs != l - 1) {
726  long id2 = ids[il].back();
727  ids[il][ofs] = id2;
728  direct_map[id2] = (il << 32) | ofs;
729  float * vecs = (float*)codes[il].data();
730  memcpy (vecs + ofs * d,
731  vecs + (l - 1) * d,
732  d * sizeof(float));
733  }
734  ids[il].pop_back();
735  codes[il].resize((l - 1) * code_size);
736  }
737  { // insert new one
738  long il = assign[i];
739  size_t l = ids[il].size();
740  long dm = (il << 32) | l;
741  direct_map[id] = dm;
742  ids[il].push_back (id);
743  codes[il].resize((l + 1) * code_size);
744  float * vecs = (float*)codes[il].data();
745  memcpy (vecs + l * d,
746  x + i * d,
747  d * sizeof(float));
748  }
749  }
750 
751 }
752 
753 void IndexIVFFlat::reconstruct_from_offset (long list_no, long offset,
754  float* recons) const
755 {
756  memcpy (recons, &codes[list_no][offset * code_size], d * sizeof(recons[0]));
757 }
758 
759 
760 } // namespace faiss
virtual void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const =0
result structure for a single query
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
Definition: utils.cpp:574
double imbalance_factor() const
1= perfectly balanced, &gt;1: imbalanced
Definition: IndexIVF.cpp:319
void search_and_reconstruct(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, float *recons) const override
Definition: IndexIVF.cpp:210
virtual void reset()=0
removes all elements from the database.
virtual void copy_subset_to(IndexIVF &other, int subset_type, long a1, long a2) const
Definition: IndexIVF.cpp:375
virtual void reconstruct_from_offset(long list_no, long offset, float *recons) const
Definition: IndexIVF.cpp:246
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:78
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:34
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:659
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:180
virtual void train(idx_t n, const float *x)
Definition: Index.cpp:23
virtual void add_with_ids(idx_t n, const float *x, const long *xids)
Definition: Index.cpp:41
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:311
double imbalance_factor(int n, int k, const long *assign)
a balanced assignment has a IF of 1
Definition: utils.cpp:1627
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:81
int d
vector dimension
Definition: Index.h:64
void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:297
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
Definition: IndexIVF.cpp:190
void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
Definition: IndexIVF.cpp:463
virtual void add(idx_t n, const float *x)=0
long idx_t
all indices are this type
Definition: Index.h:62
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:44
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
bool verbose
verbosity level
Definition: Index.h:66
void reconstruct_from_offset(long list_no, long offset, float *recons) const override
Definition: IndexIVF.cpp:753
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:252
std::vector< float > centroids
centroids (k * d)
Definition: Clustering.h:63
QueryResult & new_result(idx_t qno)
begin a new result
void update_vectors(int nv, idx_t *idx, const float *v)
Definition: IndexIVF.cpp:709
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
Index * clustering_index
to override index used during clustering
Definition: IndexIVF.h:45
void train_q1(size_t n, const float *x, bool verbose, MetricType metric_type)
Trains the level-1 quantizer; the training strategy is selected by quantizer_trains_alone.
Definition: IndexIVF.cpp:53
the entries in the buffers are split per query
void make_direct_map(bool new_maintain_direct_map=true)
Definition: IndexIVF.cpp:139
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
void print_stats() const
display some stats about the inverted lists
Definition: IndexIVF.cpp:328
void add(idx_t n, const float *x) override
Calls add_with_ids with no explicit ids (xids = nullptr).
Definition: IndexIVF.cpp:134
virtual void train(idx_t n, const float *x, faiss::Index &index)
Index is used during the assignment stage.
Definition: Clustering.cpp:67
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:33
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
long remove_ids(const IDSelector &sel) override
Dataset manipulation functions.
Definition: IndexIVF.cpp:263
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:87
bool spherical
do we want normalized centroids?
Definition: Clustering.h:29
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:42
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:164
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const override
Definition: IndexIVF.cpp:640
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:348
size_t nlist
number of possible key values
Definition: IndexIVF.h:34
size_t code_size
code size per vector in bytes
Definition: IndexIVF.h:83
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:43
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:468