Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexHNSW.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include "IndexHNSW.h"
10 
11 
12 #include <cstdlib>
13 #include <cassert>
14 #include <cstring>
15 #include <cstdio>
16 #include <cmath>
17 #include <omp.h>
18 
19 #include <unordered_set>
20 #include <queue>
21 
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <stdint.h>
26 
27 #include <immintrin.h>
28 
29 #include "utils.h"
30 #include "Heap.h"
31 #include "FaissAssert.h"
32 #include "IndexFlat.h"
33 #include "IndexIVFPQ.h"
34 
35 
36 extern "C" {
37 
38 /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
39 
40 int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
41  n, FINTEGER *k, const float *alpha, const float *a,
42  FINTEGER *lda, const float *b, FINTEGER *
43  ldb, float *beta, float *c, FINTEGER *ldc);
44 
45 }
46 
47 namespace faiss {
48 
49 /**************************************************************
50  * Auxiliary structures
51  **************************************************************/
52 
53 /// set implementation optimized for fast access.
54 struct VisitedTable {
55  std::vector<uint8_t> visited;
56  int visno;
57 
58  VisitedTable(int size):
59  visited(size), visno(1)
60  {}
61  /// set flog #no to true
62  void set(int no) {
63  visited[no] = visno;
64  }
65  /// get flag #no
66  bool get(int no) const {
67  return visited[no] == visno;
68  }
69  /// reset all flags to false
70  void advance() {
71  visno++;
72  if (visno == 250) {
73  // 250 rather than 255 because sometimes we use visno and visno+1
74  memset (visited.data(), 0, sizeof(visited[0]) * visited.size());
75  visno = 1;
76  }
77  }
78 };
79 
80 
81 namespace {
82 
83 typedef HNSW::idx_t idx_t;
84 typedef HNSW::storage_idx_t storage_idx_t;
85 typedef HNSW::DistanceComputer DistanceComputer;
86  // typedef ::faiss::VisitedTable VisitedTable;
87 
88 /// to sort pairs of (id, distance) from nearest to fathest or the reverse
89 struct NodeDistCloser {
90  float d;
91  int id;
92  NodeDistCloser(float d, int id): d(d), id(id) {}
93  bool operator<(const NodeDistCloser &obj1) const { return d < obj1.d; }
94 };
95 
96 struct NodeDistFarther {
97  float d;
98  int id;
99  NodeDistFarther(float d, int id): d(d), id(id) {}
100  bool operator<(const NodeDistFarther &obj1) const { return d > obj1.d; }
101 };
102 
103 
104 /** Heap structure that allows fast */
105 
106 struct MinimaxHeap {
107  int n;
108  int k;
109  int nvalid;
110 
111  std::vector<storage_idx_t> ids;
112  std::vector<float> dis;
114 
115  explicit MinimaxHeap(int n): n(n), k(0), nvalid(0), ids(n), dis(n) {}
116 
117  void push(storage_idx_t i, float v)
118  {
119  if (k == n) {
120  if (v >= dis[0]) return;
121  faiss::heap_pop<HC> (k--, dis.data(), ids.data());
122  nvalid--;
123  }
124  faiss::heap_push<HC> (++k, dis.data(), ids.data(), v, i);
125  nvalid++;
126  }
127 
128  float max() const
129  {
130  return dis[0];
131  }
132 
133  int size() const {return nvalid;}
134 
135  void clear() {nvalid = k = 0; }
136 
137  int pop_min(float *vmin_out = nullptr)
138  {
139  assert(k > 0);
140  // returns min. This is an O(n) operation
141  int i = k - 1;
142  while (i >= 0) {
143  if (ids[i] != -1) break;
144  i--;
145  }
146  if (i == -1) return -1;
147  int imin = i;
148  float vmin = dis[i];
149  i--;
150  while(i >= 0) {
151  if (ids[i] != -1 && dis[i] < vmin) {
152  vmin = dis[i];
153  imin = i;
154  }
155  i--;
156  }
157  if (vmin_out) *vmin_out = vmin;
158  int ret = ids[imin];
159  ids[imin] = -1;
160  nvalid --;
161  return ret;
162  }
163 
164  int count_below(float thresh) {
165  float n_below = 0;
166  for(int i = 0; i < k; i++) {
167  if (dis[i] < thresh)
168  n_below++;
169  }
170  return n_below;
171  }
172 
173 };
174 
175 
176 /**************************************************************
177  * Addition subroutines
178  **************************************************************/
179 
180 /** Enumerate vertices from farthest to nearest from query, keep a
181  * neighbor only if there is no previous neighbor that is closer to
182  * that vertex than the query.
183  */
184 void shrink_neighbor_list(DistanceComputer & qdis,
185  std::priority_queue<NodeDistFarther> &input,
186  std::vector<NodeDistFarther> &output,
187  int max_size)
188 {
189  while (input.size() > 0) {
190  NodeDistFarther v1 = input.top();
191  input.pop();
192  float dist_v1_q = v1.d;
193 
194  bool good = true;
195  for (NodeDistFarther v2 : output) {
196  float dist_v1_v2 = qdis.symmetric_dis(v2.id, v1.id);
197 
198  if (dist_v1_v2 < dist_v1_q) {
199  good = false;
200  break;
201  }
202  }
203  if (good) {
204  output.push_back(v1);
205  if (output.size() >= max_size)
206  return;
207  }
208  }
209 }
210 
211 
212 
213 
214 /// remove neighbors from the list to make it smaller than max_size
215 void shrink_neighbor_list(DistanceComputer & qdis,
216  std::priority_queue<NodeDistCloser> &resultSet1,
217  int max_size)
218 {
219 
220  if (resultSet1.size() < max_size) {
221  return;
222  }
223  std::priority_queue<NodeDistFarther> resultSet;
224  std::vector<NodeDistFarther> returnlist;
225 
226  while (resultSet1.size() > 0) {
227  resultSet.emplace(resultSet1.top().d, resultSet1.top().id);
228  resultSet1.pop();
229  }
230 
231  shrink_neighbor_list (qdis, resultSet, returnlist, max_size);
232 
233  for (NodeDistFarther curen2 : returnlist) {
234  resultSet1.emplace(curen2.d, curen2.id);
235  }
236 
237 }
238 
239 
240 /// add a link between two elements, possibly shrinking the list
241 /// of links to make room for it.
242 void add_link(HNSW & hnsw,
243  DistanceComputer & qdis,
244  storage_idx_t src, storage_idx_t dest,
245  int level)
246 {
247  size_t begin, end;
248  hnsw.neighbor_range(src, level, &begin, &end);
249  if (hnsw.neighbors[end - 1] == -1) {
250  // there is enough room, find a slot to add it
251  size_t i = end;
252  while(i > begin) {
253  if (hnsw.neighbors[i - 1] != -1) break;
254  i--;
255  }
256  hnsw.neighbors[i] = dest;
257  return;
258  }
259 
260  // otherwise we let them fight out which to keep
261 
262  // copy to resultSet...
263  std::priority_queue<NodeDistCloser> resultSet;
264  resultSet.emplace(qdis.symmetric_dis(src, dest), dest);
265  for (size_t i = begin; i < end; i++) { // HERE WAS THE BUG
266  storage_idx_t neigh = hnsw.neighbors[i];
267  resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh);
268  }
269 
270  shrink_neighbor_list(qdis, resultSet, end - begin);
271 
272  // ...and back
273  size_t i = begin;
274  while (resultSet.size()) {
275  hnsw.neighbors[i++] = resultSet.top().id;
276  resultSet.pop();
277  }
278  // they may have shrunk more than just by 1 element
279  while(i < end) {
280  hnsw.neighbors[i++] = -1;
281  }
282 }
283 
284 /// search neighbors on a single level, starting from an entry point
285 void search_neighbors_to_add(HNSW & hnsw,
286  DistanceComputer &qdis,
287  std::priority_queue<NodeDistCloser> &results,
288  int entry_point,
289  float d_entry_point,
290  int level,
291  VisitedTable &vt)
292 {
293  // top is nearest candidate
294  std::priority_queue<NodeDistFarther> candidates;
295 
296  NodeDistFarther ev(d_entry_point, entry_point);
297  candidates.push(ev);
298  results.emplace(d_entry_point, entry_point);
299  vt.set(entry_point);
300 
301  while (!candidates.empty()) {
302  // get nearest
303  const NodeDistFarther &currEv = candidates.top();
304 
305  if (currEv.d > results.top().d) {
306  break;
307  }
308  int currNode = currEv.id;
309  candidates.pop();
310 
311  // loop over neighbors
312  size_t begin, end;
313  hnsw.neighbor_range(currNode, level, &begin, &end);
314  for(size_t i = begin; i < end; i++) {
315  storage_idx_t nodeId = hnsw.neighbors[i];
316  if (nodeId < 0) break;
317  if (vt.get(nodeId)) continue;
318  vt.set(nodeId);
319 
320  float dis = qdis(nodeId);
321  NodeDistFarther evE1(dis, nodeId);
322 
323  if (results.size() < hnsw.efConstruction ||
324  results.top().d > dis) {
325 
326  results.emplace(dis, nodeId);
327  candidates.emplace(dis, nodeId);
328  if (results.size() > hnsw.efConstruction) {
329  results.pop();
330  }
331  }
332  }
333  }
334  vt.advance();
335 }
336 
337 
338 /// Finds neighbors and builds links with them, starting from an entry
339 /// point. The own neighbor list is assumed to be locked.
340 void add_links_starting_from(HNSW & hnsw,
341  DistanceComputer &ptdis,
342  storage_idx_t pt_id,
343  storage_idx_t nearest,
344  float d_nearest,
345  int level,
346  omp_lock_t * locks,
347  VisitedTable &vt)
348 {
349 
350  std::priority_queue<NodeDistCloser> link_targets;
351 
352  search_neighbors_to_add(
353  hnsw, ptdis, link_targets, nearest, d_nearest,
354  level, vt);
355 
356  // but we can afford only this many neighbors
357  int M = hnsw.nb_neighbors(level);
358 
359  shrink_neighbor_list(ptdis, link_targets, M);
360 
361  while (!link_targets.empty()) {
362  int other_id = link_targets.top().id;
363 
364  omp_set_lock(&locks[other_id]);
365  add_link(hnsw, ptdis, other_id, pt_id, level);
366  omp_unset_lock(&locks[other_id]);
367 
368  add_link(hnsw, ptdis, pt_id, other_id, level);
369 
370  link_targets.pop();
371  }
372 }
373 
374 /**************************************************************
375  * Searching subroutines
376  **************************************************************/
377 
378 /// greedily update a nearest vector at a given level
379 void greedy_update_nearest(const HNSW & hnsw,
380  DistanceComputer & qdis,
381  int level,
382  storage_idx_t & nearest,
383  float & d_nearest)
384 {
385  for(;;) {
386  storage_idx_t prev_nearest = nearest;
387 
388  size_t begin, end;
389  hnsw.neighbor_range(nearest, level, &begin, &end);
390  for(size_t i = begin; i < end; i++) {
391  storage_idx_t v = hnsw.neighbors[i];
392  if (v < 0) break;
393  float dis = qdis(v);
394  if (dis < d_nearest) {
395  nearest = v;
396  d_nearest = dis;
397  }
398  }
399  if (nearest == prev_nearest) {
400  return;
401  }
402  }
403 
404 }
405 
406 
407 /** Do a BFS on the candidates list */
408 
409 int search_from_candidates(const HNSW & hnsw,
410  DistanceComputer & qdis, int k,
411  idx_t *I, float * D,
412  MinimaxHeap &candidates,
413  VisitedTable &vt,
414  int level, int nres_in = 0)
415 {
416  int nres = nres_in;
417  int ndis = 0;
418  for (int i = 0; i < candidates.size(); i++) {
419  idx_t v1 = candidates.ids[i];
420  float d = candidates.dis[i];
421  FAISS_ASSERT(v1 >= 0);
422  if (nres < k) {
423  faiss::maxheap_push (++nres, D, I, d, v1);
424  } else if (d < D[0]) {
425  faiss::maxheap_pop (nres--, D, I);
426  faiss::maxheap_push (++nres, D, I, d, v1);
427  }
428  vt.set(v1);
429  }
430 
431  bool do_dis_check = hnsw.check_relative_distance;
432  int nstep = 0;
433 
434  while (candidates.size() > 0) {
435  float d0 = 0;
436  int v0 = candidates.pop_min(&d0);
437 
438  if (do_dis_check) {
439  // tricky stopping condition: there are more that ef
440  // distances that are processed already that are smaller
441  // than d0
442 
443  int n_dis_below = candidates.count_below(d0);
444  if(n_dis_below >= hnsw.efSearch) {
445  break;
446  }
447  }
448  size_t begin, end;
449  hnsw.neighbor_range(v0, level, &begin, &end);
450 
451  for (size_t j = begin; j < end; j++) {
452  int v1 = hnsw.neighbors[j];
453  if (v1 < 0) break;
454  if (vt.get(v1)) {
455  continue;
456  }
457  vt.set(v1);
458  ndis++;
459  float d = qdis(v1);
460  if (nres < k) {
461  faiss::maxheap_push (++nres, D, I, d, v1);
462  } else if (d < D[0]) {
463  faiss::maxheap_pop (nres--, D, I);
464  faiss::maxheap_push (++nres, D, I, d, v1);
465  }
466  candidates.push(v1, d);
467  }
468 
469  nstep++;
470  if (!do_dis_check && nstep > hnsw.efSearch) {
471  break;
472  }
473  }
474 
475  if (level == 0) {
476 #pragma omp critical
477  {
478  hnsw_stats.n1 ++;
479  if (candidates.size() == 0)
480  hnsw_stats.n2 ++;
481  hnsw_stats.n3 += ndis;
482  }
483  }
484 
485  return nres;
486 }
487 
488 
489 } // anonymous namespace
490 
491 
492 /**************************************************************
493  * HNSW structure implementation
494  **************************************************************/
495 
496 
497 int HNSW::nb_neighbors(int layer_no) const
498 {
499  return cum_nneighbor_per_level[layer_no + 1] -
500  cum_nneighbor_per_level[layer_no];
501 }
502 
503 void HNSW::set_nb_neighbors(int level_no, int n)
504 {
505  FAISS_THROW_IF_NOT(levels.size() == 0);
506  int cur_n = nb_neighbors(level_no);
507  for (int i = level_no + 1; i < cum_nneighbor_per_level.size(); i++) {
508  cum_nneighbor_per_level[i] += n - cur_n;
509  }
510 }
511 
512 int HNSW::cum_nb_neighbors(int layer_no) const
513 {
514  return cum_nneighbor_per_level[layer_no];
515 }
516 
517 void HNSW::neighbor_range(idx_t no, int layer_no,
518  size_t * begin, size_t * end) const
519 {
520  size_t o = offsets[no];
521  *begin = o + cum_nb_neighbors(layer_no);
522  *end = o + cum_nb_neighbors(layer_no + 1);
523 }
524 
525 
526 
527 HNSW::HNSW(int M): rng(12345) {
528  set_default_probas(M, 1.0 / log(M));
529  max_level = -1;
530  entry_point = -1;
531  efSearch = 16;
533  efConstruction = 40;
534  upper_beam = 1;
535  offsets.push_back(0);
536 }
537 
538 
540 {
541  double f = rng.rand_float();
542  // could be a bit faster with bissection
543  for (int level = 0; level < assign_probas.size(); level++) {
544  if (f < assign_probas[level]) {
545  return level;
546  }
547  f -= assign_probas[level];
548  }
549  // happens with exponentially low probability
550  return assign_probas.size() - 1;
551 }
552 
553 void HNSW::set_default_probas(int M, float levelMult)
554 {
555  int nn = 0;
556  cum_nneighbor_per_level.push_back (0);
557  for (int level = 0; ;level++) {
558  float proba = exp(-level / levelMult) * (1 - exp(-1 / levelMult));
559  if (proba < 1e-9) break;
560  assign_probas.push_back(proba);
561  nn += level == 0 ? M * 2 : M;
562  cum_nneighbor_per_level.push_back (nn);
563  }
564 }
565 
566 void HNSW::clear_neighbor_tables(int level)
567 {
568  for (int i = 0; i < levels.size(); i++) {
569  size_t begin, end;
570  neighbor_range(i, level, &begin, &end);
571  for (size_t j = begin; j < end; j++)
572  neighbors[j] = -1;
573  }
574 }
575 
576 
577 void HNSW::reset() {
578  max_level = -1;
579  entry_point = -1;
580  offsets.clear();
581  offsets.push_back(0);
582  levels.clear();
583  neighbors.clear();
584 }
585 
586 
587 
588 void HNSW::print_neighbor_stats(int level) const
589 {
590  FAISS_THROW_IF_NOT (level < cum_nneighbor_per_level.size());
591  printf("stats on level %d, max %d neighbors per vertex:\n",
592  level, nb_neighbors(level));
593  size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0;
594 #pragma omp parallel for reduction(+: tot_neigh) reduction(+: tot_common) \
595  reduction(+: tot_reciprocal) reduction(+: n_node)
596  for (int i = 0; i < levels.size(); i++) {
597  if (levels[i] > level) {
598  n_node++;
599  size_t begin, end;
600  neighbor_range(i, level, &begin, &end);
601  std::unordered_set<int> neighset;
602  for (size_t j = begin; j < end; j++) {
603  if (neighbors [j] < 0) break;
604  neighset.insert(neighbors[j]);
605  }
606  int n_neigh = neighset.size();
607  int n_common = 0;
608  int n_reciprocal = 0;
609  for (size_t j = begin; j < end; j++) {
610  storage_idx_t i2 = neighbors[j];
611  if (i2 < 0) break;
612  FAISS_ASSERT(i2 != i);
613  size_t begin2, end2;
614  neighbor_range(i2, level, &begin2, &end2);
615  for (size_t j2 = begin2; j2 < end2; j2++) {
616  storage_idx_t i3 = neighbors[j2];
617  if (i3 < 0) break;
618  if (i3 == i) {
619  n_reciprocal++;
620  continue;
621  }
622  if (neighset.count(i3)) {
623  neighset.erase(i3);
624  n_common++;
625  }
626  }
627  }
628  tot_neigh += n_neigh;
629  tot_common += n_common;
630  tot_reciprocal += n_reciprocal;
631  }
632  }
633  float normalizer = n_node;
634  printf(" nb of nodes at that level %ld\n", n_node);
635  printf(" neighbors per node: %.2f (%ld)\n", tot_neigh / normalizer, tot_neigh);
636  printf(" nb of reciprocal neighbors: %.2f\n", tot_reciprocal / normalizer);
637  printf(" nb of neighbors that are also neighbor-of-neighbors: %.2f (%ld)\n",
638  tot_common / normalizer, tot_common);
639 
640 
641 
642 }
643 
644 HNSWStats hnsw_stats;
645 
646 void HNSWStats::reset ()
647 {
648  memset(this, 0, sizeof(*this));
649 }
650 
651 
652 
653 /**************************************************************
654  * Building, parallel
655  **************************************************************/
656 
658  DistanceComputer & ptdis, int pt_level, int pt_id,
659  std::vector<omp_lock_t> & locks,
660  VisitedTable &vt)
661 {
662  // greedy search on upper levels
663 
664  storage_idx_t nearest;
665 #pragma omp critical
666  {
667  nearest = entry_point;
668 
669  if (nearest == -1) {
670  max_level = pt_level;
671  entry_point = pt_id;
672  }
673  }
674 
675  if (nearest < 0) {
676  return;
677  }
678 
679  omp_set_lock(&locks[pt_id]);
680 
681  int level = max_level; // level at which we start adding neighbors
682  float d_nearest = ptdis(nearest);
683 
684  for(; level > pt_level; level--) {
685  greedy_update_nearest(*this, ptdis, level, nearest, d_nearest);
686  }
687 
688  for(; level >= 0; level--) {
689  add_links_starting_from(*this, ptdis, pt_id, nearest, d_nearest,
690  level, locks.data(), vt);
691  }
692 
693  omp_unset_lock(&locks[pt_id]);
694 
695  if (pt_level > max_level) {
696  max_level = pt_level;
697  entry_point = pt_id;
698  }
699 
700 }
701 
702 
703 
704 /**************************************************************
705  * Searching
706  **************************************************************/
707 
708 
709 
710 
711 void HNSW::search(DistanceComputer & qdis,
712  int k, idx_t *I, float * D,
713  VisitedTable &vt) const
714 {
715 
716  if (upper_beam == 1) {
717 
718  // greedy search on upper levels
719  storage_idx_t nearest = entry_point;
720  float d_nearest = qdis(nearest);
721 
722  for(int level = max_level; level >= 1; level--) {
723  greedy_update_nearest(*this, qdis, level, nearest, d_nearest);
724  }
725 
726  int candidates_size = std::max(efSearch, k);
727  MinimaxHeap candidates(candidates_size);
728 
729  candidates.push(nearest, d_nearest);
730 
731  search_from_candidates (
732  *this, qdis, k, I, D, candidates, vt, 0);
733  vt.advance();
734 
735  } else {
736 
737  int candidates_size = upper_beam;
738  MinimaxHeap candidates(candidates_size);
739 
740  std::vector<idx_t> I_to_next(candidates_size);
741  std::vector<float> D_to_next(candidates_size);
742 
743  int nres = 1;
744  I_to_next[0] = entry_point;
745  D_to_next[0] = qdis(entry_point);
746 
747  for(int level = max_level; level >= 0; level--) {
748 
749  // copy I, D -> candidates
750 
751  candidates.clear();
752 
753  for (int i = 0; i < nres; i++) {
754  candidates.push(I_to_next[i], D_to_next[i]);
755  }
756 
757  if (level == 0) {
758  nres = search_from_candidates (
759  *this, qdis, k, I, D, candidates, vt, 0);
760  } else {
761  nres = search_from_candidates (
762  *this, qdis, candidates_size,
763  I_to_next.data(), D_to_next.data(),
764  candidates, vt, level);
765  }
766  vt.advance();
767  }
768 
769  }
770 }
771 
772 /**************************************************************
773  * add / search blocks of descriptors
774  **************************************************************/
775 
776 namespace {
777 
778 int prepare_level_tab (HNSW & hnsw, size_t n, bool preset_levels = false)
779 {
780  size_t n0 = hnsw.offsets.size() - 1;
781 
782  if (preset_levels) {
783  FAISS_ASSERT (n0 + n == hnsw.levels.size());
784  } else {
785  FAISS_ASSERT (n0 == hnsw.levels.size());
786  for (int i = 0; i < n; i++) {
787  int pt_level = hnsw.random_level();
788  hnsw.levels.push_back(pt_level + 1);
789  }
790  }
791 
792  int max_level = 0;
793  for (int i = 0; i < n; i++) {
794  int pt_level = hnsw.levels[i + n0] - 1;
795  if (pt_level > max_level) max_level = pt_level;
796  hnsw.offsets.push_back(hnsw.offsets.back() +
797  hnsw.cum_nb_neighbors(pt_level + 1));
798  hnsw.neighbors.resize(hnsw.offsets.back(), -1);
799  }
800  return max_level;
801 }
802 
803 void hnsw_add_vertices(IndexHNSW &index_hnsw,
804  size_t n0,
805  size_t n, const float *x,
806  bool verbose,
807  bool preset_levels = false) {
808  HNSW & hnsw = index_hnsw.hnsw;
809  size_t ntotal = n0 + n;
810  double t0 = getmillisecs();
811  if (verbose) {
812  printf("hnsw_add_vertices: adding %ld elements on top of %ld "
813  "(preset_levels=%d)\n",
814  n, n0, int(preset_levels));
815  }
816 
817  int max_level = prepare_level_tab (index_hnsw.hnsw, n, preset_levels);
818 
819  if (verbose) {
820  printf(" max_level = %d\n", max_level);
821  }
822 
823  std::vector<omp_lock_t> locks(ntotal);
824  for(int i = 0; i < ntotal; i++)
825  omp_init_lock(&locks[i]);
826 
827  // add vectors from highest to lowest level
828  std::vector<int> hist;
829  std::vector<int> order(n);
830 
831  { // make buckets with vectors of the same level
832 
833  // build histogram
834  for (int i = 0; i < n; i++) {
835  storage_idx_t pt_id = i + n0;
836  int pt_level = hnsw.levels[pt_id] - 1;
837  while (pt_level >= hist.size())
838  hist.push_back(0);
839  hist[pt_level] ++;
840  }
841 
842  // accumulate
843  std::vector<int> offsets(hist.size() + 1, 0);
844  for (int i = 0; i < hist.size() - 1; i++) {
845  offsets[i + 1] = offsets[i] + hist[i];
846  }
847 
848  // bucket sort
849  for (int i = 0; i < n; i++) {
850  storage_idx_t pt_id = i + n0;
851  int pt_level = hnsw.levels[pt_id] - 1;
852  order[offsets[pt_level]++] = pt_id;
853  }
854  }
855 
856  { // perform add
857  RandomGenerator rng2(789);
858 
859  int i1 = n;
860 
861  for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) {
862  int i0 = i1 - hist[pt_level];
863 
864  if (verbose) {
865  printf("Adding %d elements at level %d\n",
866  i1 - i0, pt_level);
867  }
868 
869  // random permutation to get rid of dataset order bias
870  for (int j = i0; j < i1; j++)
871  std::swap(order[j], order[j + rng2.rand_int(i1 - j)]);
872 
873 #pragma omp parallel
874  {
875  VisitedTable vt (ntotal);
876 
877  DistanceComputer *dis = index_hnsw.get_distance_computer();
878  ScopeDeleter1<DistanceComputer> del(dis);
879  int prev_display = verbose && omp_get_thread_num() == 0 ? 0 : -1;
880 
881 #pragma omp for schedule(dynamic)
882  for (int i = i0; i < i1; i++) {
883  storage_idx_t pt_id = order[i];
884  dis->set_query (x + (pt_id - n0) * dis->d);
885 
886  hnsw.add_with_locks (
887  *dis, pt_level, pt_id, locks,
888  vt);
889 
890  if (prev_display >= 0 && i - i0 > prev_display + 10000) {
891  prev_display = i - i0;
892  printf(" %d / %d\r", i - i0, i1 - i0);
893  fflush(stdout);
894  }
895  }
896  }
897  i1 = i0;
898  }
899  FAISS_ASSERT(i1 == 0);
900  }
901  if (verbose)
902  printf("Done in %.3f ms\n", getmillisecs() - t0);
903 
904  for(int i = 0; i < ntotal; i++)
905  omp_destroy_lock(&locks[i]);
906 
907 }
908 
909 
910 } // anonymous namespace
911 
912 
914 {
915  int max_level = prepare_level_tab (*this, n);
916  RandomGenerator rng2(456);
917 
918  for (int level = max_level - 1; level >= 0; level++) {
919  std::vector<int> elts;
920  for (int i = 0; i < n; i++) {
921  if (levels[i] > level) {
922  elts.push_back(i);
923  }
924  }
925  printf ("linking %ld elements in level %d\n",
926  elts.size(), level);
927 
928  if (elts.size() == 1) continue;
929 
930  for (int ii = 0; ii < elts.size(); ii++) {
931  int i = elts[ii];
932  size_t begin, end;
933  neighbor_range(i, 0, &begin, &end);
934  for (size_t j = begin; j < end; j++) {
935  int other = 0;
936  do {
937  other = elts[rng2.rand_int(elts.size())];
938  } while(other == i);
939 
940  neighbors[j] = other;
941  }
942 
943  }
944 
945  }
946 
947 }
948 
949 
950 
951 
952 /**************************************************************
953  * IndexHNSW implementation
954  **************************************************************/
955 
956 IndexHNSW::IndexHNSW(int d, int M):
957  Index(d, METRIC_L2),
958  hnsw(M),
959  own_fields(false),
960  storage(nullptr),
961  reconstruct_from_neighbors(nullptr)
962 {}
963 
964 IndexHNSW::IndexHNSW(Index *storage, int M):
965  Index(storage->d, METRIC_L2),
966  hnsw(M),
967  own_fields(false),
968  storage(storage),
969  reconstruct_from_neighbors(nullptr)
970 {}
971 
972 IndexHNSW::~IndexHNSW() {
973  if (own_fields) {
974  delete storage;
975  }
976 }
977 
978 void IndexHNSW::train(idx_t n, const float* x)
979 {
980  // hnsw structure does not require training
981  storage->train (n, x);
982  is_trained = true;
983 }
984 
985 void IndexHNSW::search (idx_t n, const float *x, idx_t k,
986  float *distances, idx_t *labels) const
987 
988 {
989 
990 #pragma omp parallel
991  {
992  VisitedTable vt (ntotal);
993  DistanceComputer *dis = get_distance_computer();
995  size_t nreorder = 0;
996 
997 #pragma omp for
998  for(int i = 0; i < n; i++) {
999  idx_t * idxi = labels + i * k;
1000  float * simi = distances + i * k;
1001  dis->set_query(x + i * d);
1002 
1003  maxheap_heapify (k, simi, idxi);
1004  hnsw.search (*dis, k, idxi, simi, vt);
1005 
1006  maxheap_reorder (k, simi, idxi);
1007 
1008  if (reconstruct_from_neighbors &&
1009  reconstruct_from_neighbors->k_reorder != 0) {
1010  int k_reorder = reconstruct_from_neighbors->k_reorder;
1011  if (k_reorder == -1 || k_reorder > k) k_reorder = k;
1012 
1013  nreorder += reconstruct_from_neighbors->compute_distances(
1014  k_reorder, idxi, x + i * d, simi);
1015 
1016  // sort top k_reorder
1017  maxheap_heapify (k_reorder, simi, idxi, simi, idxi, k_reorder);
1018  maxheap_reorder (k_reorder, simi, idxi);
1019  }
1020  }
1021 #pragma omp critical
1022  {
1023  hnsw_stats.nreorder += nreorder;
1024  }
1025  }
1026 
1027 
1028 }
1029 
1030 
1031 void IndexHNSW::add(idx_t n, const float *x)
1032 {
1033  FAISS_THROW_IF_NOT(is_trained);
1034  int n0 = ntotal;
1035  storage->add(n, x);
1036  ntotal = storage->ntotal;
1037 
1038  hnsw_add_vertices (*this, n0, n, x, verbose,
1039  hnsw.levels.size() == ntotal);
1040 
1041 }
1042 
1044 {
1045  hnsw.reset();
1046  storage->reset();
1047  ntotal = 0;
1048 }
1049 
1050 void IndexHNSW::reconstruct (idx_t key, float* recons) const
1051 {
1052  storage->reconstruct(key, recons);
1053 }
1054 
1055 void IndexHNSW::shrink_level_0_neighbors(int new_size)
1056 {
1057 #pragma omp parallel
1058  {
1059  DistanceComputer *dis = get_distance_computer();
1061 
1062 #pragma omp for
1063  for (idx_t i = 0; i < ntotal; i++) {
1064 
1065  size_t begin, end;
1066  hnsw.neighbor_range(i, 0, &begin, &end);
1067 
1068  std::priority_queue<NodeDistFarther> initial_list;
1069 
1070  for (size_t j = begin; j < end; j++) {
1071  int v1 = hnsw.neighbors[j];
1072  if (v1 < 0) break;
1073  initial_list.emplace(dis->symmetric_dis(i, v1), v1);
1074 
1075  // initial_list.emplace(qdis(v1), v1);
1076  }
1077 
1078  std::vector<NodeDistFarther> shrunk_list;
1079  shrink_neighbor_list (*dis, initial_list, shrunk_list, new_size);
1080 
1081  for (size_t j = begin; j < end; j++) {
1082  if (j - begin < shrunk_list.size())
1083  hnsw.neighbors[j] = shrunk_list[j - begin].id;
1084  else
1085  hnsw.neighbors[j] = -1;
1086  }
1087  }
1088  }
1089 
1090 }
1091 
1093  idx_t n, const float *x, idx_t k,
1094  const storage_idx_t *nearest, const float *nearest_d,
1095  float *distances, idx_t *labels, int nprobe,
1096  int search_type) const
1097 {
1098 
1099  storage_idx_t ntotal = hnsw.levels.size();
1100 #pragma omp parallel
1101  {
1102  DistanceComputer *qdis = get_distance_computer();
1104 
1105  VisitedTable vt (ntotal);
1106 
1107 #pragma omp for
1108  for(idx_t i = 0; i < n; i++) {
1109  idx_t * idxi = labels + i * k;
1110  float * simi = distances + i * k;
1111 
1112  qdis->set_query(x + i * d);
1113  maxheap_heapify (k, simi, idxi);
1114 
1115  if (search_type == 1) {
1116 
1117  int nres = 0;
1118 
1119  for(int j = 0; j < nprobe; j++) {
1120  storage_idx_t cj = nearest[i * nprobe + j];
1121 
1122  if (cj < 0) break;
1123 
1124  if (vt.get(cj)) continue;
1125 
1126  int candidates_size = std::max(hnsw.efSearch, int(k));
1127  MinimaxHeap candidates(candidates_size);
1128 
1129  candidates.push(cj, nearest_d[i * nprobe + j]);
1130 
1131  nres = search_from_candidates (
1132  hnsw, *qdis, k, idxi, simi,
1133  candidates, vt, 0, nres);
1134  }
1135  } else if (search_type == 2) {
1136 
1137  int candidates_size = std::max(hnsw.efSearch, int(k));
1138  candidates_size = std::max(candidates_size, nprobe);
1139 
1140  MinimaxHeap candidates(candidates_size);
1141  for(int j = 0; j < nprobe; j++) {
1142  storage_idx_t cj = nearest[i * nprobe + j];
1143 
1144  if (cj < 0) break;
1145  candidates.push(cj, nearest_d[i * nprobe + j]);
1146  }
1147  search_from_candidates (
1148  hnsw, *qdis, k, idxi, simi,
1149  candidates, vt, 0);
1150 
1151  }
1152  vt.advance();
1153 
1154  maxheap_reorder (k, simi, idxi);
1155 
1156  }
1157  }
1158 
1159 
1160 }
1161 
1163  int k, const float *D, const idx_t *I)
1164 {
1165  int dest_size = hnsw.nb_neighbors (0);
1166 
1167 #pragma omp parallel for
1168  for (idx_t i = 0; i < ntotal; i++) {
1169  DistanceComputer *qdis = get_distance_computer();
1170  float vec[d];
1171  storage->reconstruct(i, vec);
1172  qdis->set_query(vec);
1173 
1174  std::priority_queue<NodeDistFarther> initial_list;
1175 
1176  for (size_t j = 0; j < k; j++) {
1177  int v1 = I[i * k + j];
1178  if (v1 == i) continue;
1179  if (v1 < 0) break;
1180  initial_list.emplace(D[i * k + j], v1);
1181  }
1182 
1183  std::vector<NodeDistFarther> shrunk_list;
1184  shrink_neighbor_list (*qdis, initial_list, shrunk_list, dest_size);
1185 
1186  size_t begin, end;
1187  hnsw.neighbor_range(i, 0, &begin, &end);
1188 
1189  for (size_t j = begin; j < end; j++) {
1190  if (j - begin < shrunk_list.size())
1191  hnsw.neighbors[j] = shrunk_list[j - begin].id;
1192  else
1193  hnsw.neighbors[j] = -1;
1194  }
1195  }
1196 }
1197 
1198 
1199 
1201  int n, const storage_idx_t *points,
1202  const storage_idx_t *nearests)
1203 {
1204 
1205  std::vector<omp_lock_t> locks(ntotal);
1206  for(int i = 0; i < ntotal; i++)
1207  omp_init_lock(&locks[i]);
1208 
1209 #pragma omp parallel
1210  {
1211  VisitedTable vt (ntotal);
1212 
1213  DistanceComputer *dis = get_distance_computer();
1215  float vec[storage->d];
1216 
1217 #pragma omp for schedule(dynamic)
1218  for (int i = 0; i < n; i++) {
1219  storage_idx_t pt_id = points[i];
1220  storage_idx_t nearest = nearests[i];
1221  storage->reconstruct (pt_id, vec);
1222  dis->set_query (vec);
1223 
1224  add_links_starting_from(hnsw, *dis, pt_id, nearest, (*dis)(nearest),
1225  0, locks.data(), vt);
1226 
1227  if (verbose && i % 10000 == 0) {
1228  printf(" %d / %d\r", i, n);
1229  fflush(stdout);
1230  }
1231  }
1232  }
1233  if (verbose) {
1234  printf("\n");
1235  }
1236 
1237  for(int i = 0; i < ntotal; i++)
1238  omp_destroy_lock(&locks[i]);
1239 }
1240 
1241 void IndexHNSW::reorder_links()
1242 {
1243  int M = hnsw.nb_neighbors(0);
1244 
1245 #pragma omp parallel
1246  {
1247  std::vector<float> distances (M);
1248  std::vector<size_t> order (M);
1249  std::vector<storage_idx_t> tmp (M);
1250  DistanceComputer *dis = get_distance_computer();
1252 
1253 #pragma omp for
1254  for(storage_idx_t i = 0; i < ntotal; i++) {
1255 
1256  size_t begin, end;
1257  hnsw.neighbor_range(i, 0, &begin, &end);
1258 
1259  for (size_t j = begin; j < end; j++) {
1260  storage_idx_t nj = hnsw.neighbors[j];
1261  if (nj < 0) {
1262  end = j;
1263  break;
1264  }
1265  distances[j - begin] = dis->symmetric_dis(i, nj);
1266  tmp [j - begin] = nj;
1267  }
1268 
1269  fvec_argsort (end - begin, distances.data(), order.data());
1270  for (size_t j = begin; j < end; j++) {
1271  hnsw.neighbors[j] = tmp[order[j - begin]];
1272  }
1273  }
1274 
1275  }
1276 }
1277 
1278 
1279 void IndexHNSW::link_singletons()
1280 {
1281  printf("search for singletons\n");
1282 
1283  std::vector<bool> seen(ntotal);
1284 
1285  for (size_t i = 0; i < ntotal; i++) {
1286  size_t begin, end;
1287  hnsw.neighbor_range(i, 0, &begin, &end);
1288  for (size_t j = begin; j < end; j++) {
1289  storage_idx_t ni = hnsw.neighbors[j];
1290  if (ni >= 0) seen[ni] = true;
1291  }
1292  }
1293 
1294  int n_sing = 0, n_sing_l1 = 0;
1295  std::vector<storage_idx_t> singletons;
1296  for (storage_idx_t i = 0; i < ntotal; i++) {
1297  if (!seen[i]) {
1298  singletons.push_back(i);
1299  n_sing++;
1300  if (hnsw.levels[i] > 1)
1301  n_sing_l1++;
1302  }
1303  }
1304 
1305  printf(" Found %d / %ld singletons (%d appear in a level above)\n",
1306  n_sing, ntotal, n_sing_l1);
1307 
1308  std::vector<float>recons(singletons.size() * d);
1309  for (int i = 0; i < singletons.size(); i++) {
1310 
1311  FAISS_ASSERT(!"not implemented");
1312 
1313  }
1314 
1315 
1316 }
1317 
1318 
1319 
1320 
1321 // storage that explicitly reconstructs vectors before computing distances
1323 
1324  const Index & storage;
1325  std::vector<float> buf;
1326  const float *q;
1327 
1328  GenericDistanceComputer(const Index & storage): storage(storage)
1329  {
1330  d = storage.d;
1331  buf.resize(d * 2);
1332  }
1333 
1334  float operator () (storage_idx_t i) override
1335  {
1336  storage.reconstruct(i, buf.data());
1337  return fvec_L2sqr(q, buf.data(), d);
1338  }
1339 
1340  float symmetric_dis(storage_idx_t i, storage_idx_t j) override
1341  {
1342  storage.reconstruct(i, buf.data());
1343  storage.reconstruct(j, buf.data() + d);
1344  return fvec_L2sqr(buf.data() + d, buf.data(), d);
1345  }
1346 
1347  void set_query(const float *x) override {
1348  q = x;
1349  }
1350 
1351 
1352 };
1353 
1354 HNSW::DistanceComputer * IndexHNSW::get_distance_computer () const
1355 {
1356  return new GenericDistanceComputer (*storage);
1357 }
1358 
1359 
1360 /**************************************************************
1361  * ReconstructFromNeighbors implementation
1362  **************************************************************/
1363 
1364 
1365 ReconstructFromNeighbors::ReconstructFromNeighbors(
1366  const IndexHNSW & index, size_t k, size_t nsq):
1367  index(index), k(k), nsq(nsq) {
1368  M = index.hnsw.nb_neighbors(0);
1369  FAISS_ASSERT(k <= 256);
1370  code_size = k == 1 ? 0 : nsq;
1371  ntotal = 0;
1372  d = index.d;
1373  FAISS_ASSERT(d % nsq == 0);
1374  dsub = d / nsq;
1375  k_reorder = -1;
1376 }
1377 
1378 void ReconstructFromNeighbors::reconstruct(storage_idx_t i, float *x, float *tmp) const
1379 {
1380 
1381 
1382  const HNSW & hnsw = index.hnsw;
1383  size_t begin, end;
1384  hnsw.neighbor_range(i, 0, &begin, &end);
1385 
1386  if (k == 1 || nsq == 1) {
1387  const float * beta;
1388  if (k == 1) {
1389  beta = codebook.data();
1390  } else {
1391  int idx = codes[i];
1392  beta = codebook.data() + idx * (M + 1);
1393  }
1394 
1395  float w0 = beta[0]; // weight of image itself
1396  index.storage->reconstruct(i, tmp);
1397 
1398  for (int l = 0; l < d; l++)
1399  x[l] = w0 * tmp[l];
1400 
1401  for (size_t j = begin; j < end; j++) {
1402 
1403  storage_idx_t ji = hnsw.neighbors[j];
1404  if (ji < 0) ji = i;
1405  float w = beta[j - begin + 1];
1406  index.storage->reconstruct(ji, tmp);
1407  for (int l = 0; l < d; l++)
1408  x[l] += w * tmp[l];
1409  }
1410  } else if (nsq == 2) {
1411  int idx0 = codes[2 * i];
1412  int idx1 = codes[2 * i + 1];
1413 
1414  const float *beta0 = codebook.data() + idx0 * (M + 1);
1415  const float *beta1 = codebook.data() + (idx1 + k) * (M + 1);
1416 
1417  index.storage->reconstruct(i, tmp);
1418 
1419  float w0;
1420 
1421  w0 = beta0[0];
1422  for (int l = 0; l < dsub; l++)
1423  x[l] = w0 * tmp[l];
1424 
1425  w0 = beta1[0];
1426  for (int l = dsub; l < d; l++)
1427  x[l] = w0 * tmp[l];
1428 
1429  for (size_t j = begin; j < end; j++) {
1430  storage_idx_t ji = hnsw.neighbors[j];
1431  if (ji < 0) ji = i;
1432  index.storage->reconstruct(ji, tmp);
1433  float w;
1434  w = beta0[j - begin + 1];
1435  for (int l = 0; l < dsub; l++)
1436  x[l] += w * tmp[l];
1437 
1438  w = beta1[j - begin + 1];
1439  for (int l = dsub; l < d; l++)
1440  x[l] += w * tmp[l];
1441  }
1442  } else {
1443  const float *betas[nsq];
1444  {
1445  const float *b = codebook.data();
1446  const uint8_t *c = &codes[i * code_size];
1447  for (int sq = 0; sq < nsq; sq++) {
1448  betas[sq] = b + (*c++) * (M + 1);
1449  b += (M + 1) * k;
1450  }
1451  }
1452 
1453  index.storage->reconstruct(i, tmp);
1454  {
1455  int d0 = 0;
1456  for (int sq = 0; sq < nsq; sq++) {
1457  float w = *(betas[sq]++);
1458  int d1 = d0 + dsub;
1459  for (int l = d0; l < d1; l++) {
1460  x[l] = w * tmp[l];
1461  }
1462  d0 = d1;
1463  }
1464  }
1465 
1466  for (size_t j = begin; j < end; j++) {
1467  storage_idx_t ji = hnsw.neighbors[j];
1468  if (ji < 0) ji = i;
1469 
1470  index.storage->reconstruct(ji, tmp);
1471  int d0 = 0;
1472  for (int sq = 0; sq < nsq; sq++) {
1473  float w = *(betas[sq]++);
1474  int d1 = d0 + dsub;
1475  for (int l = d0; l < d1; l++) {
1476  x[l] += w * tmp[l];
1477  }
1478  d0 = d1;
1479  }
1480  }
1481  }
1482 }
1483 
1484 void ReconstructFromNeighbors::reconstruct_n(storage_idx_t n0,
1485  storage_idx_t ni,
1486  float *x) const
1487 {
1488 #pragma omp parallel
1489  {
1490  std::vector<float> tmp(index.d);
1491 #pragma omp for
1492  for (storage_idx_t i = 0; i < ni; i++) {
1493  reconstruct(n0 + i, x + i * index.d, tmp.data());
1494  }
1495  }
1496 }
1497 
1498 size_t ReconstructFromNeighbors::compute_distances(size_t n, const idx_t *shortlist,
1499  const float *query, float *distances) const
1500 {
1501  std::vector<float> tmp(2 * index.d);
1502  size_t ncomp = 0;
1503  for (int i = 0; i < n; i++) {
1504  if (shortlist[i] < 0) break;
1505  reconstruct(shortlist[i], tmp.data(), tmp.data() + index.d);
1506  distances[i] = fvec_L2sqr(query, tmp.data(), index.d);
1507  ncomp++;
1508  }
1509  return ncomp;
1510 }
1511 
1512 void ReconstructFromNeighbors::get_neighbor_table(storage_idx_t i, float *tmp1) const
1513 {
1514  const HNSW & hnsw = index.hnsw;
1515  size_t begin, end;
1516  hnsw.neighbor_range(i, 0, &begin, &end);
1517  size_t d = index.d;
1518 
1519  index.storage->reconstruct(i, tmp1);
1520 
1521  for (size_t j = begin; j < end; j++) {
1522  storage_idx_t ji = hnsw.neighbors[j];
1523  if (ji < 0) ji = i;
1524  index.storage->reconstruct(ji, tmp1 + (j - begin + 1) * d);
1525  }
1526 
1527 }
1528 
1529 
1530 /// called by add_codes
1532  const float *x, storage_idx_t i, uint8_t *code) const
1533 {
1534 
1535  // fill in tmp table with the neighbor values
1536  float *tmp1 = new float[d * (M + 1) + (d * k)];
1537  float *tmp2 = tmp1 + d * (M + 1);
1538  ScopeDeleter<float> del(tmp1);
1539 
1540  // collect coordinates of base
1541  get_neighbor_table (i, tmp1);
1542 
1543  for (int sq = 0; sq < nsq; sq++) {
1544  int d0 = sq * dsub;
1545  int d1 = d0 + dsub;
1546 
1547  {
1548  FINTEGER ki = k, di = d, m1 = M + 1;
1549  FINTEGER dsubi = dsub;
1550  float zero = 0, one = 1;
1551 
1552  sgemm_ ("N", "N", &dsubi, &ki, &m1, &one,
1553  tmp1 + d0, &di,
1554  codebook.data() + sq * (m1 * k), &m1,
1555  &zero, tmp2, &dsubi);
1556  }
1557 
1558  float min = HUGE_VAL;
1559  int argmin = -1;
1560  for (int j = 0; j < k; j++) {
1561  float dis = fvec_L2sqr(x + d0, tmp2 + j * dsub, dsub);
1562  if (dis < min) {
1563  min = dis;
1564  argmin = j;
1565  }
1566  }
1567  code[sq] = argmin;
1568  }
1569 
1570 }
1571 
1572 void ReconstructFromNeighbors::add_codes(size_t n, const float *x)
1573 {
1574  if (k == 1) { // nothing to encode
1575  ntotal += n;
1576  return;
1577  }
1578  codes.resize(codes.size() + code_size * n);
1579 #pragma omp parallel for
1580  for (int i = 0; i < n; i++) {
1581  estimate_code(x + i * index.d, ntotal + i,
1582  codes.data() + (ntotal + i) * code_size);
1583  }
1584  ntotal += n;
1585  FAISS_ASSERT (codes.size() == ntotal * code_size);
1586 }
1587 
1588 
1589 /**************************************************************
1590  * IndexHNSWFlat implementation
1591  **************************************************************/
1592 
1593 
1595  Index::idx_t nb;
1596  const float *q;
1597  const float *b;
1598  size_t ndis;
1599 
1600  float operator () (storage_idx_t i) override
1601  {
1602  ndis++;
1603  return (fvec_L2sqr(q, b + i * d, d));
1604  }
1605 
1606  float symmetric_dis(storage_idx_t i, storage_idx_t j) override
1607  {
1608  return (fvec_L2sqr(b + j * d, b + i * d, d));
1609  }
1610 
1611 
1612  FlatL2Dis(const IndexFlatL2 & storage, const float *q = nullptr):
1613  q(q)
1614  {
1615  nb = storage.ntotal;
1616  d = storage.d;
1617  b = storage.xb.data();
1618  ndis = 0;
1619  }
1620 
1621  void set_query(const float *x) override {
1622  q = x;
1623  }
1624 
1625  virtual ~FlatL2Dis () {
1626 #pragma omp critical
1627  {
1628  hnsw_stats.ndis += ndis;
1629  }
1630  }
1631 };
1632 
1633 
1634 IndexHNSWFlat::IndexHNSWFlat()
1635 {
1636  is_trained = true;
1637 }
1638 
1639 
1640 IndexHNSWFlat::IndexHNSWFlat(int d, int M):
1641  IndexHNSW(new IndexFlatL2(d), M)
1642 {
1643  own_fields = true;
1644  is_trained = true;
1645 }
1646 
1647 
1648 HNSW::DistanceComputer * IndexHNSWFlat::get_distance_computer () const
1649 {
1650  return new FlatL2Dis (*dynamic_cast<IndexFlatL2*> (storage));
1651 }
1652 
1653 
1654 
1655 
1656 /**************************************************************
1657  * IndexHNSWPQ implementation
1658  **************************************************************/
1659 
1660 
1662  Index::idx_t nb;
1663  const uint8_t *codes;
1664  size_t code_size;
1665  const ProductQuantizer & pq;
1666  const float *sdc;
1667  std::vector<float> precomputed_table;
1668  size_t ndis;
1669 
1670  float operator () (storage_idx_t i) override
1671  {
1672  const uint8_t *code = codes + i * code_size;
1673  const float *dt = precomputed_table.data();
1674  float accu = 0;
1675  for (int j = 0; j < pq.M; j++) {
1676  accu += dt[*code++];
1677  dt += 256;
1678  }
1679  ndis++;
1680  return accu;
1681  }
1682 
1683  float symmetric_dis(storage_idx_t i, storage_idx_t j) override
1684  {
1685  const float * sdci = sdc;
1686  float accu = 0;
1687  const uint8_t *codei = codes + i * code_size;
1688  const uint8_t *codej = codes + j * code_size;
1689 
1690  for (int l = 0; l < pq.M; l++) {
1691  accu += sdci[(*codei++) + (*codej++) * 256];
1692  sdci += 256 * 256;
1693  }
1694  return accu;
1695  }
1696 
1697 
1698  PQDis(const IndexPQ & storage, const float *q = nullptr):
1699  pq(storage.pq)
1700  {
1701  precomputed_table.resize(pq.M * pq.ksub);
1702  nb = storage.ntotal;
1703  d = storage.d;
1704  codes = storage.codes.data();
1705  code_size = pq.code_size;
1706  FAISS_ASSERT(pq.ksub == 256);
1707  FAISS_ASSERT(pq.sdc_table.size() == pq.ksub * pq.ksub * pq.M);
1708  sdc = pq.sdc_table.data();
1709  ndis = 0;
1710  }
1711 
1712  void set_query(const float *x) override {
1713  pq.compute_distance_table(x, precomputed_table.data());
1714  }
1715 
1716  virtual ~PQDis () {
1717 #pragma omp critical
1718  {
1719  hnsw_stats.ndis += ndis;
1720  }
1721  }
1722 };
1723 
1724 IndexHNSWPQ::IndexHNSWPQ() {}
1725 
1726 IndexHNSWPQ::IndexHNSWPQ(int d, int pq_m, int M):
1727  IndexHNSW(new IndexPQ(d, pq_m, 8), M)
1728 {
1729  own_fields = true;
1730  is_trained = false;
1731 }
1732 
1733 void IndexHNSWPQ::train(idx_t n, const float* x)
1734 {
1735  IndexHNSW::train (n, x);
1736  (dynamic_cast<IndexPQ*> (storage))->pq.compute_sdc_table();
1737 }
1738 
1739 
1740 
1741 HNSW::DistanceComputer * IndexHNSWPQ::get_distance_computer () const
1742 {
1743  return new PQDis (*dynamic_cast<IndexPQ*> (storage));
1744 }
1745 
1746 
1747 /**************************************************************
1748  * IndexHNSWSQ implementation
1749  **************************************************************/
1750 
1751 
1753  Index::idx_t nb;
1754  const uint8_t *codes;
1755  size_t code_size;
1756  const ScalarQuantizer & sq;
1757  const float *q;
1759 
1760  float operator () (storage_idx_t i) override
1761  {
1762  const uint8_t *code = codes + i * code_size;
1763 
1764  return dc->compute_distance (q, code);
1765  }
1766 
1767  float symmetric_dis(storage_idx_t i, storage_idx_t j) override
1768  {
1769  const uint8_t *codei = codes + i * code_size;
1770  const uint8_t *codej = codes + j * code_size;
1771  return dc->compute_code_distance (codei, codej);
1772  }
1773 
1774 
1775  SQDis(const IndexScalarQuantizer & storage, const float *q = nullptr):
1776  sq(storage.sq)
1777  {
1778  nb = storage.ntotal;
1779  d = storage.d;
1780  codes = storage.codes.data();
1781  code_size = sq.code_size;
1782  dc = sq.get_distance_computer();
1783  }
1784 
1785  void set_query(const float *x) override {
1786  q = x;
1787  }
1788 
1789  virtual ~SQDis () {
1790  delete dc;
1791  }
1792 };
1793 
1794 IndexHNSWSQ::IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M):
1795  IndexHNSW (new IndexScalarQuantizer (d, qtype), M)
1796 {
1797  own_fields = true;
1798 }
1799 
1800 IndexHNSWSQ::IndexHNSWSQ() {}
1801 
1802 HNSW::DistanceComputer * IndexHNSWSQ::get_distance_computer () const
1803 {
1804  return new SQDis (*dynamic_cast<IndexScalarQuantizer*> (storage));
1805 }
1806 
1807 
1808 
1809 
1810 /**************************************************************
1811  * IndexHNSW2Level implementation
1812  **************************************************************/
1813 
1814 
1815 
1816 IndexHNSW2Level::IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M):
1817  IndexHNSW (new Index2Layer (quantizer, nlist, m_pq), M)
1818 {
1819  own_fields = true;
1820  is_trained = false;
1821 }
1822 
1823 IndexHNSW2Level::IndexHNSW2Level() {}
1824 
1826 
1827  const Index2Layer & storage;
1828  std::vector<float> buf;
1829  const float *q;
1830 
1831  const float *pq_l1_tab, *pq_l2_tab;
1832 
1833  Distance2Level(const Index2Layer & storage): storage(storage)
1834  {
1835  d = storage.d;
1836  FAISS_ASSERT(storage.pq.dsub == 4);
1837  pq_l2_tab = storage.pq.centroids.data();
1838  buf.resize(2 * d);
1839  }
1840 
1841  float symmetric_dis(storage_idx_t i, storage_idx_t j) override
1842  {
1843  storage.reconstruct(i, buf.data());
1844  storage.reconstruct(j, buf.data() + d);
1845  return fvec_L2sqr(buf.data() + d, buf.data(), d);
1846  }
1847 
1848  void set_query(const float *x) override {
1849  q = x;
1850  }
1851 
1852 
1853 };
1854 
1855 
1856 // well optimized for xNN+PQNN
1858 
1859  int M, k;
1860 
1861  DistanceXPQ4(const Index2Layer & storage):
1862  Distance2Level (storage)
1863  {
1864  const IndexFlat *quantizer =
1865  dynamic_cast<IndexFlat*> (storage.q1.quantizer);
1866 
1867  FAISS_ASSERT(quantizer);
1868  M = storage.pq.M;
1869  pq_l1_tab = quantizer->xb.data();
1870  }
1871 
1872  float operator () (storage_idx_t i) override
1873  {
1874  const uint8_t *code = storage.codes.data() + i * storage.code_size;
1875  long key = 0;
1876  memcpy (&key, code, storage.code_size_1);
1877  code += storage.code_size_1;
1878 
1879  // walking pointers
1880  const float *qa = q;
1881  const __m128 *l1_t = (const __m128 *)(pq_l1_tab + d * key);
1882  const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab;
1883  __m128 accu = _mm_setzero_ps();
1884 
1885  for (int m = 0; m < M; m++) {
1886  __m128 qi = _mm_loadu_ps(qa);
1887  __m128 recons = l1_t[m] + pq_l2_t[*code++];
1888  __m128 diff = qi - recons;
1889  accu += diff * diff;
1890  pq_l2_t += 256;
1891  qa += 4;
1892  }
1893 
1894  accu = _mm_hadd_ps (accu, accu);
1895  accu = _mm_hadd_ps (accu, accu);
1896  return _mm_cvtss_f32 (accu);
1897  }
1898 
1899 };
1900 
1901 // well optimized for 2xNN+PQNN
1903 
1904  int M_2, mi_nbits;
1905 
1906  Distance2xXPQ4(const Index2Layer & storage):
1907  Distance2Level (storage)
1908  {
1909  const MultiIndexQuantizer *mi =
1910  dynamic_cast<MultiIndexQuantizer*> (storage.q1.quantizer);
1911 
1912  FAISS_ASSERT(mi);
1913  FAISS_ASSERT(storage.pq.M % 2 == 0);
1914  M_2 = storage.pq.M / 2;
1915  mi_nbits = mi->pq.nbits;
1916  pq_l1_tab = mi->pq.centroids.data();
1917  }
1918 
1919  float operator () (storage_idx_t i) override
1920  {
1921  const uint8_t *code = storage.codes.data() + i * storage.code_size;
1922  long key01 = 0;
1923  memcpy (&key01, code, storage.code_size_1);
1924  code += storage.code_size_1;
1925 
1926  // walking pointers
1927  const float *qa = q;
1928  const __m128 *pq_l1_t = (const __m128 *)pq_l1_tab;
1929  const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab;
1930  __m128 accu = _mm_setzero_ps();
1931 
1932  for (int mi_m = 0; mi_m < 2; mi_m++) {
1933  long l1_idx = key01 & ((1L << mi_nbits) - 1);
1934  const __m128 * pq_l1 = pq_l1_t + M_2 * l1_idx;
1935 
1936  for (int m = 0; m < M_2; m++) {
1937  __m128 qi = _mm_loadu_ps(qa);
1938  __m128 recons = pq_l1[m] + pq_l2_t[*code++];
1939  __m128 diff = qi - recons;
1940  accu += diff * diff;
1941  pq_l2_t += 256;
1942  qa += 4;
1943  }
1944  pq_l1_t += M_2 << mi_nbits;
1945  key01 >>= mi_nbits;
1946  }
1947  accu = _mm_hadd_ps (accu, accu);
1948  accu = _mm_hadd_ps (accu, accu);
1949  return _mm_cvtss_f32 (accu);
1950  }
1951 
1952 };
1953 
1954 
1955 
1956 HNSW::DistanceComputer * IndexHNSW2Level::get_distance_computer () const
1957 {
1958  const Index2Layer *storage2l =
1959  dynamic_cast<Index2Layer*>(storage);
1960 
1961  if (storage2l) {
1962 
1963  const MultiIndexQuantizer *mi =
1964  dynamic_cast<MultiIndexQuantizer*> (storage2l->q1.quantizer);
1965 
1966  if (mi && storage2l->pq.M % 2 == 0 && storage2l->pq.dsub == 4) {
1967  return new Distance2xXPQ4(*storage2l);
1968  }
1969 
1970  const IndexFlat *fl =
1971  dynamic_cast<IndexFlat*> (storage2l->q1.quantizer);
1972 
1973  if (fl && storage2l->pq.dsub == 4) {
1974  return new DistanceXPQ4(*storage2l);
1975  }
1976  }
1977 
1978  // IVFPQ and cases not handled above
1979  return new GenericDistanceComputer (*storage);
1980 
1981 }
1982 
1983 
1984 namespace {
1985 
1986 
1987 // same as search_from_candidates but uses v
1988 // visno -> is in result list
1989 // visno + 1 -> in result list + in candidates
1990 int search_from_candidates_2(const HNSW & hnsw,
1991  DistanceComputer & qdis, int k,
1992  idx_t *I, float * D,
1993  MinimaxHeap &candidates,
1994  VisitedTable &vt,
1995  int level, int nres_in = 0)
1996 {
1997  int nres = nres_in;
1998  int ndis = 0;
1999  for (int i = 0; i < candidates.size(); i++) {
2000  idx_t v1 = candidates.ids[i];
2001  float d = candidates.dis[i];
2002  FAISS_ASSERT(v1 >= 0);
2003  vt.visited[v1] = vt.visno + 1;
2004  }
2005 
2006  bool do_dis_check = hnsw.check_relative_distance;
2007  int nstep = 0;
2008 
2009  while (candidates.size() > 0) {
2010  float d0 = 0;
2011  int v0 = candidates.pop_min(&d0);
2012 
2013  if (do_dis_check) {
2014  // tricky stopping condition: there are more that ef
2015  // distances that are processed already that are smaller
2016  // than d0
2017 
2018  int n_dis_below = candidates.count_below(d0);
2019  if(n_dis_below >= hnsw.efSearch) {
2020  break;
2021  }
2022  }
2023  size_t begin, end;
2024  hnsw.neighbor_range(v0, level, &begin, &end);
2025 
2026  for (size_t j = begin; j < end; j++) {
2027  int v1 = hnsw.neighbors[j];
2028  if (v1 < 0) break;
2029  if (vt.visited[v1] == vt.visno + 1) {
2030  // nothing to do
2031  } else {
2032  ndis++;
2033  float d = qdis(v1);
2034  candidates.push(v1, d);
2035 
2036  // never seen before --> add to heap
2037  if (vt.visited[v1] < vt.visno) {
2038  if (nres < k) {
2039  faiss::maxheap_push (++nres, D, I, d, v1);
2040  } else if (d < D[0]) {
2041  faiss::maxheap_pop (nres--, D, I);
2042  faiss::maxheap_push (++nres, D, I, d, v1);
2043  }
2044  }
2045  vt.visited[v1] = vt.visno + 1;
2046  }
2047  }
2048 
2049  nstep++;
2050  if (!do_dis_check && nstep > hnsw.efSearch) {
2051  break;
2052  }
2053  }
2054 
2055  if (level == 0) {
2056 #pragma omp critical
2057  {
2058  hnsw_stats.n1 ++;
2059  if (candidates.size() == 0)
2060  hnsw_stats.n2 ++;
2061  }
2062  }
2063 
2064 
2065  return nres;
2066 }
2067 
2068 
2069 } // anonymous namespace
2070 
2071 void IndexHNSW2Level::search (idx_t n, const float *x, idx_t k,
2072  float *distances, idx_t *labels) const
2073 {
2074  if (dynamic_cast<const Index2Layer*>(storage)) {
2075  IndexHNSW::search (n, x, k, distances, labels);
2076 
2077  } else { // "mixed" search
2078 
2079  const IndexIVFPQ *index_ivfpq =
2080  dynamic_cast<const IndexIVFPQ*>(storage);
2081 
2082  int nprobe = index_ivfpq->nprobe;
2083 
2084  long * coarse_assign = new long [n * nprobe];
2085  ScopeDeleter<long> del (coarse_assign);
2086  float * coarse_dis = new float [n * nprobe];
2087  ScopeDeleter<float> del2 (coarse_dis);
2088 
2089  index_ivfpq->quantizer->search (n, x, nprobe, coarse_dis, coarse_assign);
2090 
2091  index_ivfpq->search_preassigned (
2092  n, x, k, coarse_assign, coarse_dis, distances, labels, false);
2093 
2094 #pragma omp parallel
2095  {
2096  VisitedTable vt (ntotal);
2097  DistanceComputer *dis = get_distance_computer();
2099 
2100  int candidates_size = hnsw.upper_beam;
2101  MinimaxHeap candidates(candidates_size);
2102 
2103 #pragma omp for
2104  for(int i = 0; i < n; i++) {
2105  idx_t * idxi = labels + i * k;
2106  float * simi = distances + i * k;
2107  dis->set_query(x + i * d);
2108 
2109  // mark all inverted list elements as visited
2110 
2111  for (int j = 0; j < nprobe; j++) {
2112  idx_t key = coarse_assign[j + i * nprobe];
2113  if (key < 0) break;
2114  size_t list_length = index_ivfpq->get_list_size (key);
2115  const idx_t * ids = index_ivfpq->invlists->get_ids (key);
2116 
2117  for (int jj = 0; jj < list_length; jj++) {
2118  vt.set (ids[jj]);
2119  }
2120  }
2121 
2122  candidates.clear();
2123  // copy the upper_beam elements to candidates list
2124 
2125  int search_policy = 2;
2126 
2127  if (search_policy == 1) {
2128 
2129  for (int j = 0 ; j < hnsw.upper_beam && j < k; j++) {
2130  if (idxi[j] < 0) break;
2131  candidates.push (idxi[j], simi[j]);
2132  // search_from_candidates adds them back
2133  idxi[j] = -1;
2134  simi[j] = HUGE_VAL;
2135  }
2136 
2137  // reorder from sorted to heap
2138  maxheap_heapify (k, simi, idxi, simi, idxi, k);
2139 
2140  search_from_candidates (
2141  hnsw, *dis, k, idxi, simi,
2142  candidates, vt, 0, k);
2143 
2144  vt.advance();
2145 
2146  } else if (search_policy == 2) {
2147 
2148  for (int j = 0 ; j < hnsw.upper_beam && j < k; j++) {
2149  if (idxi[j] < 0) break;
2150  candidates.push (idxi[j], simi[j]);
2151  }
2152 
2153  // reorder from sorted to heap
2154  maxheap_heapify (k, simi, idxi, simi, idxi, k);
2155 
2156  search_from_candidates_2 (
2157  hnsw, *dis, k, idxi, simi,
2158  candidates, vt, 0, k);
2159  vt.advance ();
2160  vt.advance ();
2161 
2162  }
2163 
2164  maxheap_reorder (k, simi, idxi);
2165  }
2166  }
2167  }
2168 
2169 
2170 }
2171 
2172 
2173 void IndexHNSW2Level::flip_to_ivf ()
2174 {
2175  Index2Layer *storage2l =
2176  dynamic_cast<Index2Layer*>(storage);
2177 
2178  FAISS_THROW_IF_NOT (storage2l);
2179 
2180  IndexIVFPQ * index_ivfpq =
2181  new IndexIVFPQ (storage2l->q1.quantizer,
2182  d, storage2l->q1.nlist,
2183  storage2l->pq.M, 8);
2184  index_ivfpq->pq = storage2l->pq;
2185  index_ivfpq->is_trained = storage2l->is_trained;
2186  index_ivfpq->precompute_table();
2187  index_ivfpq->own_fields = storage2l->q1.own_fields;
2188  storage2l->transfer_to_IVFPQ(*index_ivfpq);
2189  index_ivfpq->make_direct_map (true);
2190 
2191  storage = index_ivfpq;
2192  delete storage2l;
2193 
2194 }
2195 
2196 
2197 } // namespace faiss
random generator that can be used in multithreaded contexts
Definition: utils.h:48
void precompute_table()
build precomputed table
Definition: IndexIVFPQ.cpp:355
void add_with_locks(DistanceComputer &ptdis, int pt_level, int pt_id, std::vector< omp_lock_t > &locks, VisitedTable &vt)
Definition: IndexHNSW.cpp:657
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:34
void transfer_to_IVFPQ(IndexIVFPQ &other) const
transfer the flat codes to an IVFPQ index
size_t code_size
bytes per vector
void neighbor_range(idx_t no, int layer_no, size_t *begin, size_t *end) const
range of entries in the neighbors table of vertex no at layer_no
Definition: IndexHNSW.cpp:517
size_t nbits
number of bits per quantization index
bool get(int no) const
get flag #no
Definition: IndexHNSW.cpp:66
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
Definition: utils.cpp:574
void train(idx_t n, const float *x) override
Trains the storage if needed.
Definition: IndexHNSW.cpp:1733
virtual const idx_t * get_ids(size_t list_no) const =0
int nb_neighbors(int layer_no) const
nb of neighbors for this level
Definition: IndexHNSW.cpp:497
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
entry point for search
Definition: IndexHNSW.cpp:2071
size_t code_size
code_size_1 + code_size_2
Definition: IndexIVFPQ.h:213
virtual void reset()=0
removes all elements from the database.
storage_idx_t entry_point
entry point in the search structure (one of the points with maximum level
Definition: IndexHNSW.h:88
int cum_nb_neighbors(int layer_no) const
cumumlative nb up to (and excluding) this level
Definition: IndexHNSW.cpp:512
virtual float compute_code_distance(const uint8_t *code1, const uint8_t *code2)=0
code-to-code distance computation
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:173
std::vector< double > assign_probas
assignment probability to each layer (sum=1)
Definition: IndexHNSW.h:70
std::vector< float > sdc_table
Symmetric Distance Table.
virtual void train(idx_t n, const float *x)
Definition: Index.cpp:23
float rand_float()
between 0 and 1
Definition: utils.cpp:211
std::vector< int > cum_nneighbor_per_level
Definition: IndexHNSW.h:74
size_t dsub
dimensionality of each subvector
Level1Quantizer q1
first level quantizer
Definition: IndexIVFPQ.h:198
std::vector< uint8_t > codes
Codes. Size ntotal * code_size.
Definition: IndexIVFPQ.h:204
void advance()
reset all flags to false
Definition: IndexHNSW.cpp:70
void get_neighbor_table(storage_idx_t i, float *out) const
get the M+1 -by-d table for neighbor coordinates for vector i
Definition: IndexHNSW.cpp:1512
void set_query(const float *x) override
called before computing distances
Definition: IndexHNSW.cpp:1848
void train(idx_t n, const float *x) override
Trains the storage if needed.
Definition: IndexHNSW.cpp:978
int d
vector dimension
Definition: Index.h:64
void compute_distance_table(const float *x, float *dis_table) const
size_t code_size
byte per indexed vector
std::vector< size_t > offsets
Definition: IndexHNSW.h:81
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
set implementation optimized for fast access.
Definition: IndexHNSW.cpp:54
void add(idx_t n, const float *x) override
Definition: IndexHNSW.cpp:1031
void set_query(const float *x) override
called before computing distances
Definition: IndexHNSW.cpp:1785
void reconstruct(storage_idx_t i, float *x, float *tmp) const
called by compute_distances
Definition: IndexHNSW.cpp:1378
virtual void add(idx_t n, const float *x)=0
int rand_int()
random 31-bit positive integer
ScalarQuantizer sq
Used to encode the vectors.
size_t ksub
number of centroids for each subquantizer
int efSearch
expansion factor at search time
Definition: IndexHNSW.h:99
long idx_t
all indices are this type
Definition: Index.h:62
bool check_relative_distance
during search: do we check whether the next best distance is good enough?
Definition: IndexHNSW.h:102
HNSW(int M=32)
only mandatory parameter: nb of neighbors
Definition: IndexHNSW.cpp:527
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:31
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
int upper_beam
number of entry points in levels &gt; 0.
Definition: IndexHNSW.h:105
double getmillisecs()
ms elapsed since some arbitrary epoch
Definition: utils.cpp:74
void set_nb_neighbors(int level_no, int n)
set nb of neighbors for this level (before adding anything)
Definition: IndexHNSW.cpp:503
void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const
called by add_codes
Definition: IndexHNSW.cpp:1531
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
faiss::Index::idx_t idx_t
Faiss results are 64-bit.
Definition: IndexHNSW.h:46
void add_codes(size_t n, const float *x)
Definition: IndexHNSW.cpp:1572
void make_direct_map(bool new_maintain_direct_map=true)
Definition: IndexIVF.cpp:257
void set_query(const float *x) override
called before computing distances
Definition: IndexHNSW.cpp:1347
int random_level()
pick a random level for a new point
Definition: IndexHNSW.cpp:539
void set_default_probas(int M, float levelMult)
Definition: IndexHNSW.cpp:553
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:32
InvertedLists * invlists
Acess to the actual data.
Definition: IndexIVF.h:168
void reset() override
removes all elements from the database.
Definition: IndexHNSW.cpp:1043
size_t M
number of subquantizers
size_t code_size_1
size of the code for the first level (ceil(log8(q1.nlist)))
Definition: IndexIVFPQ.h:207
void search(DistanceComputer &qdis, int k, idx_t *I, float *D, VisitedTable &vt) const
search interface
Definition: IndexHNSW.cpp:711
void init_level_0_from_entry_points(int npt, const storage_idx_t *points, const storage_idx_t *nearests)
alternative graph building
Definition: IndexHNSW.cpp:1200
void set_query(const float *x) override
called before computing distances
Definition: IndexHNSW.cpp:1621
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
entry point for search
Definition: IndexHNSW.cpp:985
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const override
Definition: IndexIVFPQ.cpp:930
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:33
void init_level_0_from_knngraph(int k, const float *D, const idx_t *I)
alternative graph building
Definition: IndexHNSW.cpp:1162
void search_level_0(idx_t n, const float *x, idx_t k, const storage_idx_t *nearest, const float *nearest_d, float *distances, idx_t *labels, int nprobe=1, int search_type=1) const
Definition: IndexHNSW.cpp:1092
void fill_with_random_links(size_t n)
add n random levels to table (for debugging...)
Definition: IndexHNSW.cpp:913
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
std::vector< storage_idx_t > neighbors
Definition: IndexHNSW.h:85
void reconstruct(idx_t key, float *recons) const override
Definition: IndexHNSW.cpp:1050
virtual void reconstruct(idx_t key, float *recons) const
Definition: Index.cpp:54
ProductQuantizer pq
second level quantizer is always a PQ
Definition: IndexIVFPQ.h:201
int efConstruction
expansion factor at construction time
Definition: IndexHNSW.h:96
int storage_idx_t
internal storage of vectors (32 bits: this is expensive)
Definition: IndexHNSW.h:43
std::vector< float > xb
database vectors, size ntotal * d
Definition: IndexFlat.h:25
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:42
void set(int no)
set flog #no to true
Definition: IndexHNSW.cpp:62
void set_query(const float *x) override
called before computing distances
Definition: IndexHNSW.cpp:1712
void reconstruct(idx_t key, float *recons) const override
std::vector< int > levels
level of each vector (base level = 1), size = ntotal
Definition: IndexHNSW.h:77
int max_level
maximum level
Definition: IndexHNSW.h:93
size_t nlist
number of possible key values
Definition: IndexIVF.h:34
virtual float compute_distance(const float *x, const uint8_t *code)=0
vector-to-code distance computation
std::vector< float > centroids
Centroid table, size M * ksub * dsub.