Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/HNSW.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #include "HNSW.h"
12 
13 
14 namespace faiss {
15 
16 using idx_t = Index::idx_t;
17 using DistanceComputer = HNSW::DistanceComputer;
18 
19 /**************************************************************
20  * HNSW structure implementation
21  **************************************************************/
22 
23 int HNSW::nb_neighbors(int layer_no) const
24 {
25  return cum_nneighbor_per_level[layer_no + 1] -
26  cum_nneighbor_per_level[layer_no];
27 }
28 
29 void HNSW::set_nb_neighbors(int level_no, int n)
30 {
31  FAISS_THROW_IF_NOT(levels.size() == 0);
32  int cur_n = nb_neighbors(level_no);
33  for (int i = level_no + 1; i < cum_nneighbor_per_level.size(); i++) {
34  cum_nneighbor_per_level[i] += n - cur_n;
35  }
36 }
37 
38 int HNSW::cum_nb_neighbors(int layer_no) const
39 {
40  return cum_nneighbor_per_level[layer_no];
41 }
42 
43 void HNSW::neighbor_range(idx_t no, int layer_no,
44  size_t * begin, size_t * end) const
45 {
46  size_t o = offsets[no];
47  *begin = o + cum_nb_neighbors(layer_no);
48  *end = o + cum_nb_neighbors(layer_no + 1);
49 }
50 
51 
52 
53 HNSW::HNSW(int M) : rng(12345) {
54  set_default_probas(M, 1.0 / log(M));
55  max_level = -1;
56  entry_point = -1;
57  efSearch = 16;
58  efConstruction = 40;
59  upper_beam = 1;
60  offsets.push_back(0);
61 }
62 
63 
65 {
66  double f = rng.rand_float();
67  // could be a bit faster with bissection
68  for (int level = 0; level < assign_probas.size(); level++) {
69  if (f < assign_probas[level]) {
70  return level;
71  }
72  f -= assign_probas[level];
73  }
74  // happens with exponentially low probability
75  return assign_probas.size() - 1;
76 }
77 
78 void HNSW::set_default_probas(int M, float levelMult)
79 {
80  int nn = 0;
81  cum_nneighbor_per_level.push_back (0);
82  for (int level = 0; ;level++) {
83  float proba = exp(-level / levelMult) * (1 - exp(-1 / levelMult));
84  if (proba < 1e-9) break;
85  assign_probas.push_back(proba);
86  nn += level == 0 ? M * 2 : M;
87  cum_nneighbor_per_level.push_back (nn);
88  }
89 }
90 
91 void HNSW::clear_neighbor_tables(int level)
92 {
93  for (int i = 0; i < levels.size(); i++) {
94  size_t begin, end;
95  neighbor_range(i, level, &begin, &end);
96  for (size_t j = begin; j < end; j++) {
97  neighbors[j] = -1;
98  }
99  }
100 }
101 
102 
103 void HNSW::reset() {
104  max_level = -1;
105  entry_point = -1;
106  offsets.clear();
107  offsets.push_back(0);
108  levels.clear();
109  neighbors.clear();
110 }
111 
112 
113 
114 void HNSW::print_neighbor_stats(int level) const
115 {
116  FAISS_THROW_IF_NOT (level < cum_nneighbor_per_level.size());
117  printf("stats on level %d, max %d neighbors per vertex:\n",
118  level, nb_neighbors(level));
119  size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0;
120 #pragma omp parallel for reduction(+: tot_neigh) reduction(+: tot_common) \
121  reduction(+: tot_reciprocal) reduction(+: n_node)
122  for (int i = 0; i < levels.size(); i++) {
123  if (levels[i] > level) {
124  n_node++;
125  size_t begin, end;
126  neighbor_range(i, level, &begin, &end);
127  std::unordered_set<int> neighset;
128  for (size_t j = begin; j < end; j++) {
129  if (neighbors [j] < 0) break;
130  neighset.insert(neighbors[j]);
131  }
132  int n_neigh = neighset.size();
133  int n_common = 0;
134  int n_reciprocal = 0;
135  for (size_t j = begin; j < end; j++) {
136  storage_idx_t i2 = neighbors[j];
137  if (i2 < 0) break;
138  FAISS_ASSERT(i2 != i);
139  size_t begin2, end2;
140  neighbor_range(i2, level, &begin2, &end2);
141  for (size_t j2 = begin2; j2 < end2; j2++) {
142  storage_idx_t i3 = neighbors[j2];
143  if (i3 < 0) break;
144  if (i3 == i) {
145  n_reciprocal++;
146  continue;
147  }
148  if (neighset.count(i3)) {
149  neighset.erase(i3);
150  n_common++;
151  }
152  }
153  }
154  tot_neigh += n_neigh;
155  tot_common += n_common;
156  tot_reciprocal += n_reciprocal;
157  }
158  }
159  float normalizer = n_node;
160  printf(" nb of nodes at that level %ld\n", n_node);
161  printf(" neighbors per node: %.2f (%ld)\n",
162  tot_neigh / normalizer, tot_neigh);
163  printf(" nb of reciprocal neighbors: %.2f\n", tot_reciprocal / normalizer);
164  printf(" nb of neighbors that are also neighbor-of-neighbors: %.2f (%ld)\n",
165  tot_common / normalizer, tot_common);
166 
167 
168 
169 }
170 
171 
173 {
174  int max_level = prepare_level_tab(n);
175  RandomGenerator rng2(456);
176 
177  for (int level = max_level - 1; level >= 0; level++) {
178  std::vector<int> elts;
179  for (int i = 0; i < n; i++) {
180  if (levels[i] > level) {
181  elts.push_back(i);
182  }
183  }
184  printf ("linking %ld elements in level %d\n",
185  elts.size(), level);
186 
187  if (elts.size() == 1) continue;
188 
189  for (int ii = 0; ii < elts.size(); ii++) {
190  int i = elts[ii];
191  size_t begin, end;
192  neighbor_range(i, 0, &begin, &end);
193  for (size_t j = begin; j < end; j++) {
194  int other = 0;
195  do {
196  other = elts[rng2.rand_int(elts.size())];
197  } while(other == i);
198 
199  neighbors[j] = other;
200  }
201  }
202  }
203 }
204 
205 
206 int HNSW::prepare_level_tab(size_t n, bool preset_levels)
207 {
208  size_t n0 = offsets.size() - 1;
209 
210  if (preset_levels) {
211  FAISS_ASSERT (n0 + n == levels.size());
212  } else {
213  FAISS_ASSERT (n0 == levels.size());
214  for (int i = 0; i < n; i++) {
215  int pt_level = random_level();
216  levels.push_back(pt_level + 1);
217  }
218  }
219 
220  int max_level = 0;
221  for (int i = 0; i < n; i++) {
222  int pt_level = levels[i + n0] - 1;
223  if (pt_level > max_level) max_level = pt_level;
224  offsets.push_back(offsets.back() +
225  cum_nb_neighbors(pt_level + 1));
226  neighbors.resize(offsets.back(), -1);
227  }
228 
229  return max_level;
230 }
231 
232 
233 /** Enumerate vertices from farthest to nearest from query, keep a
234  * neighbor only if there is no previous neighbor that is closer to
235  * that vertex than the query.
236  */
238  DistanceComputer& qdis,
239  std::priority_queue<NodeDistFarther>& input,
240  std::vector<NodeDistFarther>& output,
241  int max_size)
242 {
243  while (input.size() > 0) {
244  NodeDistFarther v1 = input.top();
245  input.pop();
246  float dist_v1_q = v1.d;
247 
248  bool good = true;
249  for (NodeDistFarther v2 : output) {
250  float dist_v1_v2 = qdis.symmetric_dis(v2.id, v1.id);
251 
252  if (dist_v1_v2 < dist_v1_q) {
253  good = false;
254  break;
255  }
256  }
257 
258  if (good) {
259  output.push_back(v1);
260  if (output.size() >= max_size) {
261  return;
262  }
263  }
264  }
265 }
266 
267 
268 namespace {
269 
270 
271 using storage_idx_t = HNSW::storage_idx_t;
274 
275 
276 /**************************************************************
277  * Addition subroutines
278  **************************************************************/
279 
280 
281 /// remove neighbors from the list to make it smaller than max_size
282 void shrink_neighbor_list(
283  DistanceComputer& qdis,
284  std::priority_queue<NodeDistCloser>& resultSet1,
285  int max_size)
286 {
287  if (resultSet1.size() < max_size) {
288  return;
289  }
290  std::priority_queue<NodeDistFarther> resultSet;
291  std::vector<NodeDistFarther> returnlist;
292 
293  while (resultSet1.size() > 0) {
294  resultSet.emplace(resultSet1.top().d, resultSet1.top().id);
295  resultSet1.pop();
296  }
297 
298  HNSW::shrink_neighbor_list(qdis, resultSet, returnlist, max_size);
299 
300  for (NodeDistFarther curen2 : returnlist) {
301  resultSet1.emplace(curen2.d, curen2.id);
302  }
303 
304 }
305 
306 
307 /// add a link between two elements, possibly shrinking the list
308 /// of links to make room for it.
309 void add_link(HNSW& hnsw,
310  DistanceComputer& qdis,
311  storage_idx_t src, storage_idx_t dest,
312  int level)
313 {
314  size_t begin, end;
315  hnsw.neighbor_range(src, level, &begin, &end);
316  if (hnsw.neighbors[end - 1] == -1) {
317  // there is enough room, find a slot to add it
318  size_t i = end;
319  while(i > begin) {
320  if (hnsw.neighbors[i - 1] != -1) break;
321  i--;
322  }
323  hnsw.neighbors[i] = dest;
324  return;
325  }
326 
327  // otherwise we let them fight out which to keep
328 
329  // copy to resultSet...
330  std::priority_queue<NodeDistCloser> resultSet;
331  resultSet.emplace(qdis.symmetric_dis(src, dest), dest);
332  for (size_t i = begin; i < end; i++) { // HERE WAS THE BUG
333  storage_idx_t neigh = hnsw.neighbors[i];
334  resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh);
335  }
336 
337  shrink_neighbor_list(qdis, resultSet, end - begin);
338 
339  // ...and back
340  size_t i = begin;
341  while (resultSet.size()) {
342  hnsw.neighbors[i++] = resultSet.top().id;
343  resultSet.pop();
344  }
345  // they may have shrunk more than just by 1 element
346  while(i < end) {
347  hnsw.neighbors[i++] = -1;
348  }
349 }
350 
351 /// search neighbors on a single level, starting from an entry point
352 void search_neighbors_to_add(
353  HNSW& hnsw,
354  DistanceComputer& qdis,
355  std::priority_queue<NodeDistCloser>& results,
356  int entry_point,
357  float d_entry_point,
358  int level,
359  VisitedTable &vt)
360 {
361  // top is nearest candidate
362  std::priority_queue<NodeDistFarther> candidates;
363 
364  NodeDistFarther ev(d_entry_point, entry_point);
365  candidates.push(ev);
366  results.emplace(d_entry_point, entry_point);
367  vt.set(entry_point);
368 
369  while (!candidates.empty()) {
370  // get nearest
371  const NodeDistFarther &currEv = candidates.top();
372 
373  if (currEv.d > results.top().d) {
374  break;
375  }
376  int currNode = currEv.id;
377  candidates.pop();
378 
379  // loop over neighbors
380  size_t begin, end;
381  hnsw.neighbor_range(currNode, level, &begin, &end);
382  for(size_t i = begin; i < end; i++) {
383  storage_idx_t nodeId = hnsw.neighbors[i];
384  if (nodeId < 0) break;
385  if (vt.get(nodeId)) continue;
386  vt.set(nodeId);
387 
388  float dis = qdis(nodeId);
389  NodeDistFarther evE1(dis, nodeId);
390 
391  if (results.size() < hnsw.efConstruction ||
392  results.top().d > dis) {
393 
394  results.emplace(dis, nodeId);
395  candidates.emplace(dis, nodeId);
396  if (results.size() > hnsw.efConstruction) {
397  results.pop();
398  }
399  }
400  }
401  }
402  vt.advance();
403 }
404 
405 
406 /**************************************************************
407  * Searching subroutines
408  **************************************************************/
409 
410 /// greedily update a nearest vector at a given level
411 void greedy_update_nearest(const HNSW& hnsw,
412  DistanceComputer& qdis,
413  int level,
414  storage_idx_t& nearest,
415  float& d_nearest)
416 {
417  for(;;) {
418  storage_idx_t prev_nearest = nearest;
419 
420  size_t begin, end;
421  hnsw.neighbor_range(nearest, level, &begin, &end);
422  for(size_t i = begin; i < end; i++) {
423  storage_idx_t v = hnsw.neighbors[i];
424  if (v < 0) break;
425  float dis = qdis(v);
426  if (dis < d_nearest) {
427  nearest = v;
428  d_nearest = dis;
429  }
430  }
431  if (nearest == prev_nearest) {
432  return;
433  }
434  }
435 }
436 
437 
438 } // namespace
439 
440 
441 /// Finds neighbors and builds links with them, starting from an entry
442 /// point. The own neighbor list is assumed to be locked.
444  storage_idx_t pt_id,
445  storage_idx_t nearest,
446  float d_nearest,
447  int level,
448  omp_lock_t *locks,
449  VisitedTable &vt)
450 {
451  std::priority_queue<NodeDistCloser> link_targets;
452 
453  search_neighbors_to_add(*this, ptdis, link_targets, nearest, d_nearest,
454  level, vt);
455 
456  // but we can afford only this many neighbors
457  int M = nb_neighbors(level);
458 
459  ::faiss::shrink_neighbor_list(ptdis, link_targets, M);
460 
461  while (!link_targets.empty()) {
462  int other_id = link_targets.top().id;
463 
464  omp_set_lock(&locks[other_id]);
465  add_link(*this, ptdis, other_id, pt_id, level);
466  omp_unset_lock(&locks[other_id]);
467 
468  add_link(*this, ptdis, pt_id, other_id, level);
469 
470  link_targets.pop();
471  }
472 }
473 
474 
475 /**************************************************************
476  * Building, parallel
477  **************************************************************/
478 
479 void HNSW::add_with_locks(DistanceComputer& ptdis, int pt_level, int pt_id,
480  std::vector<omp_lock_t>& locks,
481  VisitedTable& vt)
482 {
483  // greedy search on upper levels
484 
485  storage_idx_t nearest;
486 #pragma omp critical
487  {
488  nearest = entry_point;
489 
490  if (nearest == -1) {
491  max_level = pt_level;
492  entry_point = pt_id;
493  }
494  }
495 
496  if (nearest < 0) {
497  return;
498  }
499 
500  omp_set_lock(&locks[pt_id]);
501 
502  int level = max_level; // level at which we start adding neighbors
503  float d_nearest = ptdis(nearest);
504 
505  for(; level > pt_level; level--) {
506  greedy_update_nearest(*this, ptdis, level, nearest, d_nearest);
507  }
508 
509  for(; level >= 0; level--) {
510  add_links_starting_from(ptdis, pt_id, nearest, d_nearest,
511  level, locks.data(), vt);
512  }
513 
514  omp_unset_lock(&locks[pt_id]);
515 
516  if (pt_level > max_level) {
517  max_level = pt_level;
518  entry_point = pt_id;
519  }
520 }
521 
522 
523 /** Do a BFS on the candidates list */
524 
526  DistanceComputer& qdis, int k,
527  idx_t *I, float *D,
528  MinimaxHeap& candidates,
529  VisitedTable& vt,
530  int level, int nres_in) const
531 {
532  int nres = nres_in;
533  int ndis = 0;
534  for (int i = 0; i < candidates.size(); i++) {
535  idx_t v1 = candidates.ids[i];
536  float d = candidates.dis[i];
537  FAISS_ASSERT(v1 >= 0);
538  if (nres < k) {
539  faiss::maxheap_push(++nres, D, I, d, v1);
540  } else if (d < D[0]) {
541  faiss::maxheap_pop(nres--, D, I);
542  faiss::maxheap_push(++nres, D, I, d, v1);
543  }
544  vt.set(v1);
545  }
546 
547  int nstep = 0;
548 
549  while (candidates.size() > 0) {
550  float d0 = 0;
551  int v0 = candidates.pop_min(&d0);
552 
553  size_t begin, end;
554  neighbor_range(v0, level, &begin, &end);
555 
556  for (size_t j = begin; j < end; j++) {
557  int v1 = neighbors[j];
558  if (v1 < 0) break;
559  if (vt.get(v1)) {
560  continue;
561  }
562  vt.set(v1);
563  ndis++;
564  float d = qdis(v1);
565  if (nres < k) {
566  faiss::maxheap_push(++nres, D, I, d, v1);
567  } else if (d < D[0]) {
568  faiss::maxheap_pop(nres--, D, I);
569  faiss::maxheap_push(++nres, D, I, d, v1);
570  }
571  candidates.push(v1, d);
572  }
573 
574  nstep++;
575  if (nstep > efSearch) {
576  break;
577  }
578  }
579 
580  if (level == 0) {
581 #pragma omp critical
582  {
583  hnsw_stats.n1 ++;
584  if (candidates.size() == 0) {
585  hnsw_stats.n2 ++;
586  }
587  hnsw_stats.n3 += ndis;
588  }
589  }
590 
591  return nres;
592 }
593 
594 
595 /**************************************************************
596  * Searching
597  **************************************************************/
598 
/// Priority queue whose top() is the largest element.
template<typename T>
using MaxHeap = std::priority_queue<T>;
/// Priority queue whose top() is the smallest element.
template<typename T>
using MinHeap = std::priority_queue<T, std::vector<T>, std::greater<T>>;
603 
604 
605 MaxHeap<HNSW::Node> HNSW::search_from(
606  const Node& node,
607  DistanceComputer& qdis,
608  int ef,
609  VisitedTable *vt) const
610 {
611  MaxHeap<Node> top_candidates;
612  MinHeap<Node> candidate_set;
613 
614  top_candidates.push(node);
615  candidate_set.push(node);
616 
617  vt->set(node.second);
618 
619  float lower_bound = node.first;
620 
621  while (!candidate_set.empty()) {
622  float d0;
623  storage_idx_t v0;
624  std::tie(d0, v0) = candidate_set.top();
625 
626  if (d0 > lower_bound) {
627  break;
628  }
629 
630  candidate_set.pop();
631 
632  size_t begin, end;
633  neighbor_range(v0, 0, &begin, &end);
634 
635  for (size_t j = begin; j < end; ++j) {
636  int v1 = neighbors[j];
637 
638  if (v1 < 0) {
639  break;
640  }
641  if (vt->get(v1)) {
642  continue;
643  }
644 
645  vt->set(v1);
646 
647  float d1 = qdis(v1);
648 
649  if (top_candidates.top().first > d1 || top_candidates.size() < ef) {
650  candidate_set.emplace(d1, v1);
651  top_candidates.emplace(d1, v1);
652 
653  if (top_candidates.size() > ef) {
654  top_candidates.pop();
655  }
656 
657  lower_bound = top_candidates.top().first;
658  }
659  }
660  }
661 
662  return top_candidates;
663 }
664 
665 void HNSW::search(DistanceComputer& qdis, int k,
666  idx_t *I, float *D,
667  VisitedTable& vt) const
668 {
669  if (upper_beam == 1) {
670 
671  // greedy search on upper levels
672  storage_idx_t nearest = entry_point;
673  float d_nearest = qdis(nearest);
674 
675  for(int level = max_level; level >= 1; level--) {
676  greedy_update_nearest(*this, qdis, level, nearest, d_nearest);
677  }
678 
679  int ef = std::max(efSearch, k);
680  MaxHeap<Node> top_candidates = search_from(Node(d_nearest, nearest), qdis, ef, &vt);
681  while (top_candidates.size() > k) {
682  top_candidates.pop();
683  }
684 
685  int nres = 0;
686  while (!top_candidates.empty()) {
687  float d;
688  storage_idx_t label;
689  std::tie(d, label) = top_candidates.top();
690  faiss::maxheap_push(++nres, D, I, d, label);
691  top_candidates.pop();
692  }
693 
694  // MinimaxHeap candidates(candidates_size);
695 
696 // top_candidates.emplace(d_nearest, nearest);
697 
698  // search_from_candidates(qdis, k, I, D, candidates, vt, 0);
699 
700  // NOTE(hoss): Init at the beginning?
701  vt.advance();
702 
703  } else {
704  assert(false);
705 
706  int candidates_size = upper_beam;
707  MinimaxHeap candidates(candidates_size);
708 
709  std::vector<idx_t> I_to_next(candidates_size);
710  std::vector<float> D_to_next(candidates_size);
711 
712  int nres = 1;
713  I_to_next[0] = entry_point;
714  D_to_next[0] = qdis(entry_point);
715 
716  for(int level = max_level; level >= 0; level--) {
717 
718  // copy I, D -> candidates
719 
720  candidates.clear();
721 
722  for (int i = 0; i < nres; i++) {
723  candidates.push(I_to_next[i], D_to_next[i]);
724  }
725 
726  if (level == 0) {
727  nres = search_from_candidates(qdis, k, I, D, candidates, vt, 0);
728  } else {
729  nres = search_from_candidates(
730  qdis, candidates_size,
731  I_to_next.data(), D_to_next.data(),
732  candidates, vt, level
733  );
734  }
735  vt.advance();
736  }
737  }
738 }
739 
740 
741 void HNSW::MinimaxHeap::push(storage_idx_t i, float v) {
742  if (k == n) {
743  if (v >= dis[0]) return;
744  faiss::heap_pop<HC> (k--, dis.data(), ids.data());
745  }
746  faiss::heap_push<HC> (++k, dis.data(), ids.data(), v, i);
747 }
748 
749 float HNSW::MinimaxHeap::max() const {
750  assert(k > 0);
751 
752  return dis[0];
753 }
754 
755 int HNSW::MinimaxHeap::size() const {
756  return k;
757 }
758 
759 void HNSW::MinimaxHeap::clear() {
760  k = 0;
761 }
762 
763 int HNSW::MinimaxHeap::pop_min(float *vmin_out) {
764  assert(k > 0);
765  // returns min. This is an O(n) operation
766  int i = k - 1;
767  int imin = i;
768  float vmin = dis[i];
769  i--;
770  while(i >= 0) {
771  if (dis[i] < vmin) {
772  vmin = dis[i];
773  imin = i;
774  }
775  i--;
776  }
777  assert(2 * i > k);
778  if (vmin_out) *vmin_out = vmin;
779  int ret = ids[imin];
780 
781  --k;
782  faiss::heap_push<HC>(++imin, dis.data(), ids.data(), ids[k], dis[k]);
783 
784  return ret;
785 }
786 
787 int HNSW::MinimaxHeap::count_below(float thresh) {
788  int n_below = 0;
789  for(int i = 0; i < k; i++) {
790  if (dis[i] < thresh) {
791  n_below++;
792  }
793  }
794 
795  return n_below;
796 }
797 
798 
799 } // namespace faiss
random generator that can be used in multithreaded contexts
Definition: utils.h:48
void add_with_locks(DistanceComputer &ptdis, int pt_level, int pt_id, std::vector< omp_lock_t > &locks, VisitedTable &vt)
Definition: HNSW.cpp:479
void neighbor_range(idx_t no, int layer_no, size_t *begin, size_t *end) const
range of entries in the neighbors table of vertex no at layer_no
Definition: HNSW.cpp:43
bool get(int no) const
get flag #no
Definition: HNSW.h:248
int nb_neighbors(int layer_no) const
nb of neighbors for this level
Definition: HNSW.cpp:23
storage_idx_t entry_point
entry point in the search structure (one of the points with maximum level)
Definition: HNSW.h:137
int cum_nb_neighbors(int layer_no) const
cumulative nb up to (and excluding) this level
Definition: HNSW.cpp:38
Index::idx_t idx_t
Faiss results are 64-bit.
Definition: HNSW.h:52
virtual float symmetric_dis(storage_idx_t i, storage_idx_t j)=0
compute distance between two stored vectors
std::vector< double > assign_probas
assignment probability to each layer (sum=1)
Definition: HNSW.h:119
float rand_float()
between 0 and 1
Definition: utils.cpp:130
std::vector< int > cum_nneighbor_per_level
Definition: HNSW.h:123
void advance()
reset all flags to false
Definition: HNSW.h:253
void add_links_starting_from(DistanceComputer &ptdis, storage_idx_t pt_id, storage_idx_t nearest, float d_nearest, int level, omp_lock_t *locks, VisitedTable &vt)
Definition: HNSW.cpp:443
std::vector< size_t > offsets
Definition: HNSW.h:130
set implementation optimized for fast access.
Definition: HNSW.h:235
int rand_int()
random positive integer
Definition: utils.cpp:115
int efSearch
expansion factor at search time
Definition: HNSW.h:148
long idx_t
all indices are this type
Definition: Index.h:64
HNSW(int M=32)
only mandatory parameter: nb of neighbors
Definition: HNSW.cpp:53
to sort pairs of (id, distance) from nearest to farthest or the reverse
Definition: HNSW.h:103
int upper_beam
number of entry points in levels > 0.
Definition: HNSW.h:151
void set_nb_neighbors(int level_no, int n)
set nb of neighbors for this level (before adding anything)
Definition: HNSW.cpp:29
int search_from_candidates(DistanceComputer &qdis, int k, idx_t *I, float *D, MinimaxHeap &candidates, VisitedTable &vt, int level, int nres_in=0) const
Definition: HNSW.cpp:525
int random_level()
pick a random level for a new point
Definition: HNSW.cpp:64
void set_default_probas(int M, float levelMult)
Definition: HNSW.cpp:78
void search(DistanceComputer &qdis, int k, idx_t *I, float *D, VisitedTable &vt) const
search interface
Definition: HNSW.cpp:665
void fill_with_random_links(size_t n)
add n random levels to table (for debugging...)
Definition: HNSW.cpp:172
std::vector< storage_idx_t > neighbors
Definition: HNSW.h:134
int efConstruction
expansion factor at construction time
Definition: HNSW.h:145
int storage_idx_t
internal storage of vectors (32 bits: this is expensive)
Definition: HNSW.h:49
void set(int no)
set flag #no to true
Definition: HNSW.h:243
std::vector< int > levels
level of each vector (base level = 1), size = ntotal
Definition: HNSW.h:126
int max_level
maximum level
Definition: HNSW.h:142
static void shrink_neighbor_list(DistanceComputer &qdis, std::priority_queue< NodeDistFarther > &input, std::vector< NodeDistFarther > &output, int max_size)
Definition: HNSW.cpp:237