Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/HNSW.cpp
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #include "HNSW.h"
11 #include "AuxIndexStructures.h"
12 
13 namespace faiss {
14 
15 using idx_t = Index::idx_t;
16 
17 /**************************************************************
18  * HNSW structure implementation
19  **************************************************************/
20 
21 int HNSW::nb_neighbors(int layer_no) const
22 {
23  return cum_nneighbor_per_level[layer_no + 1] -
24  cum_nneighbor_per_level[layer_no];
25 }
26 
27 void HNSW::set_nb_neighbors(int level_no, int n)
28 {
29  FAISS_THROW_IF_NOT(levels.size() == 0);
30  int cur_n = nb_neighbors(level_no);
31  for (int i = level_no + 1; i < cum_nneighbor_per_level.size(); i++) {
32  cum_nneighbor_per_level[i] += n - cur_n;
33  }
34 }
35 
36 int HNSW::cum_nb_neighbors(int layer_no) const
37 {
38  return cum_nneighbor_per_level[layer_no];
39 }
40 
41 void HNSW::neighbor_range(idx_t no, int layer_no,
42  size_t * begin, size_t * end) const
43 {
44  size_t o = offsets[no];
45  *begin = o + cum_nb_neighbors(layer_no);
46  *end = o + cum_nb_neighbors(layer_no + 1);
47 }
48 
49 
50 
51 HNSW::HNSW(int M) : rng(12345) {
52  set_default_probas(M, 1.0 / log(M));
53  max_level = -1;
54  entry_point = -1;
55  efSearch = 16;
56  efConstruction = 40;
57  upper_beam = 1;
58  offsets.push_back(0);
59 }
60 
61 
63 {
64  double f = rng.rand_float();
65  // could be a bit faster with bissection
66  for (int level = 0; level < assign_probas.size(); level++) {
67  if (f < assign_probas[level]) {
68  return level;
69  }
70  f -= assign_probas[level];
71  }
72  // happens with exponentially low probability
73  return assign_probas.size() - 1;
74 }
75 
76 void HNSW::set_default_probas(int M, float levelMult)
77 {
78  int nn = 0;
79  cum_nneighbor_per_level.push_back (0);
80  for (int level = 0; ;level++) {
81  float proba = exp(-level / levelMult) * (1 - exp(-1 / levelMult));
82  if (proba < 1e-9) break;
83  assign_probas.push_back(proba);
84  nn += level == 0 ? M * 2 : M;
85  cum_nneighbor_per_level.push_back (nn);
86  }
87 }
88 
89 void HNSW::clear_neighbor_tables(int level)
90 {
91  for (int i = 0; i < levels.size(); i++) {
92  size_t begin, end;
93  neighbor_range(i, level, &begin, &end);
94  for (size_t j = begin; j < end; j++) {
95  neighbors[j] = -1;
96  }
97  }
98 }
99 
100 
101 void HNSW::reset() {
102  max_level = -1;
103  entry_point = -1;
104  offsets.clear();
105  offsets.push_back(0);
106  levels.clear();
107  neighbors.clear();
108 }
109 
110 
111 
112 void HNSW::print_neighbor_stats(int level) const
113 {
114  FAISS_THROW_IF_NOT (level < cum_nneighbor_per_level.size());
115  printf("stats on level %d, max %d neighbors per vertex:\n",
116  level, nb_neighbors(level));
117  size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0;
118 #pragma omp parallel for reduction(+: tot_neigh) reduction(+: tot_common) \
119  reduction(+: tot_reciprocal) reduction(+: n_node)
120  for (int i = 0; i < levels.size(); i++) {
121  if (levels[i] > level) {
122  n_node++;
123  size_t begin, end;
124  neighbor_range(i, level, &begin, &end);
125  std::unordered_set<int> neighset;
126  for (size_t j = begin; j < end; j++) {
127  if (neighbors [j] < 0) break;
128  neighset.insert(neighbors[j]);
129  }
130  int n_neigh = neighset.size();
131  int n_common = 0;
132  int n_reciprocal = 0;
133  for (size_t j = begin; j < end; j++) {
134  storage_idx_t i2 = neighbors[j];
135  if (i2 < 0) break;
136  FAISS_ASSERT(i2 != i);
137  size_t begin2, end2;
138  neighbor_range(i2, level, &begin2, &end2);
139  for (size_t j2 = begin2; j2 < end2; j2++) {
140  storage_idx_t i3 = neighbors[j2];
141  if (i3 < 0) break;
142  if (i3 == i) {
143  n_reciprocal++;
144  continue;
145  }
146  if (neighset.count(i3)) {
147  neighset.erase(i3);
148  n_common++;
149  }
150  }
151  }
152  tot_neigh += n_neigh;
153  tot_common += n_common;
154  tot_reciprocal += n_reciprocal;
155  }
156  }
157  float normalizer = n_node;
158  printf(" nb of nodes at that level %ld\n", n_node);
159  printf(" neighbors per node: %.2f (%ld)\n",
160  tot_neigh / normalizer, tot_neigh);
161  printf(" nb of reciprocal neighbors: %.2f\n", tot_reciprocal / normalizer);
162  printf(" nb of neighbors that are also neighbor-of-neighbors: %.2f (%ld)\n",
163  tot_common / normalizer, tot_common);
164 
165 
166 
167 }
168 
169 
171 {
172  int max_level = prepare_level_tab(n);
173  RandomGenerator rng2(456);
174 
175  for (int level = max_level - 1; level >= 0; --level) {
176  std::vector<int> elts;
177  for (int i = 0; i < n; i++) {
178  if (levels[i] > level) {
179  elts.push_back(i);
180  }
181  }
182  printf ("linking %ld elements in level %d\n",
183  elts.size(), level);
184 
185  if (elts.size() == 1) continue;
186 
187  for (int ii = 0; ii < elts.size(); ii++) {
188  int i = elts[ii];
189  size_t begin, end;
190  neighbor_range(i, 0, &begin, &end);
191  for (size_t j = begin; j < end; j++) {
192  int other = 0;
193  do {
194  other = elts[rng2.rand_int(elts.size())];
195  } while(other == i);
196 
197  neighbors[j] = other;
198  }
199  }
200  }
201 }
202 
203 
204 int HNSW::prepare_level_tab(size_t n, bool preset_levels)
205 {
206  size_t n0 = offsets.size() - 1;
207 
208  if (preset_levels) {
209  FAISS_ASSERT (n0 + n == levels.size());
210  } else {
211  FAISS_ASSERT (n0 == levels.size());
212  for (int i = 0; i < n; i++) {
213  int pt_level = random_level();
214  levels.push_back(pt_level + 1);
215  }
216  }
217 
218  int max_level = 0;
219  for (int i = 0; i < n; i++) {
220  int pt_level = levels[i + n0] - 1;
221  if (pt_level > max_level) max_level = pt_level;
222  offsets.push_back(offsets.back() +
223  cum_nb_neighbors(pt_level + 1));
224  neighbors.resize(offsets.back(), -1);
225  }
226 
227  return max_level;
228 }
229 
230 
231 /** Enumerate vertices from farthest to nearest from query, keep a
232  * neighbor only if there is no previous neighbor that is closer to
233  * that vertex than the query.
234  */
236  DistanceComputer& qdis,
237  std::priority_queue<NodeDistFarther>& input,
238  std::vector<NodeDistFarther>& output,
239  int max_size)
240 {
241  while (input.size() > 0) {
242  NodeDistFarther v1 = input.top();
243  input.pop();
244  float dist_v1_q = v1.d;
245 
246  bool good = true;
247  for (NodeDistFarther v2 : output) {
248  float dist_v1_v2 = qdis.symmetric_dis(v2.id, v1.id);
249 
250  if (dist_v1_v2 < dist_v1_q) {
251  good = false;
252  break;
253  }
254  }
255 
256  if (good) {
257  output.push_back(v1);
258  if (output.size() >= max_size) {
259  return;
260  }
261  }
262  }
263 }
264 
265 
266 namespace {
267 
268 
269 using storage_idx_t = HNSW::storage_idx_t;
272 
273 
274 /**************************************************************
275  * Addition subroutines
276  **************************************************************/
277 
278 
279 /// remove neighbors from the list to make it smaller than max_size
280 void shrink_neighbor_list(
281  DistanceComputer& qdis,
282  std::priority_queue<NodeDistCloser>& resultSet1,
283  int max_size)
284 {
285  if (resultSet1.size() < max_size) {
286  return;
287  }
288  std::priority_queue<NodeDistFarther> resultSet;
289  std::vector<NodeDistFarther> returnlist;
290 
291  while (resultSet1.size() > 0) {
292  resultSet.emplace(resultSet1.top().d, resultSet1.top().id);
293  resultSet1.pop();
294  }
295 
296  HNSW::shrink_neighbor_list(qdis, resultSet, returnlist, max_size);
297 
298  for (NodeDistFarther curen2 : returnlist) {
299  resultSet1.emplace(curen2.d, curen2.id);
300  }
301 
302 }
303 
304 
305 /// add a link between two elements, possibly shrinking the list
306 /// of links to make room for it.
307 void add_link(HNSW& hnsw,
308  DistanceComputer& qdis,
309  storage_idx_t src, storage_idx_t dest,
310  int level)
311 {
312  size_t begin, end;
313  hnsw.neighbor_range(src, level, &begin, &end);
314  if (hnsw.neighbors[end - 1] == -1) {
315  // there is enough room, find a slot to add it
316  size_t i = end;
317  while(i > begin) {
318  if (hnsw.neighbors[i - 1] != -1) break;
319  i--;
320  }
321  hnsw.neighbors[i] = dest;
322  return;
323  }
324 
325  // otherwise we let them fight out which to keep
326 
327  // copy to resultSet...
328  std::priority_queue<NodeDistCloser> resultSet;
329  resultSet.emplace(qdis.symmetric_dis(src, dest), dest);
330  for (size_t i = begin; i < end; i++) { // HERE WAS THE BUG
331  storage_idx_t neigh = hnsw.neighbors[i];
332  resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh);
333  }
334 
335  shrink_neighbor_list(qdis, resultSet, end - begin);
336 
337  // ...and back
338  size_t i = begin;
339  while (resultSet.size()) {
340  hnsw.neighbors[i++] = resultSet.top().id;
341  resultSet.pop();
342  }
343  // they may have shrunk more than just by 1 element
344  while(i < end) {
345  hnsw.neighbors[i++] = -1;
346  }
347 }
348 
349 /// search neighbors on a single level, starting from an entry point
350 void search_neighbors_to_add(
351  HNSW& hnsw,
352  DistanceComputer& qdis,
353  std::priority_queue<NodeDistCloser>& results,
354  int entry_point,
355  float d_entry_point,
356  int level,
357  VisitedTable &vt)
358 {
359  // top is nearest candidate
360  std::priority_queue<NodeDistFarther> candidates;
361 
362  NodeDistFarther ev(d_entry_point, entry_point);
363  candidates.push(ev);
364  results.emplace(d_entry_point, entry_point);
365  vt.set(entry_point);
366 
367  while (!candidates.empty()) {
368  // get nearest
369  const NodeDistFarther &currEv = candidates.top();
370 
371  if (currEv.d > results.top().d) {
372  break;
373  }
374  int currNode = currEv.id;
375  candidates.pop();
376 
377  // loop over neighbors
378  size_t begin, end;
379  hnsw.neighbor_range(currNode, level, &begin, &end);
380  for(size_t i = begin; i < end; i++) {
381  storage_idx_t nodeId = hnsw.neighbors[i];
382  if (nodeId < 0) break;
383  if (vt.get(nodeId)) continue;
384  vt.set(nodeId);
385 
386  float dis = qdis(nodeId);
387  NodeDistFarther evE1(dis, nodeId);
388 
389  if (results.size() < hnsw.efConstruction ||
390  results.top().d > dis) {
391 
392  results.emplace(dis, nodeId);
393  candidates.emplace(dis, nodeId);
394  if (results.size() > hnsw.efConstruction) {
395  results.pop();
396  }
397  }
398  }
399  }
400  vt.advance();
401 }
402 
403 
404 /**************************************************************
405  * Searching subroutines
406  **************************************************************/
407 
408 /// greedily update a nearest vector at a given level
409 void greedy_update_nearest(const HNSW& hnsw,
410  DistanceComputer& qdis,
411  int level,
412  storage_idx_t& nearest,
413  float& d_nearest)
414 {
415  for(;;) {
416  storage_idx_t prev_nearest = nearest;
417 
418  size_t begin, end;
419  hnsw.neighbor_range(nearest, level, &begin, &end);
420  for(size_t i = begin; i < end; i++) {
421  storage_idx_t v = hnsw.neighbors[i];
422  if (v < 0) break;
423  float dis = qdis(v);
424  if (dis < d_nearest) {
425  nearest = v;
426  d_nearest = dis;
427  }
428  }
429  if (nearest == prev_nearest) {
430  return;
431  }
432  }
433 }
434 
435 
436 } // namespace
437 
438 
439 /// Finds neighbors and builds links with them, starting from an entry
440 /// point. The own neighbor list is assumed to be locked.
442  storage_idx_t pt_id,
443  storage_idx_t nearest,
444  float d_nearest,
445  int level,
446  omp_lock_t *locks,
447  VisitedTable &vt)
448 {
449  std::priority_queue<NodeDistCloser> link_targets;
450 
451  search_neighbors_to_add(*this, ptdis, link_targets, nearest, d_nearest,
452  level, vt);
453 
454  // but we can afford only this many neighbors
455  int M = nb_neighbors(level);
456 
457  ::faiss::shrink_neighbor_list(ptdis, link_targets, M);
458 
459  while (!link_targets.empty()) {
460  int other_id = link_targets.top().id;
461 
462  omp_set_lock(&locks[other_id]);
463  add_link(*this, ptdis, other_id, pt_id, level);
464  omp_unset_lock(&locks[other_id]);
465 
466  add_link(*this, ptdis, pt_id, other_id, level);
467 
468  link_targets.pop();
469  }
470 }
471 
472 
473 /**************************************************************
474  * Building, parallel
475  **************************************************************/
476 
477 void HNSW::add_with_locks(DistanceComputer& ptdis, int pt_level, int pt_id,
478  std::vector<omp_lock_t>& locks,
479  VisitedTable& vt)
480 {
481  // greedy search on upper levels
482 
483  storage_idx_t nearest;
484 #pragma omp critical
485  {
486  nearest = entry_point;
487 
488  if (nearest == -1) {
489  max_level = pt_level;
490  entry_point = pt_id;
491  }
492  }
493 
494  if (nearest < 0) {
495  return;
496  }
497 
498  omp_set_lock(&locks[pt_id]);
499 
500  int level = max_level; // level at which we start adding neighbors
501  float d_nearest = ptdis(nearest);
502 
503  for(; level > pt_level; level--) {
504  greedy_update_nearest(*this, ptdis, level, nearest, d_nearest);
505  }
506 
507  for(; level >= 0; level--) {
508  add_links_starting_from(ptdis, pt_id, nearest, d_nearest,
509  level, locks.data(), vt);
510  }
511 
512  omp_unset_lock(&locks[pt_id]);
513 
514  if (pt_level > max_level) {
515  max_level = pt_level;
516  entry_point = pt_id;
517  }
518 }
519 
520 
521 /** Do a BFS on the candidates list */
522 
524  DistanceComputer& qdis, int k,
525  idx_t *I, float *D,
526  MinimaxHeap& candidates,
527  VisitedTable& vt,
528  int level, int nres_in) const
529 {
530  int nres = nres_in;
531  int ndis = 0;
532  for (int i = 0; i < candidates.size(); i++) {
533  idx_t v1 = candidates.ids[i];
534  float d = candidates.dis[i];
535  FAISS_ASSERT(v1 >= 0);
536  if (nres < k) {
537  faiss::maxheap_push(++nres, D, I, d, v1);
538  } else if (d < D[0]) {
539  faiss::maxheap_pop(nres--, D, I);
540  faiss::maxheap_push(++nres, D, I, d, v1);
541  }
542  vt.set(v1);
543  }
544 
545  bool do_dis_check = check_relative_distance;
546  int nstep = 0;
547 
548  while (candidates.size() > 0) {
549  float d0 = 0;
550  int v0 = candidates.pop_min(&d0);
551 
552  if (do_dis_check) {
553  // tricky stopping condition: there are more that ef
554  // distances that are processed already that are smaller
555  // than d0
556 
557  int n_dis_below = candidates.count_below(d0);
558  if(n_dis_below >= efSearch) {
559  break;
560  }
561  }
562 
563  size_t begin, end;
564  neighbor_range(v0, level, &begin, &end);
565 
566  for (size_t j = begin; j < end; j++) {
567  int v1 = neighbors[j];
568  if (v1 < 0) break;
569  if (vt.get(v1)) {
570  continue;
571  }
572  vt.set(v1);
573  ndis++;
574  float d = qdis(v1);
575  if (nres < k) {
576  faiss::maxheap_push(++nres, D, I, d, v1);
577  } else if (d < D[0]) {
578  faiss::maxheap_pop(nres--, D, I);
579  faiss::maxheap_push(++nres, D, I, d, v1);
580  }
581  candidates.push(v1, d);
582  }
583 
584  nstep++;
585  if (!do_dis_check && nstep > efSearch) {
586  break;
587  }
588  }
589 
590  if (level == 0) {
591 #pragma omp critical
592  {
593  hnsw_stats.n1 ++;
594  if (candidates.size() == 0) {
595  hnsw_stats.n2 ++;
596  }
597  hnsw_stats.n3 += ndis;
598  }
599  }
600 
601  return nres;
602 }
603 
604 
605 /**************************************************************
606  * Searching
607  **************************************************************/
608 
609 std::priority_queue<HNSW::Node> HNSW::search_from_candidate_unbounded(
610  const Node& node,
611  DistanceComputer& qdis,
612  int ef,
613  VisitedTable *vt) const
614 {
615  int ndis = 0;
616  std::priority_queue<Node> top_candidates;
617  std::priority_queue<Node, std::vector<Node>, std::greater<Node>> candidates;
618 
619  top_candidates.push(node);
620  candidates.push(node);
621 
622  vt->set(node.second);
623 
624  while (!candidates.empty()) {
625  float d0;
626  storage_idx_t v0;
627  std::tie(d0, v0) = candidates.top();
628 
629  if (d0 > top_candidates.top().first) {
630  break;
631  }
632 
633  candidates.pop();
634 
635  size_t begin, end;
636  neighbor_range(v0, 0, &begin, &end);
637 
638  for (size_t j = begin; j < end; ++j) {
639  int v1 = neighbors[j];
640 
641  if (v1 < 0) {
642  break;
643  }
644  if (vt->get(v1)) {
645  continue;
646  }
647 
648  vt->set(v1);
649 
650  float d1 = qdis(v1);
651  ++ndis;
652 
653  if (top_candidates.top().first > d1 || top_candidates.size() < ef) {
654  candidates.emplace(d1, v1);
655  top_candidates.emplace(d1, v1);
656 
657  if (top_candidates.size() > ef) {
658  top_candidates.pop();
659  }
660  }
661  }
662  }
663 
664 #pragma omp critical
665  {
666  ++hnsw_stats.n1;
667  if (candidates.size() == 0) {
668  ++hnsw_stats.n2;
669  }
670  hnsw_stats.n3 += ndis;
671  }
672 
673  return top_candidates;
674 }
675 
676 void HNSW::search(DistanceComputer& qdis, int k,
677  idx_t *I, float *D,
678  VisitedTable& vt) const
679 {
680  if (upper_beam == 1) {
681 
682  // greedy search on upper levels
683  storage_idx_t nearest = entry_point;
684  float d_nearest = qdis(nearest);
685 
686  for(int level = max_level; level >= 1; level--) {
687  greedy_update_nearest(*this, qdis, level, nearest, d_nearest);
688  }
689 
690  int ef = std::max(efSearch, k);
691  if (search_bounded_queue) {
692  MinimaxHeap candidates(ef);
693 
694  candidates.push(nearest, d_nearest);
695 
696  search_from_candidates(qdis, k, I, D, candidates, vt, 0);
697  } else {
698  std::priority_queue<Node> top_candidates =
699  search_from_candidate_unbounded(Node(d_nearest, nearest),
700  qdis, ef, &vt);
701 
702  while (top_candidates.size() > k) {
703  top_candidates.pop();
704  }
705 
706  int nres = 0;
707  while (!top_candidates.empty()) {
708  float d;
709  storage_idx_t label;
710  std::tie(d, label) = top_candidates.top();
711  faiss::maxheap_push(++nres, D, I, d, label);
712  top_candidates.pop();
713  }
714  }
715 
716  vt.advance();
717 
718  } else {
719  int candidates_size = upper_beam;
720  MinimaxHeap candidates(candidates_size);
721 
722  std::vector<idx_t> I_to_next(candidates_size);
723  std::vector<float> D_to_next(candidates_size);
724 
725  int nres = 1;
726  I_to_next[0] = entry_point;
727  D_to_next[0] = qdis(entry_point);
728 
729  for(int level = max_level; level >= 0; level--) {
730 
731  // copy I, D -> candidates
732 
733  candidates.clear();
734 
735  for (int i = 0; i < nres; i++) {
736  candidates.push(I_to_next[i], D_to_next[i]);
737  }
738 
739  if (level == 0) {
740  nres = search_from_candidates(qdis, k, I, D, candidates, vt, 0);
741  } else {
742  nres = search_from_candidates(
743  qdis, candidates_size,
744  I_to_next.data(), D_to_next.data(),
745  candidates, vt, level
746  );
747  }
748  vt.advance();
749  }
750  }
751 }
752 
753 
754 void HNSW::MinimaxHeap::push(storage_idx_t i, float v) {
755  if (k == n) {
756  if (v >= dis[0]) return;
757  faiss::heap_pop<HC> (k--, dis.data(), ids.data());
758  --nvalid;
759  }
760  faiss::heap_push<HC> (++k, dis.data(), ids.data(), v, i);
761  ++nvalid;
762 }
763 
764 float HNSW::MinimaxHeap::max() const {
765  return dis[0];
766 }
767 
768 int HNSW::MinimaxHeap::size() const {
769  return nvalid;
770 }
771 
772 void HNSW::MinimaxHeap::clear() {
773  nvalid = k = 0;
774 }
775 
776 int HNSW::MinimaxHeap::pop_min(float *vmin_out) {
777  assert(k > 0);
778  // returns min. This is an O(n) operation
779  int i = k - 1;
780  while (i >= 0) {
781  if (ids[i] != -1) break;
782  i--;
783  }
784  if (i == -1) return -1;
785  int imin = i;
786  float vmin = dis[i];
787  i--;
788  while(i >= 0) {
789  if (ids[i] != -1 && dis[i] < vmin) {
790  vmin = dis[i];
791  imin = i;
792  }
793  i--;
794  }
795  if (vmin_out) *vmin_out = vmin;
796  int ret = ids[imin];
797  ids[imin] = -1;
798  --nvalid;
799 
800  return ret;
801 }
802 
803 int HNSW::MinimaxHeap::count_below(float thresh) {
804  int n_below = 0;
805  for(int i = 0; i < k; i++) {
806  if (dis[i] < thresh) {
807  n_below++;
808  }
809  }
810 
811  return n_below;
812 }
813 
814 
815 } // namespace faiss
random generator that can be used in multithreaded contexts
Definition: utils.h:47
void add_with_locks(DistanceComputer &ptdis, int pt_level, int pt_id, std::vector< omp_lock_t > &locks, VisitedTable &vt)
Definition: HNSW.cpp:477
virtual float symmetric_dis(idx_t i, idx_t j)=0
compute distance between two stored vectors
void neighbor_range(idx_t no, int layer_no, size_t *begin, size_t *end) const
range of entries in the neighbors table of vertex no at layer_no
Definition: HNSW.cpp:41
bool get(int no) const
get flag #no
Definition: HNSW.h:236
int nb_neighbors(int layer_no) const
nb of neighbors for this level
Definition: HNSW.cpp:21
storage_idx_t entry_point
entry point in the search structure (one of the points with maximum level)
Definition: HNSW.h:117
int cum_nb_neighbors(int layer_no) const
cumulative nb up to (and excluding) this level
Definition: HNSW.cpp:36
Index::idx_t idx_t
Faiss results are 64-bit.
Definition: HNSW.h:51
std::vector< double > assign_probas
assignment probability to each layer (sum=1)
Definition: HNSW.h:99
float rand_float()
between 0 and 1
Definition: utils.cpp:129
bool search_bounded_queue
use bounded queue during exploration
Definition: HNSW.h:137
std::vector< int > cum_nneighbor_per_level
Definition: HNSW.h:103
void advance()
reset all flags to false
Definition: HNSW.h:241
void add_links_starting_from(DistanceComputer &ptdis, storage_idx_t pt_id, storage_idx_t nearest, float d_nearest, int level, omp_lock_t *locks, VisitedTable &vt)
Definition: HNSW.cpp:441
long idx_t
all indices are this type
Definition: Index.h:62
std::vector< size_t > offsets
Definition: HNSW.h:110
set implementation optimized for fast access.
Definition: HNSW.h:223
int rand_int()
random positive integer
Definition: utils.cpp:114
int efSearch
expansion factor at search time
Definition: HNSW.h:128
bool check_relative_distance
during search: do we check whether the next best distance is good enough?
Definition: HNSW.h:131
HNSW(int M=32)
only mandatory parameter: nb of neighbors
Definition: HNSW.cpp:51
to sort pairs of (id, distance) from nearest to farthest or the reverse
Definition: HNSW.h:83
int upper_beam
number of entry points in levels > 0.
Definition: HNSW.h:134
void set_nb_neighbors(int level_no, int n)
set nb of neighbors for this level (before adding anything)
Definition: HNSW.cpp:27
int search_from_candidates(DistanceComputer &qdis, int k, idx_t *I, float *D, MinimaxHeap &candidates, VisitedTable &vt, int level, int nres_in=0) const
Definition: HNSW.cpp:523
int random_level()
pick a random level for a new point
Definition: HNSW.cpp:62
void set_default_probas(int M, float levelMult)
Definition: HNSW.cpp:76
void search(DistanceComputer &qdis, int k, idx_t *I, float *D, VisitedTable &vt) const
search interface
Definition: HNSW.cpp:676
void fill_with_random_links(size_t n)
add n random levels to table (for debugging...)
Definition: HNSW.cpp:170
std::vector< storage_idx_t > neighbors
Definition: HNSW.h:114
int efConstruction
expansion factor at construction time
Definition: HNSW.h:125
int storage_idx_t
internal storage of vectors (32 bits: this is expensive)
Definition: HNSW.h:48
void set(int no)
set flag #no to true
Definition: HNSW.h:231
std::vector< int > levels
level of each vector (base level = 1), size = ntotal
Definition: HNSW.h:106
int max_level
maximum level
Definition: HNSW.h:122
static void shrink_neighbor_list(DistanceComputer &qdis, std::priority_queue< NodeDistFarther > &input, std::vector< NodeDistFarther > &output, int max_size)
Definition: HNSW.cpp:235