17 using DistanceComputer = HNSW::DistanceComputer;
31 FAISS_THROW_IF_NOT(
levels.size() == 0);
44 size_t * begin,
size_t * end)
const
82 for (
int level = 0; ;level++) {
83 float proba = exp(-level / levelMult) * (1 - exp(-1 / levelMult));
84 if (proba < 1e-9)
break;
86 nn += level == 0 ? M * 2 : M;
91 void HNSW::clear_neighbor_tables(
int level)
93 for (
int i = 0; i <
levels.size(); i++) {
96 for (
size_t j = begin; j < end; j++) {
114 void HNSW::print_neighbor_stats(
int level)
const
117 printf(
"stats on level %d, max %d neighbors per vertex:\n",
119 size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0;
120 #pragma omp parallel for reduction(+: tot_neigh) reduction(+: tot_common) \
121 reduction(+: tot_reciprocal) reduction(+: n_node)
122 for (
int i = 0; i <
levels.size(); i++) {
127 std::unordered_set<int> neighset;
128 for (
size_t j = begin; j < end; j++) {
132 int n_neigh = neighset.size();
134 int n_reciprocal = 0;
135 for (
size_t j = begin; j < end; j++) {
138 FAISS_ASSERT(i2 != i);
141 for (
size_t j2 = begin2; j2 < end2; j2++) {
148 if (neighset.count(i3)) {
154 tot_neigh += n_neigh;
155 tot_common += n_common;
156 tot_reciprocal += n_reciprocal;
159 float normalizer = n_node;
160 printf(
" nb of nodes at that level %ld\n", n_node);
161 printf(
" neighbors per node: %.2f (%ld)\n",
162 tot_neigh / normalizer, tot_neigh);
163 printf(
" nb of reciprocal neighbors: %.2f\n", tot_reciprocal / normalizer);
164 printf(
" nb of neighbors that are also neighbor-of-neighbors: %.2f (%ld)\n",
165 tot_common / normalizer, tot_common);
177 for (
int level = max_level - 1; level >= 0; level++) {
178 std::vector<int> elts;
179 for (
int i = 0; i < n; i++) {
184 printf (
"linking %ld elements in level %d\n",
187 if (elts.size() == 1)
continue;
189 for (
int ii = 0; ii < elts.size(); ii++) {
193 for (
size_t j = begin; j < end; j++) {
196 other = elts[rng2.
rand_int(elts.size())];
206 int HNSW::prepare_level_tab(
size_t n,
bool preset_levels)
208 size_t n0 =
offsets.size() - 1;
211 FAISS_ASSERT (n0 + n ==
levels.size());
213 FAISS_ASSERT (n0 ==
levels.size());
214 for (
int i = 0; i < n; i++) {
216 levels.push_back(pt_level + 1);
221 for (
int i = 0; i < n; i++) {
222 int pt_level =
levels[i + n0] - 1;
223 if (pt_level > max_level) max_level = pt_level;
239 std::priority_queue<NodeDistFarther>& input,
240 std::vector<NodeDistFarther>& output,
243 while (input.size() > 0) {
246 float dist_v1_q = v1.d;
252 if (dist_v1_v2 < dist_v1_q) {
259 output.push_back(v1);
260 if (output.size() >= max_size) {
282 void shrink_neighbor_list(
284 std::priority_queue<NodeDistCloser>& resultSet1,
287 if (resultSet1.size() < max_size) {
290 std::priority_queue<NodeDistFarther> resultSet;
291 std::vector<NodeDistFarther> returnlist;
293 while (resultSet1.size() > 0) {
294 resultSet.emplace(resultSet1.top().d, resultSet1.top().id);
300 for (NodeDistFarther curen2 : returnlist) {
301 resultSet1.emplace(curen2.d, curen2.id);
309 void add_link(HNSW& hnsw,
310 DistanceComputer& qdis,
311 storage_idx_t src, storage_idx_t dest,
315 hnsw.neighbor_range(src, level, &begin, &end);
316 if (hnsw.neighbors[end - 1] == -1) {
320 if (hnsw.neighbors[i - 1] != -1)
break;
323 hnsw.neighbors[i] = dest;
330 std::priority_queue<NodeDistCloser> resultSet;
331 resultSet.emplace(qdis.symmetric_dis(src, dest), dest);
332 for (
size_t i = begin; i < end; i++) {
333 storage_idx_t neigh = hnsw.neighbors[i];
334 resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh);
337 shrink_neighbor_list(qdis, resultSet, end - begin);
341 while (resultSet.size()) {
342 hnsw.neighbors[i++] = resultSet.top().id;
347 hnsw.neighbors[i++] = -1;
352 void search_neighbors_to_add(
354 DistanceComputer& qdis,
355 std::priority_queue<NodeDistCloser>& results,
362 std::priority_queue<NodeDistFarther> candidates;
364 NodeDistFarther ev(d_entry_point, entry_point);
366 results.emplace(d_entry_point, entry_point);
369 while (!candidates.empty()) {
371 const NodeDistFarther &currEv = candidates.top();
373 if (currEv.d > results.top().d) {
376 int currNode = currEv.id;
381 hnsw.neighbor_range(currNode, level, &begin, &end);
382 for(
size_t i = begin; i < end; i++) {
383 storage_idx_t nodeId = hnsw.neighbors[i];
384 if (nodeId < 0)
break;
385 if (vt.get(nodeId))
continue;
388 float dis = qdis(nodeId);
389 NodeDistFarther evE1(dis, nodeId);
391 if (results.size() < hnsw.efConstruction ||
392 results.top().d > dis) {
394 results.emplace(dis, nodeId);
395 candidates.emplace(dis, nodeId);
396 if (results.size() > hnsw.efConstruction) {
411 void greedy_update_nearest(
const HNSW& hnsw,
412 DistanceComputer& qdis,
414 storage_idx_t& nearest,
418 storage_idx_t prev_nearest = nearest;
421 hnsw.neighbor_range(nearest, level, &begin, &end);
422 for(
size_t i = begin; i < end; i++) {
423 storage_idx_t v = hnsw.neighbors[i];
426 if (dis < d_nearest) {
431 if (nearest == prev_nearest) {
451 std::priority_queue<NodeDistCloser> link_targets;
453 search_neighbors_to_add(*
this, ptdis, link_targets, nearest, d_nearest,
459 ::faiss::shrink_neighbor_list(ptdis, link_targets, M);
461 while (!link_targets.empty()) {
462 int other_id = link_targets.top().id;
464 omp_set_lock(&locks[other_id]);
465 add_link(*
this, ptdis, other_id, pt_id, level);
466 omp_unset_lock(&locks[other_id]);
468 add_link(*
this, ptdis, pt_id, other_id, level);
480 std::vector<omp_lock_t>& locks,
491 max_level = pt_level;
500 omp_set_lock(&locks[pt_id]);
503 float d_nearest = ptdis(nearest);
505 for(; level > pt_level; level--) {
506 greedy_update_nearest(*
this, ptdis, level, nearest, d_nearest);
509 for(; level >= 0; level--) {
511 level, locks.data(), vt);
514 omp_unset_lock(&locks[pt_id]);
516 if (pt_level > max_level) {
517 max_level = pt_level;
530 int level,
int nres_in)
const
534 for (
int i = 0; i < candidates.size(); i++) {
535 idx_t v1 = candidates.ids[i];
536 float d = candidates.dis[i];
537 FAISS_ASSERT(v1 >= 0);
539 faiss::maxheap_push(++nres, D, I, d, v1);
540 }
else if (d < D[0]) {
541 faiss::maxheap_pop(nres--, D, I);
542 faiss::maxheap_push(++nres, D, I, d, v1);
549 while (candidates.size() > 0) {
551 int v0 = candidates.pop_min(&d0);
556 for (
size_t j = begin; j < end; j++) {
566 faiss::maxheap_push(++nres, D, I, d, v1);
567 }
else if (d < D[0]) {
568 faiss::maxheap_pop(nres--, D, I);
569 faiss::maxheap_push(++nres, D, I, d, v1);
571 candidates.push(v1, d);
584 if (candidates.size() == 0) {
587 hnsw_stats.n3 += ndis;
600 using MaxHeap = std::priority_queue<T, std::vector<T>, std::less<T>>;
602 using MinHeap = std::priority_queue<T, std::vector<T>, std::greater<T>>;
605 MaxHeap<HNSW::Node> HNSW::search_from(
611 MaxHeap<Node> top_candidates;
612 MinHeap<Node> candidate_set;
614 top_candidates.push(node);
615 candidate_set.push(node);
617 vt->
set(node.second);
619 float lower_bound = node.first;
621 while (!candidate_set.empty()) {
624 std::tie(d0, v0) = candidate_set.top();
626 if (d0 > lower_bound) {
635 for (
size_t j = begin; j < end; ++j) {
649 if (top_candidates.top().first > d1 || top_candidates.size() < ef) {
650 candidate_set.emplace(d1, v1);
651 top_candidates.emplace(d1, v1);
653 if (top_candidates.size() > ef) {
654 top_candidates.pop();
657 lower_bound = top_candidates.top().first;
662 return top_candidates;
673 float d_nearest = qdis(nearest);
675 for(
int level = max_level; level >= 1; level--) {
676 greedy_update_nearest(*
this, qdis, level, nearest, d_nearest);
680 MaxHeap<Node> top_candidates = search_from(Node(d_nearest, nearest), qdis, ef, &vt);
681 while (top_candidates.size() > k) {
682 top_candidates.pop();
686 while (!top_candidates.empty()) {
689 std::tie(d, label) = top_candidates.top();
690 faiss::maxheap_push(++nres, D, I, d, label);
691 top_candidates.pop();
709 std::vector<idx_t> I_to_next(candidates_size);
710 std::vector<float> D_to_next(candidates_size);
714 D_to_next[0] = qdis(entry_point);
716 for(
int level = max_level; level >= 0; level--) {
722 for (
int i = 0; i < nres; i++) {
723 candidates.push(I_to_next[i], D_to_next[i]);
730 qdis, candidates_size,
731 I_to_next.data(), D_to_next.data(),
732 candidates, vt, level
741 void HNSW::MinimaxHeap::push(storage_idx_t i,
float v) {
743 if (v >= dis[0])
return;
744 faiss::heap_pop<HC> (k--, dis.data(), ids.data());
746 faiss::heap_push<HC> (++k, dis.data(), ids.data(), v, i);
749 float HNSW::MinimaxHeap::max()
const {
755 int HNSW::MinimaxHeap::size()
const {
759 void HNSW::MinimaxHeap::clear() {
763 int HNSW::MinimaxHeap::pop_min(
float *vmin_out) {
778 if (vmin_out) *vmin_out = vmin;
782 faiss::heap_push<HC>(++imin, dis.data(), ids.data(), ids[k], dis[k]);
787 int HNSW::MinimaxHeap::count_below(
float thresh) {
789 for(
int i = 0; i < k; i++) {
790 if (dis[i] < thresh) {
random generator that can be used in multithreaded contexts
void add_with_locks(DistanceComputer &ptdis, int pt_level, int pt_id, std::vector< omp_lock_t > &locks, VisitedTable &vt)
void neighbor_range(idx_t no, int layer_no, size_t *begin, size_t *end) const
range of entries in the neighbors table of vertex no at layer_no
bool get(int no) const
get flag #no
int nb_neighbors(int layer_no) const
nb of neighbors for this level
storage_idx_t entry_point
entry point in the search structure (one of the points with maximum level)
int cum_nb_neighbors(int layer_no) const
cumulative number of neighbors up to (and excluding) this level
Index::idx_t idx_t
Faiss results are 64-bit.
virtual float symmetric_dis(storage_idx_t i, storage_idx_t j)=0
compute distance between two stored vectors
std::vector< double > assign_probas
assignment probability to each layer (sum=1)
float rand_float()
between 0 and 1
std::vector< int > cum_nneighbor_per_level
void advance()
reset all flags to false
void add_links_starting_from(DistanceComputer &ptdis, storage_idx_t pt_id, storage_idx_t nearest, float d_nearest, int level, omp_lock_t *locks, VisitedTable &vt)
std::vector< size_t > offsets
set implementation optimized for fast access.
int rand_int()
random positive integer
int efSearch
expansion factor at search time
long idx_t
all indices are this type
HNSW(int M=32)
only mandatory parameter: nb of neighbors
to sort pairs of (id, distance) from nearest to farthest or the reverse
int upper_beam
number of entry points in levels > 0.
void set_nb_neighbors(int level_no, int n)
set nb of neighbors for this level (before adding anything)
int search_from_candidates(DistanceComputer &qdis, int k, idx_t *I, float *D, MinimaxHeap &candidates, VisitedTable &vt, int level, int nres_in=0) const
int random_level()
pick a random level for a new point
void set_default_probas(int M, float levelMult)
void search(DistanceComputer &qdis, int k, idx_t *I, float *D, VisitedTable &vt) const
search interface
void fill_with_random_links(size_t n)
add n random levels to table (for debugging...)
std::vector< storage_idx_t > neighbors
int efConstruction
expansion factor at construction time
int storage_idx_t
internal storage of vectors (32 bits: this is expensive)
void set(int no)
set flag #no to true
std::vector< int > levels
level of each vector (base level = 1), size = ntotal
int max_level
maximum level
static void shrink_neighbor_list(DistanceComputer &qdis, std::priority_queue< NodeDistFarther > &input, std::vector< NodeDistFarther > &output, int max_size)