13 #include <unordered_set>
19 #include "FaissAssert.h"
44 struct DistanceComputer;
53 typedef std::pair<float, storage_idx_t> Node;
62 std::vector<storage_idx_t> ids;
63 std::vector<float> dis;
66 explicit MinimaxHeap(
int n): n(n), k(0), nvalid(0), ids(n), dis(n) {}
76 int pop_min(
float *vmin_out =
nullptr);
78 int count_below(
float thresh);
87 bool operator < (
const NodeDistCloser &obj1)
const {
return d < obj1.d; }
94 bool operator < (
const NodeDistFarther &obj1)
const {
return d > obj1.d; }
158 size_t * begin,
size_t * end)
const;
161 explicit HNSW(
int M = 32);
181 std::vector<omp_lock_t>& locks,
188 int level,
int nres_in = 0)
const;
190 std::priority_queue<Node> search_from_candidate_unbounded(
204 void clear_neighbor_tables(
int level);
205 void print_neighbor_stats(
int level)
const;
207 int prepare_level_tab(
size_t n,
bool preset_levels =
false);
211 std::priority_queue<NodeDistFarther>& input,
212 std::vector<NodeDistFarther>& output,
224 std::vector<uint8_t> visited;
228 : visited(size), visno(1) {}
236 bool get(
int no)
const {
237 return visited[no] == visno;
245 memset(visited.data(), 0,
sizeof(visited[0]) * visited.size());
random generator that can be used in multithreaded contexts
void add_with_locks(DistanceComputer &ptdis, int pt_level, int pt_id, std::vector< omp_lock_t > &locks, VisitedTable &vt)
void neighbor_range(idx_t no, int layer_no, size_t *begin, size_t *end) const
range of entries in the neighbors table of vertex no at layer_no
int nb_neighbors(int layer_no) const
nb of neighbors for this level
storage_idx_t entry_point
entry point in the search structure (one of the points with maximum level)
int cum_nb_neighbors(int layer_no) const
cumulative nb up to (and excluding) this level
Index::idx_t idx_t
Faiss results are 64-bit.
std::vector< double > assign_probas
assignment probability to each layer (sum=1)
bool search_bounded_queue
use bounded queue during exploration
std::vector< int > cum_nneighbor_per_level
void advance()
reset all flags to false
void add_links_starting_from(DistanceComputer &ptdis, storage_idx_t pt_id, storage_idx_t nearest, float d_nearest, int level, omp_lock_t *locks, VisitedTable &vt)
long idx_t
all indices are this type
std::vector< size_t > offsets
set implementation optimized for fast access.
int efSearch
expansion factor at search time
bool check_relative_distance
during search: do we check whether the next best distance is good enough?
HNSW(int M=32)
only mandatory parameter: nb of neighbors
to sort pairs of (id, distance) from nearest to farthest or the reverse
int upper_beam
number of entry points in levels > 0.
void set_nb_neighbors(int level_no, int n)
set nb of neighbors for this level (before adding anything)
int search_from_candidates(DistanceComputer &qdis, int k, idx_t *I, float *D, MinimaxHeap &candidates, VisitedTable &vt, int level, int nres_in=0) const
int random_level()
pick a random level for a new point
void set_default_probas(int M, float levelMult)
void search(DistanceComputer &qdis, int k, idx_t *I, float *D, VisitedTable &vt) const
search interface
void fill_with_random_links(size_t n)
add n random levels to table (for debugging...)
std::vector< storage_idx_t > neighbors
int efConstruction
expansion factor at construction time
int storage_idx_t
internal storage of vectors (32 bits: this is expensive)
void set(int no)
set flag #no to true
std::vector< int > levels
level of each vector (base level = 1), size = ntotal
int max_level
maximum level
static void shrink_neighbor_list(DistanceComputer &qdis, std::priority_queue< NodeDistFarther > &input, std::vector< NodeDistFarther > &output, int max_size)