14 #include <unordered_set>
20 #include "FaissAssert.h"
54 typedef std::pair<float, storage_idx_t> Node;
64 virtual void set_query(
const float *x) = 0;
82 std::vector<storage_idx_t> ids;
83 std::vector<float> dis;
86 explicit MinimaxHeap(
int n): n(n), k(0), ids(n), dis(n) {}
96 int pop_min(
float *vmin_out =
nullptr);
98 int count_below(
float thresh);
107 bool operator < (
const NodeDistCloser &obj1)
const {
return d < obj1.d; }
114 bool operator < (
const NodeDistFarther &obj1)
const {
return d > obj1.d; }
172 size_t * begin,
size_t * end)
const;
175 explicit HNSW(
int M = 32);
195 std::vector<omp_lock_t>& locks,
202 int level,
int nres_in = 0)
const;
204 std::priority_queue<Node> search_from(
const Node& node,
216 void clear_neighbor_tables(
int level);
217 void print_neighbor_stats(
int level)
const;
219 int prepare_level_tab(
size_t n,
bool preset_levels =
false);
223 std::priority_queue<NodeDistFarther>& input,
224 std::vector<NodeDistFarther>& output,
236 std::vector<uint8_t> visited;
240 : visited(size), visno(1) {}
248 bool get(
int no)
const {
249 return visited[no] == visno;
257 memset(visited.data(), 0,
sizeof(visited[0]) * visited.size());
random generator that can be used in multithreaded contexts
void add_with_locks(DistanceComputer &ptdis, int pt_level, int pt_id, std::vector< omp_lock_t > &locks, VisitedTable &vt)
void neighbor_range(idx_t no, int layer_no, size_t *begin, size_t *end) const
range of entries in the neighbors table of vertex no at layer_no
virtual float operator()(storage_idx_t i)=0
compute distance of vector i to current query
int nb_neighbors(int layer_no) const
nb of neighbors for this level
storage_idx_t entry_point
entry point in the search structure (one of the points with maximum level)
int cum_nb_neighbors(int layer_no) const
cumulative nb of neighbors up to (and excluding) this level
Index::idx_t idx_t
Faiss results are 64-bit.
virtual float symmetric_dis(storage_idx_t i, storage_idx_t j)=0
compute distance between two stored vectors
std::vector< double > assign_probas
assignment probability to each layer (sum=1)
std::vector< int > cum_nneighbor_per_level
void advance()
reset all flags to false
void add_links_starting_from(DistanceComputer &ptdis, storage_idx_t pt_id, storage_idx_t nearest, float d_nearest, int level, omp_lock_t *locks, VisitedTable &vt)
std::vector< size_t > offsets
set implementation optimized for fast access.
int efSearch
expansion factor at search time
long idx_t
all indices are this type
virtual void set_query(const float *x)=0
called before computing distances
HNSW(int M=32)
only mandatory parameter: nb of neighbors
to sort pairs of (id, distance) from nearest to farthest or the reverse
int upper_beam
number of entry points in levels > 0.
void set_nb_neighbors(int level_no, int n)
set nb of neighbors for this level (before adding anything)
int search_from_candidates(DistanceComputer &qdis, int k, idx_t *I, float *D, MinimaxHeap &candidates, VisitedTable &vt, int level, int nres_in=0) const
int random_level()
pick a random level for a new point
void set_default_probas(int M, float levelMult)
void search(DistanceComputer &qdis, int k, idx_t *I, float *D, VisitedTable &vt) const
search interface
void fill_with_random_links(size_t n)
add n random levels to table (for debugging...)
std::vector< storage_idx_t > neighbors
int efConstruction
expansion factor at construction time
int storage_idx_t
internal storage of vectors (32 bits: this is expensive)
void set(int no)
set flag #no to true
std::vector< int > levels
level of each vector (base level = 1), size = ntotal
int max_level
maximum level
static void shrink_neighbor_list(DistanceComputer &qdis, std::priority_queue< NodeDistFarther > &input, std::vector< NodeDistFarther > &output, int max_size)