Add files via upload

pull/761/head
Felix 2021-06-01 11:20:46 +08:00 committed by GitHub
parent f2887bdb69
commit 240176b617
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 952 additions and 0 deletions
deploy/vector_search/src

View File

@ -0,0 +1,341 @@
#pragma once
#include<memory>
#include<vector>
#include<math.h>
#include"config.h"
#define ZERO_EPS 1e-10
#define _SCALE_WORLD_DENSE_DATA
#ifdef _SCALE_WORLD_DENSE_DATA
//dense data
class Data{
private:
std::unique_ptr<value_t[]> data;
size_t num;
size_t curr_num = 0;
int dim;
public:
value_t mobius_pow = 2;
value_t max_ip_norm = 1;
value_t max_ip_norm2 = 1;
Data(size_t num, int dim) : num(num),dim(dim){
data = std::unique_ptr<value_t[]>(new value_t[num * dim]);
memset(data.get(),0,sizeof(value_t) * num * dim);
}
value_t* get(idx_t idx) const{
return data.get() + idx * dim;
}
template<class T>
dist_t ipwrap_l2_query_distance(idx_t a,T& v) const{
auto pa = get(a);
dist_t ret = 0;
dist_t normu = 0;
for(int i = 0;i < dim;++i){
auto diff = (*(pa + i) / max_ip_norm) - v[i];
ret += diff * diff;
normu += (*(pa + i)) * (*(pa + i));
}
ret += 1 - normu / max_ip_norm2;
return ret;
}
template<class T>
dist_t ipwrap_l2_build_distance(idx_t a,T& v) const{
auto pa = get(a);
dist_t ret = 0;
dist_t normu = 0;
dist_t normv = 0;
for(int i = 0;i < dim;++i){
auto diff = *(pa + i) - v[i];
ret += diff * diff;
normu += (*(pa + i)) * (*(pa + i));
normv += v[i] * v[i];
}
dist_t wrap_termu = sqrt(1 - normu / max_ip_norm2);
dist_t wrap_termv = sqrt(1 - normv / max_ip_norm2);
dist_t diff_wrap = wrap_termu - wrap_termv;
ret = ret / max_ip_norm2 + diff_wrap * diff_wrap;
return ret;
}
template<class T>
dist_t l2_distance(idx_t a,T& v) const{
auto pa = get(a);
dist_t ret = 0;
for(int i = 0;i < dim;++i){
auto diff = *(pa + i) - v[i];
ret += diff * diff;
}
return ret;
}
template<class T>
dist_t negative_inner_prod_distance(idx_t a,T& v) const{
auto pa = get(a);
dist_t ret = 0;
for(int i = 0;i < dim;++i){
ret -= (*(pa + i)) * v[i];
}
return ret;
}
template<class T>
dist_t negative_cosine_distance(idx_t a,T& v) const{
auto pa = get(a);
dist_t ret = 0;
value_t lena = 0,lenv = 0;
for(int i = 0;i < dim;++i){
ret += (*(pa + i)) * v[i];
lena += (*(pa + i)) * (*(pa + i));
lenv += v[i] * v[i];
}
int sign = ret < 0 ? 1 : -1;
// return sign * (ret * ret / lena);// / lenv);
return sign * (ret * ret / lena / lenv);
}
template<class T>
dist_t mobius_l2_distance(idx_t a,T& v) const{
auto pa = get(a);
dist_t ret = 0;
value_t lena = 0,lenv = 0;
for(int i = 0;i < dim;++i){
lena += (*(pa + i)) * (*(pa + i));
lenv += v[i] * v[i];
}
value_t modifier_a = pow(lena,0.5 * mobius_pow);
value_t modifier_v = pow(lenv,0.5 * mobius_pow);
if(fabs(modifier_a) < ZERO_EPS)
modifier_a = 1;
if(fabs(modifier_v) < ZERO_EPS)
modifier_v = 1;
for(int i = 0;i < dim;++i){
value_t tmp = (*(pa + i)) / modifier_a - v[i] / modifier_v;
ret += tmp * tmp;
}
return ret;
}
template<class T>
dist_t real_nn(T& v) const{
dist_t minn = 1e100;
for(size_t i = 0;i < curr_num;++i){
auto res = l2_distance(i,v);
if(res < minn){
minn = res;
}
}
return minn;
}
std::vector<value_t> organize_point_mobius(const std::vector<std::pair<int,value_t>>& v){
std::vector<value_t> ret(dim,0);
value_t lena = 0;
for(const auto& p : v){
// ret[p.first] = p.second;
lena += p.second * p.second;
}
value_t modifier_a = pow(lena,0.5 * mobius_pow);
if(fabs(modifier_a) < ZERO_EPS)
modifier_a = 1;
for(const auto& p : v){
ret[p.first] = p.second / modifier_a;
}
return std::move(ret);
}
std::vector<value_t> organize_point(const std::vector<std::pair<int,value_t>>& v){
std::vector<value_t> ret(dim,0);
for(const auto& p : v){
if(p.first >= dim)
printf("error %d %d\n",p.first,dim);
ret[p.first] = p.second;
}
return std::move(ret);
}
value_t vec_sum2(const std::vector<std::pair<int,value_t>>& v){
value_t ret = 0;
for(const auto& p : v){
if(p.first >= dim)
printf("error %d %d\n",p.first,dim);
ret += p.second * p.second;
}
return std::move(ret);
}
void add(idx_t idx, std::vector<std::pair<int,value_t>>& value){
//printf("adding %zu\n",idx);
//for(auto p : value)
// printf("%zu %d %f\n",idx,p.first,p.second);
curr_num = std::max(curr_num,idx);
auto p = get(idx);
for(const auto& v : value)
*(p + v.first) = v.second;
}
void add_mobius(idx_t idx, std::vector<std::pair<int,value_t>>& value){
//printf("adding %zu\n",idx);
//for(auto p : value)
// printf("%zu %d %f\n",idx,p.first,p.second);
curr_num = std::max(curr_num,idx);
auto p = get(idx);
value_t lena = 0;
for(const auto& v : value){
*(p + v.first) = v.second;
lena += v.second * v.second;
}
value_t modifier_a = pow(lena,0.5 * mobius_pow);
if(fabs(modifier_a) < ZERO_EPS)
modifier_a = 1;
for(const auto& v : value){
*(p + v.first) = v.second / modifier_a;
}
}
inline size_t max_vertices(){
return num;
}
inline size_t curr_vertices(){
return curr_num;
}
void print(){
for(int i = 0;i < num && i < 10;++i)
printf("%f ",*(data.get() + i));
printf("\n");
}
int get_dim(){
return dim;
}
void dump(std::string path = "bfsg.data"){
FILE* fp = fopen(path.c_str(),"wb");
fwrite(data.get(),sizeof(value_t) * num * dim,1,fp);
fclose(fp);
}
void load(std::string path = "bfsg.data"){
curr_num = num;
FILE* fp = fopen(path.c_str(),"rb");
auto cnt = fread(data.get(),sizeof(value_t) * num * dim,1,fp);
fclose(fp);
}
};
template<>
dist_t Data::ipwrap_l2_build_distance(idx_t a,idx_t& b) const{
auto pa = get(a);
auto pb = get(b);
dist_t ret = 0;
dist_t normu = 0;
dist_t normv = 0;
for(int i = 0;i < dim;++i){
auto diff = *(pa + i) - *(pb + i);
ret += diff * diff;
normu += (*(pa + i)) * (*(pa + i));
normv += (*(pb + i)) * (*(pb + i));
}
dist_t wrap_termu = sqrt(1 - normu / max_ip_norm2);
dist_t wrap_termv = sqrt(1 - normv / max_ip_norm2);
dist_t diff_wrap = wrap_termu - wrap_termv;
ret = ret / max_ip_norm2 + diff_wrap * diff_wrap;
return ret;
}
template<>
dist_t Data::ipwrap_l2_query_distance(idx_t a,idx_t& b) const{
auto pa = get(a);
auto pb = get(b);
dist_t ret = 0;
dist_t normu = 0;
for(int i = 0;i < dim;++i){
auto diff = (*(pa + i) / max_ip_norm) - *(pb + i);
ret += diff * diff;
normu += (*(pa + i)) * (*(pa + i));
}
ret += 1 - normu / max_ip_norm2;
return ret;
}
template<>
dist_t Data::l2_distance(idx_t a,idx_t& b) const{
auto pa = get(a),
pb = get(b);
dist_t ret = 0;
for(int i = 0;i < dim;++i){
auto diff = *(pa + i) - *(pb + i);
ret += diff * diff;
}
return ret;
}
template<>
dist_t Data::negative_inner_prod_distance(idx_t a,idx_t& b) const{
auto pa = get(a),
pb = get(b);
dist_t ret = 0;
for(int i = 0;i < dim;++i){
ret -= (*(pa + i)) * (*(pb + i));
}
return ret;
}
template<>
dist_t Data::negative_cosine_distance(idx_t a,idx_t& b) const{
auto pa = get(a),
pb = get(b);
dist_t ret = 0;
value_t lena = 0,lenv = 0;
for(int i = 0;i < dim;++i){
ret += (*(pa + i)) * (*(pb + i));
lena += (*(pa + i)) * (*(pa + i));
lenv += (*(pb + i)) * (*(pb + i));
}
int sign = ret < 0 ? 1 : -1;
// return sign * (ret * ret / lena);
return sign * (ret * ret / lena / lenv);
}
template<>
dist_t Data::mobius_l2_distance(idx_t a,idx_t& b) const{
auto pa = get(a),
pb = get(b);
dist_t ret = 0;
value_t lena = 0,lenv = 0;
for(int i = 0;i < dim;++i){
lena += (*(pa + i)) * (*(pa + i));
lenv += (*(pb + i)) * (*(pb + i));
}
value_t modifier_a = pow(lena,0.5 * mobius_pow);
value_t modifier_v = pow(lenv,0.5 * mobius_pow);
if(fabs(modifier_a) < ZERO_EPS)
modifier_a = 1;
if(fabs(modifier_v) < ZERO_EPS)
modifier_v = 1;
for(int i = 0;i < dim;++i){
value_t tmp = (*(pa + i)) / modifier_a - (*(pb + i)) / modifier_v;
ret += tmp * tmp;
}
return ret;
}
#else
//sparse data
class Data{
public:
//TODO
};
#endif

View File

@ -0,0 +1,611 @@
#pragma once
#include<vector>
#include<algorithm>
#include<queue>
#include<stdlib.h>
#include<random>
#include<unordered_set>
#include<mutex>
#include<time.h>
#include"config.h"
#include"data.h"
#ifdef OMP
#include<omp.h>
#endif
typedef unsigned int vl_type;
class VisitedList {
public:
vl_type curV;
vl_type *mass;
unsigned int numelements;
VisitedList(int numelements1) {
curV = 1;
numelements = numelements1;
mass = new vl_type[numelements];
memset(mass, 0, sizeof(vl_type) * numelements);
}
void reset() {
++curV;
if (curV == 0) {
curV = 1;
memset(mass, 0, sizeof(vl_type) * numelements);
}
};
~VisitedList() { delete mass; }
};
struct GraphMeasures{
int distance_cnt = 0;
};
class GraphWrapper{
public:
virtual void add_vertex(idx_t vertex_id,std::vector<std::pair<int,value_t>>& point) = 0;
virtual void add_vertex_lock(idx_t vertex_id,std::vector<std::pair<int,value_t>>& point) = 0;
virtual void search_top_k(const std::vector<std::pair<int,value_t>>& query,int k,std::vector<idx_t>& result) = 0;
virtual void search_top_k_with_score(const std::vector<std::pair<int,value_t>>& query,int k,std::vector<idx_t>& result,std::vector<double>& score){}
virtual void dump(std::string path = "bfsg.graph") = 0;
virtual void load(std::string path = "bfsg.graph") = 0;
virtual ~GraphWrapper(){}
virtual void set_construct_pq_size(int size){};
GraphMeasures measures;
};
template<const int dist_type>
class FixedDegreeGraph : public GraphWrapper{
private:
const int degree = SEARCH_DEGREE;
const int flexible_degree = FIXED_DEGREE;
const int vertex_offset_shift = FIXED_DEGREE_SHIFT;
std::vector<idx_t> edges;
std::vector<dist_t> edge_dist;
Data* data;
std::mt19937_64 rand_gen = std::mt19937_64(1234567);//std::random_device{}());
std::vector<std::mutex> edge_mutex;//do not push back on this vector, it will destroy the mutex
bool debug = false;
VisitedList* p_visited = NULL;
#ifdef OMP
std::vector<VisitedList*> visited_pool;
#endif
int construct_pq_size = CONSTRUCT_SEARCH_BUDGET;
void rank_and_switch_ordered(idx_t v_id,idx_t u_id){
//We assume the neighbors of v_ids in edges[offset] are sorted
//by the distance to v_id ascendingly when it is full
//NOTICE: before it is full, it is unsorted
auto curr_dist = pair_distance(v_id,u_id);
auto offset = ((size_t)v_id) << vertex_offset_shift;
int degree = edges[offset];
std::vector<idx_t> neighbor;
neighbor.reserve(degree + 1);
for(int i = 0;i < degree;++i)
neighbor.push_back(edges[offset + i + 1]);
neighbor.push_back(u_id);
neighbor = edge_selection_filter_neighbor(neighbor,v_id,flexible_degree);
edges[offset] = neighbor.size();
for(int i = 0;i < neighbor.size();++i)
edges[offset + i + 1] = neighbor[i];
return;
//We assert edges[offset] > 0 here
if(curr_dist >= edge_dist[offset + edges[offset]]){
return;
}
edges[offset + edges[offset]] = u_id;
edge_dist[offset + edges[offset]] = curr_dist;
for(size_t i = offset + edges[offset] - 1;i > offset;--i){
if(edge_dist[i] > edge_dist[i + 1]){
std::swap(edges[i],edges[i + 1]);
std::swap(edge_dist[i],edge_dist[i + 1]);
}else{
break;
}
}
}
void rank_and_switch(idx_t v_id,idx_t u_id){
rank_and_switch_ordered(v_id,u_id);
//TODO:
//Implement an unordered version to compare with
}
template<class T>
dist_t distance(idx_t a,T& b){
if(dist_type == 0)
return data->l2_distance(a,b);
else if(dist_type == 1)
return data->negative_inner_prod_distance(a,b);
else if(dist_type == 2)
return data->negative_cosine_distance(a,b);
else if(dist_type == 3)
return data->l2_distance(a,b);
else if(dist_type == 4)
return data->ipwrap_l2_build_distance(a,b);
else if(dist_type == 5)
return data->ipwrap_l2_query_distance(a,b);
else{
// should not happen
fprintf(stderr,"unsupported dist_type %d\n",dist_type);
return 0;
}
}
void compute_distance_naive(size_t offset,std::vector<dist_t>& dists){
dists.resize(edges[offset]);
auto degree = edges[offset];
for(int i = 0;i < degree;++i){
dists[i] = distance(offset >> vertex_offset_shift,edges[offset + i + 1]);
}
}
void compute_distance(size_t offset,std::vector<dist_t>& dists){
compute_distance_naive(offset,dists);
}
template<class T>
dist_t pair_distance_naive(idx_t a,T& b){
++measures.distance_cnt;
return distance(a,b);
}
template<class T>
dist_t pair_distance(idx_t a,T& b){
return pair_distance_naive(a,b);
}
void qsort(size_t l,size_t r){
auto mid = (l + r) >> 1;
int i = l,j = r;
auto k = edge_dist[mid];
do{
while(edge_dist[i] < k) ++i;
while(k < edge_dist[j]) --j;
if(i <= j){
std::swap(edge_dist[i],edge_dist[j]);
std::swap(edges[i],edges[j]);
++i;
--j;
}
}while(i <= j);
if(i < r)qsort(i,r);
if(l < j)qsort(l,j);
}
void rank_edges(size_t offset){
std::vector<dist_t> dists;
compute_distance(offset,dists);
for(int i = 0;i < dists.size();++i)
edge_dist[offset + i + 1] = dists[i];
qsort(offset + 1,offset + dists.size());
//TODO:
//use a heap in the edge_dist
}
void add_edge_lock(idx_t v_id,idx_t u_id){
edge_mutex[v_id].lock();
auto offset = ((size_t)v_id) << vertex_offset_shift;
if(edges[offset] < flexible_degree){
++edges[offset];
edges[offset + edges[offset]] = u_id;
}else{
rank_and_switch(v_id,u_id);
}
edge_mutex[v_id].unlock();
}
void add_edge(idx_t v_id,idx_t u_id){
auto offset = ((size_t)v_id) << vertex_offset_shift;
if(edges[offset] < flexible_degree){
++edges[offset];
edges[offset + edges[offset]] = u_id;
}else{
rank_and_switch(v_id,u_id);
}
}
public:
long long total_explore_cnt = 0;
int total_explore_times = 0;
size_t search_start_point = 0;
bool ignore_startpoint = false;
FixedDegreeGraph(Data* data) : data(data){
auto num_vertices = data->max_vertices();
edges = std::vector<idx_t>(((size_t)num_vertices) << vertex_offset_shift);
edge_dist = std::vector<dist_t>(((size_t)num_vertices) << vertex_offset_shift);
edge_mutex = std::vector<std::mutex>(num_vertices);
p_visited = new VisitedList(num_vertices + 5);
#ifdef OMP
int n_threads = 1;
#pragma omp parallel
#pragma omp master
{
n_threads = omp_get_num_threads();
}
visited_pool.resize(n_threads);
for(int i = 0;i < n_threads;++i)
visited_pool[i] = new VisitedList(num_vertices + 5);
#endif
}
void set_construct_pq_size(int size){
construct_pq_size = size;
}
std::vector<idx_t> edge_selection_filter_neighbor(std::vector<idx_t>& neighbor,idx_t vertex_id,int desired_size){
std::vector<idx_t> filtered_neighbor;
std::vector<dist_t> dists(neighbor.size());
for(int i = 0;i < dists.size();++i)
dists[i] = pair_distance(vertex_id,neighbor[i]);
std::vector<int> idx(neighbor.size());
for(int i = 0;i < idx.size();++i)
idx[i] = i;
std::sort(idx.begin(),idx.end(),[&](int a,int b){return dists[a] < dists[b];});
for(int i = 0;i < idx.size();++i){
dist_t cur_dist = dists[idx[i]];
bool pass = true;
for(auto neighbor_id : filtered_neighbor){
if(cur_dist > pair_distance(neighbor_id,neighbor[idx[i]])){
pass = false;
break;
}
}
if(pass){
filtered_neighbor.push_back(neighbor[idx[i]]);
if(filtered_neighbor.size() >= desired_size)
break;
}else{
}
}
return std::move(filtered_neighbor);
}
void add_vertex_lock(idx_t vertex_id,std::vector<std::pair<int,value_t>>& point){
std::vector<idx_t> neighbor;
search_top_k_lock(point,construct_pq_size,neighbor);
auto offset = ((size_t)vertex_id) << vertex_offset_shift;
int num_neighbors = degree < neighbor.size() ? degree : neighbor.size();
edge_mutex[vertex_id].lock();
// TODO:
// it is possible to save this space --- edges[offset]
// by set the last number in the range as
// a large number - current degree
if(neighbor.size() >= degree)
neighbor = edge_selection_filter_neighbor(neighbor,vertex_id,degree);
edges[offset] = neighbor.size();
for(int i = 0;i < neighbor.size() && i < degree;++i){
edges[offset + i + 1] = neighbor[i];
}
edge_mutex[vertex_id].unlock();
for(int i = 0;i < neighbor.size() && i < degree;++i){
add_edge_lock(neighbor[i],vertex_id);
}
}
void add_vertex(idx_t vertex_id,std::vector<std::pair<int,value_t>>& point){
std::vector<idx_t> neighbor;
search_top_k(point,construct_pq_size,neighbor);
auto offset = ((size_t)vertex_id) << vertex_offset_shift;
int num_neighbors = degree < neighbor.size() ? degree : neighbor.size();
// TODO:
// it is possible to save this space --- edges[offset]
// by set the last number in the range as
// a large number - current degree
if(neighbor.size() >= degree){
neighbor = edge_selection_filter_neighbor(neighbor,vertex_id,degree);
}
edges[offset] = neighbor.size();
for(int i = 0;i < neighbor.size() && i < degree;++i){
edges[offset + i + 1] = neighbor[i];
}
for(int i = 0;i < neighbor.size() && i < degree;++i){
add_edge(neighbor[i],vertex_id);
}
}
void astar_multi_start_search_lock(const std::vector<std::pair<int,value_t>>& query,int k,std::vector<idx_t>& result){
std::priority_queue<std::pair<dist_t,idx_t>,std::vector<std::pair<dist_t,idx_t>>,std::greater<std::pair<dist_t,idx_t>>> q;
const int num_start_point = 1;
auto converted_query = dist_type == 3 ? data->organize_point_mobius(query) : data->organize_point(query);
#ifdef OMP
int tid = omp_get_thread_num();
auto& p_visited = visited_pool[tid];
#endif
p_visited->reset();
auto tag = p_visited->curV;
for(int i = 0;i < num_start_point && i < data->curr_vertices();++i){
auto start = search_start_point;//rand_gen() % data->curr_vertices();
if(p_visited->mass[start] == tag)
continue;
p_visited->mass[start] = tag;
q.push(std::make_pair(pair_distance_naive(start,converted_query),start));
}
std::priority_queue<std::pair<dist_t,idx_t>> topk;
const int max_step = 1000000;
bool found_min_node = false;
dist_t min_dist = 1e100;
int explore_cnt = 0;
for(int iter = 0;iter < max_step && !q.empty();++iter){
auto now = q.top();
if(topk.size() == k && topk.top().first < now.first){
break;
}
++explore_cnt;
min_dist = std::min(min_dist,now.first);
q.pop();
if(ignore_startpoint == false || iter != 0)
topk.push(now);
if(topk.size() > k)
topk.pop();
edge_mutex[now.second].lock();
auto offset = ((size_t)now.second) << vertex_offset_shift;
auto degree = edges[offset];
for(int i = 0;i < degree;++i){
auto start = edges[offset + i + 1];
if(p_visited->mass[start] == tag)
continue;
p_visited->mass[start] = tag;
auto dist = pair_distance_naive(start,converted_query);
if(topk.empty() || dist < topk.top().first || topk.size() < k)
q.push(std::make_pair(dist,start));
}
edge_mutex[now.second].unlock();
}
total_explore_cnt += explore_cnt;
++total_explore_times;
result.resize(topk.size());
int i = result.size() - 1;
while(!topk.empty()){
result[i] = (topk.top().second);
topk.pop();
--i;
}
}
void astar_no_heap_search(const std::vector<std::pair<int,value_t>>& query,std::vector<idx_t>& result){
const int num_start_point = 1;
std::pair<dist_t,idx_t> q_top = std::make_pair(10000000000,0);
auto converted_query = dist_type == 3 ? data->organize_point_mobius(query) : data->organize_point(query);
p_visited->reset();
auto tag = p_visited->curV;
for(int i = 0;i < num_start_point && i < data->curr_vertices();++i){
auto start = search_start_point;//rand_gen() % data->curr_vertices();
p_visited->mass[start] = tag;
if(ignore_startpoint == false){
q_top = (std::make_pair(pair_distance_naive(start,converted_query),start));
}else{
auto offset = ((size_t)start) << vertex_offset_shift;
auto degree = edges[offset];
for(int i = 1;i <= degree;++i){
p_visited->mass[edges[offset + i]] = tag;
auto dis = pair_distance_naive(edges[offset + i],converted_query);
if(dis < q_top.first)
q_top = (std::make_pair(dis,start));
}
}
}
const int max_step = 1000000;
bool found_min_node = false;
dist_t min_dist = 1e100;
int explore_cnt = 0;
for(int iter = 0;iter < max_step;++iter){
++explore_cnt;
auto offset = ((size_t)q_top.second) << vertex_offset_shift;
auto degree = edges[offset];
bool changed = false;
for(int i = 0;i < degree;++i){
auto start = edges[offset + i + 1];
if(p_visited->mass[start] == tag)
continue;
p_visited->mass[start] = tag;
auto dist = pair_distance_naive(start,converted_query);
if(dist < q_top.first){
q_top = (std::make_pair(dist,start));
changed = true;
}
}
if(changed == false)
break;
}
total_explore_cnt += explore_cnt;
++total_explore_times;
result.resize(1);
result[0] = q_top.second;
}
void astar_multi_start_search_with_score(const std::vector<std::pair<int,value_t>>& query,int k,std::vector<idx_t>& result,std::vector<double>& score){
std::priority_queue<std::pair<dist_t,idx_t>,std::vector<std::pair<dist_t,idx_t>>,std::greater<std::pair<dist_t,idx_t>>> q;
const int num_start_point = 1;
auto converted_query = dist_type == 3 ? data->organize_point_mobius(query) : data->organize_point(query);
p_visited->reset();
auto tag = p_visited->curV;
for(int i = 0;i < num_start_point && i < data->curr_vertices();++i){
auto start = search_start_point;//rand_gen() % data->curr_vertices();
if(p_visited->mass[start] == tag)
continue;
p_visited->mass[start] = tag;
q.push(std::make_pair(pair_distance_naive(start,converted_query),start));
}
std::priority_queue<std::pair<dist_t,idx_t>> topk;
const int max_step = 1000000;
bool found_min_node = false;
dist_t min_dist = 1e100;
int explore_cnt = 0;
for(int iter = 0;iter < max_step && !q.empty();++iter){
auto now = q.top();
if(topk.size() == k && topk.top().first < now.first){
break;
}
++explore_cnt;
min_dist = std::min(min_dist,now.first);
q.pop();
if(ignore_startpoint == false || iter != 0)
topk.push(now);
if(topk.size() > k)
topk.pop();
auto offset = ((size_t)now.second) << vertex_offset_shift;
auto degree = edges[offset];
for(int i = 0;i < degree;++i){
auto start = edges[offset + i + 1];
if(p_visited->mass[start] == tag)
continue;
p_visited->mass[start] = tag;
auto dist = pair_distance_naive(start,converted_query);
if(topk.empty() || dist < topk.top().first || topk.size() < k)
q.push(std::make_pair(dist,start));
}
}
total_explore_cnt += explore_cnt;
++total_explore_times;
result.resize(topk.size());
score.resize(topk.size());
int i = result.size() - 1;
while(!topk.empty()){
result[i] = (topk.top().second);
score[i] = -(topk.top().first);
topk.pop();
--i;
}
}
void astar_multi_start_search(const std::vector<std::pair<int,value_t>>& query,int k,std::vector<idx_t>& result){
std::priority_queue<std::pair<dist_t,idx_t>,std::vector<std::pair<dist_t,idx_t>>,std::greater<std::pair<dist_t,idx_t>>> q;
const int num_start_point = 1;
auto converted_query = dist_type == 3 ? data->organize_point_mobius(query) : data->organize_point(query);
p_visited->reset();
auto tag = p_visited->curV;
for(int i = 0;i < num_start_point && i < data->curr_vertices();++i){
auto start = search_start_point;//rand_gen() % data->curr_vertices();
if(p_visited->mass[start] == tag)
continue;
p_visited->mass[start] = tag;
q.push(std::make_pair(pair_distance_naive(start,converted_query),start));
}
std::priority_queue<std::pair<dist_t,idx_t>> topk;
const int max_step = 1000000;
bool found_min_node = false;
dist_t min_dist = 1e100;
int explore_cnt = 0;
for(int iter = 0;iter < max_step && !q.empty();++iter){
auto now = q.top();
if(topk.size() == k && topk.top().first < now.first){
break;
}
++explore_cnt;
min_dist = std::min(min_dist,now.first);
q.pop();
if(ignore_startpoint == false || iter != 0)
topk.push(now);
if(topk.size() > k)
topk.pop();
auto offset = ((size_t)now.second) << vertex_offset_shift;
auto degree = edges[offset];
for(int i = 0;i < degree;++i){
auto start = edges[offset + i + 1];
if(p_visited->mass[start] == tag)
continue;
p_visited->mass[start] = tag;
auto dist = pair_distance_naive(start,converted_query);
if(topk.empty() || dist < topk.top().first || topk.size() < k)
q.push(std::make_pair(dist,start));
}
}
total_explore_cnt += explore_cnt;
++total_explore_times;
result.resize(topk.size());
int i = result.size() - 1;
while(!topk.empty()){
result[i] = (topk.top().second);
topk.pop();
--i;
}
}
void search_top_k(const std::vector<std::pair<int,value_t>>& query,int k,std::vector<idx_t>& result){
if(k == 1)
astar_no_heap_search(query,result);
else
astar_multi_start_search(query,k,result);
}
void search_top_k_with_score(const std::vector<std::pair<int,value_t>>& query,int k,std::vector<idx_t>& result,std::vector<double>& score){
astar_multi_start_search_with_score(query,k,result,score);
}
void search_top_k_lock(const std::vector<std::pair<int,value_t>>& query,int k,std::vector<idx_t>& result){
astar_multi_start_search_lock(query,k,result);
}
void print_stat(){
auto n = data->max_vertices();
size_t sum = 0;
std::vector<size_t> histogram(2 * degree + 1,0);
for(size_t i = 0;i < n;++i){
sum += edges[i << vertex_offset_shift];
int tmp = edges[i << vertex_offset_shift];
if(tmp > 2 * degree + 1)
fprintf(stderr,"[ERROR] node %zu has %d degree\n",i,tmp);
++histogram[edges[i << vertex_offset_shift]];
if(tmp != degree)
fprintf(stderr,"[INFO] %zu has degree %d\n",i,tmp);
}
fprintf(stderr,"[INFO] #vertices %zu, avg degree %f\n",n,sum * 1.0 / n);
std::unordered_set<idx_t> visited;
fprintf(stderr,"[INFO] degree histogram:\n");
for(int i = 0;i <= 2 * degree + 1;++i)
fprintf(stderr,"[INFO] %d:\t%zu\n",i,histogram[i]);
}
void print_edges(int x){
for(size_t i = 0;i < x;++i){
size_t offset = i << vertex_offset_shift;
int degree = edges[offset];
fprintf(stderr,"%d (%d): ",i,degree);
for(int j = 1;j <= degree;++j)
fprintf(stderr,"(%zu,%f) ",edges[offset + j],edge_dist[offset + j]);
fprintf(stderr,"\n");
}
}
void dump(std::string path = "bfsg.graph"){
FILE* fp = fopen(path.c_str(),"wb");
size_t num_vertices = data->max_vertices();
fwrite(&edges[0],sizeof(edges[0]) * (num_vertices << vertex_offset_shift),1,fp);
fclose(fp);
}
void load(std::string path = "bfsg.graph"){
FILE* fp = fopen(path.c_str(),"rb");
size_t num_vertices = data->max_vertices();
auto cnt = fread(&edges[0],sizeof(edges[0]) * (num_vertices << vertex_offset_shift),1,fp);
fclose(fp);
}
Data* get_data(){
return data;
}
};