#include "IndexBinaryHNSW.h"

#include <memory>
#include <cstdio>
#include <cmath>
#include <omp.h>

#include <unordered_set>

#include <sys/types.h>

#include "utils.h"
#include "Heap.h"
#include "FaissAssert.h"
#include "IndexBinaryFlat.h"
#include "hamming.h"
#include "AuxIndexStructures.h"

namespace faiss {

namespace {
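// Adds n binary vectors x (assigned ids n0 .. n0 + n - 1) to the HNSW
// graph of index_hnsw. Vertices are inserted level by level, from the
// highest level down, so that the upper layers of the graph exist
// before the more populated lower layers are linked.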
void hnsw_add_vertices(IndexBinaryHNSW& index_hnsw,
                       size_t n0,
                       size_t n,
                       const uint8_t *x,
                       bool verbose,
                       bool preset_levels = false) {
  HNSW& hnsw = index_hnsw.hnsw;
  size_t ntotal = n0 + n;
  double t0 = getmillisecs();
  if (verbose) {
    printf("hnsw_add_vertices: adding %zu elements on top of %zu "
           "(preset_levels=%d)\n",
           n, n0, int(preset_levels));
  }

  int max_level = hnsw.prepare_level_tab(n, preset_levels);

  if (verbose) {
    printf("  max_level = %d\n", max_level);
  }

  // one lock per vertex, so that neighbor lists can be updated
  // concurrently during the parallel add below
  std::vector<omp_lock_t> locks(ntotal);
  for (int i = 0; i < ntotal; i++) {
    omp_init_lock(&locks[i]);
  }
  // add vectors from highest to lowest level
  std::vector<int> hist;
  std::vector<int> order(n);

  { // make buckets with vectors of the same level

    // build histogram of the number of points per level
    for (int i = 0; i < n; i++) {
      HNSW::storage_idx_t pt_id = i + n0;
      int pt_level = hnsw.levels[pt_id] - 1;
      while (pt_level >= hist.size()) {
        hist.push_back(0);
      }
      hist[pt_level]++;
    }

    // accumulate: offsets[l] = first slot in order for level l
    std::vector<int> offsets(hist.size() + 1, 0);
    for (int i = 0; i < hist.size() - 1; i++) {
      offsets[i + 1] = offsets[i] + hist[i];
    }

    // bucket sort: group the point ids by level
    for (int i = 0; i < n; i++) {
      HNSW::storage_idx_t pt_id = i + n0;
      int pt_level = hnsw.levels[pt_id] - 1;
      order[offsets[pt_level]++] = pt_id;
    }
  }
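  // Insert the points one level at a time, starting from the top.
  // Within a level the insertion order is randomized and the
  // insertions run in parallel.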
  { // perform add
    RandomGenerator rng2(789);

    int i1 = n;

    for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) {
      int i0 = i1 - hist[pt_level];

      if (verbose) {
        printf("Adding %d elements at level %d\n",
               i1 - i0, pt_level);
      }

      // random permutation to get rid of dataset order bias
      for (int j = i0; j < i1; j++) {
        std::swap(order[j], order[j + rng2.rand_int(i1 - j)]);
      }

#pragma omp parallel
      {
        VisitedTable vt(ntotal);

        std::unique_ptr<HNSW::DistanceComputer> dis(
          index_hnsw.get_distance_computer());
        int prev_display = verbose && omp_get_thread_num() == 0 ? 0 : -1;

#pragma omp for schedule(dynamic)
        for (int i = i0; i < i1; i++) {
          HNSW::storage_idx_t pt_id = order[i];
          dis->set_query((float *)(x + (pt_id - n0) * index_hnsw.code_size));

          hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt);

          if (prev_display >= 0 && i - i0 > prev_display + 10000) {
            prev_display = i - i0;
            printf("  %d / %d\r", i - i0, i1 - i0);
            fflush(stdout);
          }
        }
      }
      i1 = i0;
    }
    FAISS_ASSERT(i1 == 0);
  }

  if (verbose) {
    printf("Done in %.3f ms\n", getmillisecs() - t0);
  }

  for (int i = 0; i < ntotal; i++) {
    omp_destroy_lock(&locks[i]);
  }
}

}  // anonymous namespace
/**************************************************************
 * IndexBinaryHNSW implementation
 **************************************************************/

IndexBinaryHNSW::IndexBinaryHNSW()
{
  is_trained = true;
}

IndexBinaryHNSW::IndexBinaryHNSW(int d, int M)
    : IndexBinary(d),
      hnsw(M),
      own_fields(true),
      storage(new IndexBinaryFlat(d))
{
  is_trained = true;
}

IndexBinaryHNSW::IndexBinaryHNSW(IndexBinary *storage, int M)
    : IndexBinary(storage->d),
      hnsw(M),
      own_fields(false),
      storage(storage)
{
  is_trained = true;
}

IndexBinaryHNSW::~IndexBinaryHNSW() {
  if (own_fields) {
    delete storage;
  }
}
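// A minimal usage sketch (names and values here are illustrative, not
// from this file): d is the vector dimension in bits, M the HNSW
// connectivity.
//
//   faiss::IndexBinaryHNSW index(d, /*M=*/16);
//   index.add(nb, xb);                  // xb: nb * (d / 8) bytes of codes
//   index.search(nq, xq, k, distances, labels);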
void IndexBinaryHNSW::train(idx_t n, const uint8_t *x)
{
  // the hnsw structure itself does not require training;
  // train the storage if needed
  storage->train(n, x);
  is_trained = true;
}
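// The HNSW code computes float distances. During the search below, the
// int32_t output buffer is reused as scratch space for these floats
// (both types are 4 bytes wide), and the values are rounded back to
// integers in a final pass.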
void IndexBinaryHNSW::search(idx_t n, const uint8_t *x, idx_t k,
                             int32_t *distances, idx_t *labels) const
{
#pragma omp parallel
  {
    VisitedTable vt(ntotal);
    std::unique_ptr<HNSW::DistanceComputer> dis(get_distance_computer());

#pragma omp for
    for (idx_t i = 0; i < n; i++) {
      idx_t *idxi = labels + i * k;
      float *simi = (float *)(distances + i * k);

      dis->set_query((float *)(x + i * code_size));

      maxheap_heapify(k, simi, idxi);
      hnsw.search(*dis, k, idxi, simi, vt);
      maxheap_reorder(k, simi, idxi);
    }
  }

#pragma omp parallel for
  for (int i = 0; i < n * k; ++i) {
    // convert the scratch float distances back to int32_t in place
    distances[i] = std::round(((float *)distances)[i]);
  }
}
void IndexBinaryHNSW::add(idx_t n, const uint8_t *x)
{
  FAISS_THROW_IF_NOT(is_trained);
  idx_t n0 = ntotal;
  storage->add(n, x);
  ntotal = storage->ntotal;

  hnsw_add_vertices(*this, n0, n, x, verbose,
                    hnsw.levels.size() == ntotal);
}

void IndexBinaryHNSW::reset()
{
  hnsw.reset();
  storage->reset();
  ntotal = 0;
}

void IndexBinaryHNSW::reconstruct(idx_t key, uint8_t *recons) const
{
  storage->reconstruct(key, recons);
}
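// Adapter that exposes a HammingComputer (specialized per code size)
// through the HNSW::DistanceComputer interface, reading database codes
// directly from an IndexBinaryFlat.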
namespace {

template<class HammingComputer>
struct FlatHammingDis : HNSW::DistanceComputer {
  const int code_size;
  const uint8_t *b;
  size_t ndis;
  HammingComputer hc;

  float operator () (HNSW::storage_idx_t i) override {
    ndis++;
    return hc.hamming(b + i * code_size);
  }

  float symmetric_dis(HNSW::storage_idx_t i,
                      HNSW::storage_idx_t j) override {
    return HammingComputerDefault(b + j * code_size, code_size)
        .hamming(b + i * code_size);
  }

  explicit FlatHammingDis(const IndexBinaryFlat& storage)
      : code_size(storage.code_size),
        b(storage.xb.data()),
        ndis(0) {}

  // the query arrives as a float* because the DistanceComputer
  // interface is shared with float indexes; it actually points to a
  // binary code
  void set_query(const float *x) override {
    hc.set((uint8_t *)x, code_size);
  }

  virtual ~FlatHammingDis() {
#pragma omp critical
    {
      hnsw_stats.ndis += ndis;
    }
  }
};

}  // anonymous namespace
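// Pick the HammingComputer specialization matching the code size: the
// fixed-size variants (4 to 64 bytes) are faster than the generic
// fallbacks.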
HNSW::DistanceComputer *IndexBinaryHNSW::get_distance_computer() const {
  IndexBinaryFlat *flat_storage = dynamic_cast<IndexBinaryFlat *>(storage);

  FAISS_ASSERT(flat_storage != nullptr);

  switch (code_size) {
    case 4:
      return new FlatHammingDis<HammingComputer4>(*flat_storage);
    case 8:
      return new FlatHammingDis<HammingComputer8>(*flat_storage);
    case 16:
      return new FlatHammingDis<HammingComputer16>(*flat_storage);
    case 20:
      return new FlatHammingDis<HammingComputer20>(*flat_storage);
    case 32:
      return new FlatHammingDis<HammingComputer32>(*flat_storage);
    case 64:
      return new FlatHammingDis<HammingComputer64>(*flat_storage);
    default:
      if (code_size % 8 == 0) {
        return new FlatHammingDis<HammingComputerM8>(*flat_storage);
      } else if (code_size % 4 == 0) {
        return new FlatHammingDis<HammingComputerM4>(*flat_storage);
      }
  }

  return new FlatHammingDis<HammingComputerDefault>(*flat_storage);
}

}  // namespace faiss