36 #include "FaissAssert.h"
// File-local constant with value 8192. Judging by its name it is the number
// of queries handled per block; no use of it is visible in this excerpt, so
// confirm against the full file.
38 static const size_t BLOCKSIZE_QUERY = 8192;
// Byte-wise popcount lookup table: entry v holds the number of 1 bits in the
// byte value v (entry 0 is 0, entry 255 is 8). The byte-oriented hamming()
// below indexes it with the XOR of two code bytes.
43 static const uint8_t hamdis_tab_ham_bytes[256] = {
44 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
45 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
46 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
47 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
48 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
49 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
50 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
51 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
52 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
53 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
54 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
56 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
57 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
58 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
59 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
// Hamming distance between two bit strings stored as bytes.
//   nbits : code length in bits; nbits / 8 bytes are compared.
//   T     : accumulator and return type.
// Each step XORs one byte pair and adds the table popcount of the result.
// NOTE(review): nbits / 8 truncates, so trailing bits are ignored whenever
// nbits is not a multiple of 8.
64 template <
size_t nbits,
typename T>
65 T hamming (
const uint8_t *bs1,
68 const size_t nbytes = nbits / 8;
71 for (i = 0; i < nbytes; i++)
72 h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]];
// Hamming distance between two codes stored as 64-bit words.
//   nbits : code length in bits; nbits / 64 words are compared.
// Accumulates popcount64 of the XOR of each word pair.
// NOTE(review): nbits / 64 truncates, so this generic form only covers
// code lengths that are multiples of 64 bits.
78 template <
size_t nbits>
79 hamdis_t hamming (
const uint64_t * bs1,
const uint64_t * bs2)
81 const size_t nwords = nbits / 64;
84 for (i = 0; i < nwords; i++)
85 h += popcount64 (bs1[i] ^ bs2[i]);
// 64-bit specialisation: the distance is a single popcount of the XOR of the
// first (only) word of each code.
93 hamdis_t hamming<64> (
const uint64_t * pa,
const uint64_t * pb)
95 return popcount64 (pa[0] ^ pb[0]);
// 128-bit specialisation: unrolled sum of the popcounts of the two XORed
// word pairs.
100 hamdis_t hamming<128> (
const uint64_t *pa,
const uint64_t *pb)
102 return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
// 256-bit specialisation: unrolled sum of the popcounts of the four XORed
// word pairs.
107 hamdis_t hamming<256> (
const uint64_t * pa,
const uint64_t * pb)
109 return popcount64 (pa[0] ^ pb[0])
110 + popcount64 (pa[1] ^ pb[1])
111 + popcount64 (pa[2] ^ pb[2])
112 + popcount64 (pa[3] ^ pb[3]);
// Hamming distance over a run-time number of 64-bit words (nwords): sums
// popcount64 of the XOR of each word pair. The line carrying the function
// name is missing from this excerpt; presumably this is the
// hamming (bs1, bs2, nwords) overload called by the non-template hammings().
118 const uint64_t * bs1,
119 const uint64_t * bs2,
124 for (i = 0; i < nwords; i++)
125 h += popcount64 (bs1[i] ^ bs2[i]);
// All-pairs Hamming distances for a compile-time code length.
//   bs1 : n1 codes, nbits / 64 words each
//   bs2 : n2 codes, same layout
//   dis : output, written row-major: dis[i * n2 + j] is the distance between
//         code i of bs1 and code j of bs2.
// The line carrying the function name is missing from this excerpt; the
// dispatcher further down calls it as faiss::hammings<nbits>.
131 template <
size_t nbits>
133 const uint64_t * bs1,
134 const uint64_t * bs2,
135 size_t n1,
size_t n2,
140 const size_t nwords = nbits / 64;
141 for (i = 0; i < n1; i++) {
// Row i: start of query code i and of its output row.
142 const uint64_t * __restrict bs1_ = bs1 + i * nwords;
143 hamdis_t * __restrict dis_ = dis + i * n2;
144 for (j = 0; j < n2; j++)
145 dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
// All-pairs Hamming distances for a run-time code length of nwords 64-bit
// words. Here i and j are word offsets into bs1 / bs2 (they advance by nwords).
// NOTE(review): the result is stored at dis[j] with that word-stepped j, and
// no per-row offset into dis is visible in this excerpt. Unless lines missing
// here advance dis after each row, every row overwrites the previous one and
// the columns are strided by nwords; the templated variant writes
// dis[i * n2 + j] instead. Also confirm that n1 / n2 are scaled to word
// counts before the loops. Verify against the full file.
152 const uint64_t * bs1,
153 const uint64_t * bs2,
157 hamdis_t * __restrict dis)
162 for (i = 0; i < n1; i+=nwords) {
163 const uint64_t * bs1_ = bs1+i;
164 for (j = 0; j < n2; j+=nwords)
165 dis[j] = hamming (bs1_, bs2+j, nwords);
// Threshold test over all (bs1 code, bs2 code) pairs: for n1 x n2 pairs the
// Hamming distance is compared against ht (<=). posm starts at 0 and is
// presumably the match counter; the statement executed on a match and the
// pointer increments are not visible in this excerpt.
173 template <
size_t nbits>
174 void hamming_count_thres (
175 const uint64_t * bs1,
176 const uint64_t * bs2,
182 const size_t nwords = nbits / 64;
183 size_t i, j, posm = 0;
// Remember the start of the second set (presumably to rewind bs2 for each
// outer iteration -- TODO confirm).
184 const uint64_t * bs2_ = bs2;
186 for (i = 0; i < n1; i++) {
188 for (j = 0; j < n2; j++) {
190 if (hamming <nbits> (bs1, bs2) <= ht)
// Threshold test over all unordered pairs (i, j), i < j, of the n codes in
// dbs: each pair's Hamming distance is compared against ht (<=). A code
// occupies nwords = nbits / 64 64-bit words. posm starts at 0 and is
// presumably the match counter; the statement executed on a match and the
// pointer increments are not visible in this excerpt.
200 template <
size_t nbits>
201 void crosshamming_count_thres (
202 const uint64_t * dbs,
207 const size_t nwords = nbits / 64;
208 size_t i, j, posm = 0;
209 const uint64_t * bs1 = dbs;
210 for (i = 0; i < n; i++) {
// The inner loop starts at code i + 1, which lies nwords words after bs1.
// A hard-coded offset of 2 is only correct for 128-bit codes: it skips a
// code for 64 bits and lands inside the current code for 256 / 512 bits.
211 const uint64_t * bs2 = bs1 + nwords;
212 for (j = i + 1; j < n; j++) {
214 if (hamming <nbits> (bs1, bs2) <= ht)
// Computes the Hamming distance h for each of the n1 x n2 (bs1, bs2) code
// pairs and returns a size_t. What is done with h (presumably a comparison
// against a threshold and recording of the matching pair) and the returned
// value are not visible in this excerpt -- confirm against the full file.
224 template <
size_t nbits>
225 size_t match_hamming_thres (
226 const uint64_t * bs1,
227 const uint64_t * bs2,
234 const size_t nwords = nbits / 64;
235 size_t i, j, posm = 0;
// Remember the start of the second set (presumably to rewind bs2 for each
// outer iteration -- TODO confirm).
237 const uint64_t * bs2_ = bs2;
238 for (i = 0; i < n1; i++) {
240 for (j = 0; j < n2; j++) {
242 h = hamming <nbits> (bs1, bs2);
// k-nearest-neighbour search in Hamming space using a HammingComputer.
//   HammingComputer : constructed from (query code, bytes_per_code) and
//                     queried with hamming (database code).
//   ha              : one max-heap per query (ha->nh heaps; values in
//                     ha->val, ids in ha->ids, k slots each).
//   init_heap       : when true (default) the heaps are heapify()'d first.
// After the scan the heaps are reorder()'ed when `order` is set.
262 template <
class HammingComputer>
264 void hammings_knn_hc (
266 int_maxheap_array_t * ha,
271 bool init_heap =
true)
276 if (init_heap) ha->heapify ();
// One query per iteration; each iteration only touches its own heap slice.
280 #pragma omp parallel for
281 for (
size_t i = 0; i < ha->nh; i++) {
282 HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
284 const uint8_t * bs2_ = bs2;
286 hamdis_t * __restrict bh_val_ = ha->val + i * k;
287 long * __restrict bh_ids_ = ha->ids + i * k;
// Scan the n2 database codes; bs2_ advances one code per step.
289 for (j = 0; j < n2; j++, bs2_+= bytes_per_code) {
290 dis = hc.hamming (bs2_);
// bh_val_[0] is the heap top (largest distance kept so far): a strictly
// smaller distance replaces it.
291 if (dis < bh_val_[0]) {
292 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
293 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
297 if (order) ha->reorder ();
// k-nearest-neighbour search specialised for codes of exactly one 64-bit
// word (nwords = 1). Same heap replacement scheme as hammings_knn_hc, with the
// distance computed directly as popcount64 of the XOR.
// How init_heap and the ordering flag are used is not visible in this excerpt.
304 void hammings_knn_1 (
305 int_maxheap_array_t * ha,
306 const uint64_t * bs1,
307 const uint64_t * bs2,
310 bool init_heap =
true)
312 const size_t nwords = 1;
// One query per iteration; each iteration only touches its own heap slice.
320 #pragma omp parallel for
321 for (
size_t i = 0; i < ha->nh; i++) {
322 const uint64_t bs1_ = bs1 [i];
323 const uint64_t * bs2_ = bs2;
325 hamdis_t * bh_val_ = ha->val + i * k;
// Local copy of the heap top, refreshed after every replacement.
326 hamdis_t bh_val_0 = bh_val_[0];
327 long * bh_ids_ = ha->ids + i * k;
329 for (j = 0; j < n2; j++, bs2_+= nwords) {
330 dis = popcount64 (bs1_ ^ *bs2_);
331 if (dis < bh_val_0) {
332 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
333 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
334 bh_val_0 = bh_val_[0];
// Converts the d floats at x into a bit vector written to b, processing the
// input in groups of 8 components (presumably one output byte per group).
// The rule deciding each bit is not visible in this excerpt.
// NOTE(review): i and j are int while d is size_t, so the comparisons and
// `d - i` mix signed and unsigned operands; this misbehaves if d exceeds
// INT_MAX.
353 void fvec2bitvec (
const float * x, uint8_t * b,
size_t d)
355 for (
int i = 0; i < d; i += 8) {
// Number of valid components in this group: 8, or fewer for the last group
// when d is not a multiple of 8.
358 int nj = i + 8 <= d ? 8 : d - i;
359 for (
int j = 0; j < nj; j++) {
// Batch version of fvec2bitvec: converts n vectors of d floats each.
//   x : n * d floats, vector i starts at x + i * d
//   b : output, (d + 7) / 8 bytes per vector (d bits rounded up to whole bytes)
// The vectors are converted independently inside an OpenMP parallel loop.
373 void fvecs2bitvecs (
const float * x, uint8_t * b,
size_t d,
size_t n)
375 const long ncodes = ((d + 7) / 8);
376 #pragma omp parallel for
377 for (
size_t i = 0; i < n; i++)
378 fvec2bitvec (x + i * d, b + i * ncodes, d);
// File-local helper that iterates over the 64 bit positions of b. The loop
// body is not visible in this excerpt; judging by the name it returns b with
// its bit order reversed -- confirm against the full file.
383 static uint64_t uint64_reverse_bits (uint64_t b)
387 for (i = 0; i < 64; i++) {
// Prints the first d bits of the bit vector b as '0' / '1' characters, in
// chunks of up to 64 bits.
// NOTE(review): each chunk is loaded as a full uint64_t through a cast of the
// byte pointer, so 8 bytes are read even when fewer than 64 bits remain, and
// the load assumes b is suitably aligned. The statements that shift brev and
// advance b are not visible in this excerpt -- confirm they exist.
397 void bitvec_print (
const uint8_t * b,
size_t d)
400 for (i = 0; i < d; ) {
401 uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
402 for (j = 0; j < 64 && i < d; j++, i++) {
// Emit the current lowest bit of the reversed word.
403 printf (
"%d", (
int) (brev & 1));
// Reinterpret a pointer expression as uint64_t *. The argument is wrapped in
// parentheses so that a compound argument such as C64(p + off) casts the whole
// expression instead of only its first operand.
#define C64(x) ((uint64_t *)(x))
// Byte-pointer entry point for all-pairs Hamming distances (na x nb results
// in dis). Throws unless ncodes is a multiple of 8, then forwards to the
// templated faiss::hammings for 64, 128, 256 or 512 bits, reinterpreting the
// code pointers as 64-bit words via C64. The conditions selecting each branch
// and any fallback for other sizes are not visible in this excerpt.
426 size_t na,
size_t nb,
428 hamdis_t * __restrict dis)
430 FAISS_THROW_IF_NOT (ncodes % 8 == 0);
433 faiss::hammings <64> (C64(a), C64(b), na, nb, dis);
return;
435 faiss::hammings <128> (C64(a), C64(b), na, nb, dis);
return;
437 faiss::hammings <256> (C64(a), C64(b), na, nb, dis);
return;
439 faiss::hammings <512> (C64(a), C64(b), na, nb, dis);
return;
// k-NN dispatcher that requires ncodes to be a multiple of 8 (throws
// otherwise) and selects an implementation by code size: hammings_knn_1, or
// hammings_knn_hc with HammingComputer16 / 32 / M8. Every call passes
// `false, true` as its two trailing flags (presumably order = false,
// init_heap = true -- confirm against the callee signatures). The conditions
// selecting each branch are not visible in this excerpt.
446 void hammings_knn_core (
447 int_maxheap_array_t * ha,
453 FAISS_THROW_IF_NOT (ncodes % 8 == 0);
456 hammings_knn_1 (ha, C64(a), C64(b), nb,
false,
true);
461 hammings_knn_hc<faiss::HammingComputer16>
462 (16, ha, a, b, nb,
false,
true);
465 hammings_knn_hc<faiss::HammingComputer32>
466 (32, ha, a, b, nb,
false,
true);
469 hammings_knn_hc<faiss::HammingComputerM8>
470 (ncodes, ha, a, b, nb,
false,
true);
// Body of the k-NN dispatcher that forwards the caller's `order` flag and
// always passes true as the last argument (init_heap). Implementations used:
// HammingComputer4, hammings_knn_1, HammingComputer16, HammingComputer32,
// then HammingComputerM8 when ncodes is a multiple of 8 and
// HammingComputerDefault otherwise. The function header and the conditions
// selecting the fixed-size branches are not visible in this excerpt.
484 hammings_knn_hc<faiss::HammingComputer4>
485 (4, ha, a, b, nb, order,
true);
488 hammings_knn_1 (ha, C64(a), C64(b), nb, order,
true);
493 hammings_knn_hc<faiss::HammingComputer16>
494 (16, ha, a, b, nb, order,
true);
497 hammings_knn_hc<faiss::HammingComputer32>
498 (32, ha, a, b, nb, order,
true);
501 if(ncodes % 8 == 0) {
502 hammings_knn_hc<faiss::HammingComputerM8>
503 (ncodes, ha, a, b, nb, order,
true);
505 hammings_knn_hc<faiss::HammingComputerDefault>
506 (ncodes, ha, a, b, nb, order,
true);
// Run-time dispatcher for the templated faiss::hamming_count_thres: supports
// 64, 128, 256 and 512-bit codes and throws for anything else. The selecting
// conditions and the remaining call arguments are not visible in this excerpt.
// NOTE(review): the error message says "bits" but prints ncodes; if ncodes is
// a byte count (as the `ncodes % 8` checks elsewhere suggest) the reported
// number is in the wrong unit -- confirm.
516 void hamming_count_thres (
527 faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
531 faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
535 faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
539 faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
543 FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
// Run-time dispatcher for the templated faiss::crosshamming_count_thres:
// forwards (dbs, n, ht, nptr) for 64, 128, 256 and 512-bit codes and throws
// for anything else. The selecting conditions are not visible in this excerpt.
// NOTE(review): the error message says "bits" but prints ncodes; if ncodes is
// a byte count the reported number is in the wrong unit -- confirm.
549 void crosshamming_count_thres (
558 faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr);
561 faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
564 faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
567 faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
570 FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
// Run-time dispatcher for the templated faiss::match_hamming_thres: forwards
// (bs1, bs2, n1, n2, ht, idx, dis) for 64, 128, 256 and 512-bit codes and
// returns the callee's result; throws for any other size. The selecting
// conditions are not visible in this excerpt.
// NOTE(review): the error message says "bits" but prints ncodes; if ncodes is
// a byte count the reported number is in the wrong unit -- confirm.
576 size_t match_hamming_thres (
588 return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
589 n1, n2, ht, idx, dis);
591 return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
592 n1, n2, ht, idx, dis);
594 return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
595 n1, n2, ht, idx, dis);
597 return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
598 n1, n2, ht, idx, dis);
600 FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
// Scans nb database codes for a single query and keeps the k best results in
// a max-heap (values in bh_val_, ids in bh_ids_).
//   HammingComputer : built from the query code ca and code_size; its
//                     hamming (cb) gives the distance to the code at cb.
// The statement advancing cb between iterations is not visible in this
// excerpt.
616 template <
class HammingComputer>
617 static void hamming_dis_inner_loop (
627 HammingComputer hc (ca, code_size);
629 for (
size_t j = 0; j < nb; j++) {
630 int ndiff = hc.hamming (cb);
// bh_val_[0] is the heap top: a strictly smaller distance replaces it.
632 if (ndiff < bh_val_[0]) {
633 maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
634 maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
// Parallel loop over the na queries: each iteration locates its query code
// and heap slice, then runs hamming_dis_inner_loop with GenHammingComputer8,
// 16 or 32, or GenHammingComputerM8 with the run-time code_size. The enclosing
// function header and the conditions selecting each branch are not visible in
// this excerpt.
// NOTE(review): the loop index is int; confirm the type of na to rule out a
// signed/unsigned comparison or truncation for large na.
653 #pragma omp parallel for
654 for (
int i = 0; i < na; i++) {
655 const uint8_t *ca = a + i * code_size;
656 const uint8_t *cb = b;
658 hamdis_t * bh_val_ = ha->
val + i * k;
659 long * bh_ids_ = ha->
ids + i * k;
663 hamming_dis_inner_loop<GenHammingComputer8>
664 (ca, cb, nb, 8, k, bh_val_, bh_ids_);
667 hamming_dis_inner_loop<GenHammingComputer16>
668 (ca, cb, nb, 16, k, bh_val_, bh_ids_);
671 hamming_dis_inner_loop<GenHammingComputer32>
672 (ca, cb, nb, 32, k, bh_val_, bh_ids_);
675 hamming_dis_inner_loop<GenHammingComputerM8>
676 (ca, cb, nb, code_size, k, bh_val_, bh_ids_);
size_t k
allocated size per heap
void generalized_hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)
void reorder()
reorder all the heaps
void hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
TI * ids
identifiers (size nh * k)
void heapify()
prepare all the heaps before adding
void hammings(const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)
T * val
values (distances or similarities), size nh * k