37 #include "FaissAssert.h"
// Block size constant; judging by the name it is a number of queries handled
// per block, but no use of it is visible in this part of the file.
// NOTE(review): confirm where it is consumed.
39 static const size_t BLOCKSIZE_QUERY = 8192;
// Per-byte bit-count lookup table: entry v holds the number of 1 bits in the
// byte value v (e.g. [0] = 0, [3] = 2, [255] = 8). The byte-wise hamming()
// indexes it with the XOR of two code bytes to count the bits that differ.
44 static const uint8_t hamdis_tab_ham_bytes[256] = {
45 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
46 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
47 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
48 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
49 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
50 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
51 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
52 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
53 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
54 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
56 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
57 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
58 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
59 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
60 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
// Byte-wise Hamming distance between two binary codes.
//   nbits : code size in bits; nbits / 8 bytes of each code are compared
//   T     : type each per-byte bit count is cast to before being accumulated
//   bs1   : first code, read one byte at a time and XORed with the matching
//           byte of bs2
65 template <
size_t nbits,
typename T>
66 T hamming (
const uint8_t *bs1,
// integer division: a trailing nbits % 8 remainder is not compared
69 const size_t nbytes = nbits / 8;
72 for (i = 0; i < nbytes; i++)
// the XOR has a 1 bit wherever the two bytes differ; the table lookup turns
// that byte into its bit count
73 h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]];
// Word-wise Hamming distance between two codes stored as 64-bit words.
//   nbits    : code size in bits; nbits / 64 words of each code are compared
//   bs1, bs2 : the two codes
// popcount64 is declared outside this part of the file (presumably the 64-bit
// population count).
79 template <
size_t nbits>
80 hamdis_t hamming (
const uint64_t * bs1,
const uint64_t * bs2)
// integer division: a trailing nbits % 64 remainder is not compared
82 const size_t nwords = nbits / 64;
85 for (i = 0; i < nwords; i++)
// the XOR has a 1 bit wherever the two words differ
86 h += popcount64 (bs1[i] ^ bs2[i]);
// Specialization for 64-bit codes: each code is a single 64-bit word, so the
// result is one popcount64 of the XOR, with no loop.
94 hamdis_t hamming<64> (
const uint64_t * pa,
const uint64_t * pb)
96 return popcount64 (pa[0] ^ pb[0]);
// Specialization for 128-bit codes: two 64-bit words per code, written out
// without a loop.
101 hamdis_t hamming<128> (
const uint64_t *pa,
const uint64_t *pb)
103 return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
// Specialization for 256-bit codes: four 64-bit words per code, written out
// without a loop.
108 hamdis_t hamming<256> (
const uint64_t * pa,
const uint64_t * pb)
110 return popcount64 (pa[0] ^ pb[0])
111 + popcount64 (pa[1] ^ pb[1])
112 + popcount64 (pa[2] ^ pb[2])
113 + popcount64 (pa[3] ^ pb[3]);
// Fragment of the Hamming distance variant whose word count (nwords) is a
// run-time value rather than a template argument; the start of this
// definition is not visible here.
119 const uint64_t * bs1,
120 const uint64_t * bs2,
// accumulate popcount64 of the XOR of each of the nwords word pairs
125 for (i = 0; i < nwords; i++)
126 h += popcount64 (bs1[i] ^ bs2[i]);
// All-pairs Hamming distances between two sets of fixed-size codes.
//   nbits    : code size in bits (nbits / 64 words per code)
//   bs1, bs2 : the two code sets, codes stored back to back
//   n1, n2   : number of codes read from bs1 and from bs2
// The distance between code i of bs1 and code j of bs2 is written to
// dis[i * n2 + j], i.e. dis is filled as an n1 x n2 row-major table.
132 template <
size_t nbits>
134 const uint64_t * bs1,
135 const uint64_t * bs2,
136 size_t n1,
size_t n2,
141 const size_t nwords = nbits / 64;
142 for (i = 0; i < n1; i++) {
// start of code i in bs1
143 const uint64_t * __restrict bs1_ = bs1 + i * nwords;
// start of output row i
144 hamdis_t * __restrict dis_ = dis + i * n2;
145 for (j = 0; j < n2; j++)
146 dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
// Fragment of the all-pairs Hamming distance routine for codes whose length
// (nwords 64-bit words) is only known at run time; the start of this
// definition is not visible here. Results are written through dis.
153 const uint64_t * bs1,
154 const uint64_t * bs2,
158 hamdis_t * __restrict dis)
// i and j advance by nwords: they are used as word offsets into bs1 and bs2,
// not as code indices
163 for (i = 0; i < n1; i+=nwords) {
164 const uint64_t * bs1_ = bs1+i;
// NOTE(review): the destination index below is j alone. It does not involve i
// and it steps by nwords, unlike the templated variant, which writes
// dis[i * n2 + j]. Unless dis is advanced on lines not visible here, every
// code of bs1 overwrites the same entries. Confirm against the full definition.
165 for (j = 0; j < n2; j+=nwords)
166 dis[j] = hamming (bs1_, bs2+j, nwords);
// Hamming threshold test over pairs of codes from two sets (templated on the
// code size nbits, nbits / 64 words per code).
//   bs1, bs2 : the two code sets, as 64-bit words
// n1, n2 and ht are used below but declared on lines not visible here; the
// action taken when a pair passes the test is not visible either.
174 template <
size_t nbits>
175 void hamming_count_thres (
176 const uint64_t * bs1,
177 const uint64_t * bs2,
183 const size_t nwords = nbits / 64;
184 size_t i, j, posm = 0;
// keeps the initial value of bs2 (presumably so bs2 can be rewound for each
// bs1 code; the rewind itself is not visible here)
185 const uint64_t * bs2_ = bs2;
187 for (i = 0; i < n1; i++) {
189 for (j = 0; j < n2; j++) {
// a pair passes when its distance is at most ht (inclusive)
191 if (hamming <nbits> (bs1, bs2) <= ht)
// Hamming threshold test over pairs of codes taken from a single set
// (templated on the code size nbits, nbits / 64 words per code).
//   dbs : the codes, as 64-bit words
// The inner index starts at i + 1, so only pairs (i, j) with j > i are
// tested. n and ht are used below but declared on lines not visible here.
201 template <
size_t nbits>
202 void crosshamming_count_thres (
203 const uint64_t * dbs,
208 const size_t nwords = nbits / 64;
209 size_t i, j, posm = 0;
210 const uint64_t * bs1 = dbs;
211 for (i = 0; i < n; i++) {
// j starts at i + 1, so bs2 has to start one whole code (nwords words) after
// bs1. A literal 2 here is only right for 128-bit codes, while this template
// is also instantiated for 64, 256 and 512 bits.
212 const uint64_t * bs2 = bs1 + nwords;
213 for (j = i + 1; j < n; j++) {
// a pair passes when its distance is at most ht (inclusive)
215 if (hamming <nbits> (bs1, bs2) <= ht)
// Hamming threshold matching between two code sets (templated on the code
// size nbits, nbits / 64 words per code); returns a size_t.
//   bs1, bs2 : the two code sets, as 64-bit words
// n1 and n2 are used below but declared on lines not visible here; what is
// done with each distance, and what the return value is, are not visible
// either.
225 template <
size_t nbits>
226 size_t match_hamming_thres (
227 const uint64_t * bs1,
228 const uint64_t * bs2,
235 const size_t nwords = nbits / 64;
236 size_t i, j, posm = 0;
// keeps the initial value of bs2 (presumably so bs2 can be rewound for each
// bs1 code; the rewind itself is not visible here)
238 const uint64_t * bs2_ = bs2;
239 for (i = 0; i < n1; i++) {
241 for (j = 0; j < n2; j++) {
// distance of the current pair, kept in h for the lines that follow
243 h = hamming <nbits> (bs1, bs2);
// Nearest-neighbour search in Hamming space with one heap per query code.
//   HammingComputer : constructed from (query code pointer, bytes_per_code);
//                     its hamming(ptr) gives the distance to the code at ptr
//   ha              : heap array; heap i uses entries [i * k, (i + 1) * k) of
//                     ha->val (distances) and ha->ids (identifiers)
//   init_heap       : when true (the default) ha->heapify() is called first
// bytes_per_code, bs1, bs2, n2, k and order are used below but declared on
// lines not visible here.
263 template <
class HammingComputer>
265 void hammings_knn_hc (
267 int_maxheap_array_t * ha,
272 bool init_heap =
true)
277 if (init_heap) ha->heapify ();
// one iteration per heap, i.e. per query code; each iteration only touches
// its own slice of ha->val / ha->ids
281 #pragma omp parallel for
282 for (
size_t i = 0; i < ha->nh; i++) {
// distance computer bound to query code i
283 HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
285 const uint8_t * bs2_ = bs2;
287 hamdis_t * __restrict bh_val_ = ha->val + i * k;
288 long * __restrict bh_ids_ = ha->ids + i * k;
// scan the n2 codes of bs2, stepping one code (bytes_per_code bytes) at a time
290 for (j = 0; j < n2; j++, bs2_+= bytes_per_code) {
291 dis = hc.hamming (bs2_);
// entry 0 is compared against (the heap root, going by maxheap_pop/push); a
// strictly smaller distance replaces it, with j recorded as the identifier
292 if (dis < bh_val_[0]) {
293 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
294 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
// optional final pass over all heaps
298 if (order) ha->reorder ();
// Nearest-neighbour search for codes made of exactly one 64-bit word.
//   ha        : heap array; heap i uses entries [i * k, (i + 1) * k) of
//               ha->val and ha->ids
//   bs1       : query codes, one word each (bs1[i] is query i)
//   bs2       : database codes, one word each
//   init_heap : defaults to true; its use is on lines not visible here
// n2 and k are used below but declared on lines not visible here.
305 void hammings_knn_1 (
306 int_maxheap_array_t * ha,
307 const uint64_t * bs1,
308 const uint64_t * bs2,
311 bool init_heap =
true)
313 const size_t nwords = 1;
// one iteration per heap, i.e. per query code
321 #pragma omp parallel for
322 for (
size_t i = 0; i < ha->nh; i++) {
// the whole query code fits in one word, so it is copied by value
323 const uint64_t bs1_ = bs1 [i];
324 const uint64_t * bs2_ = bs2;
326 hamdis_t * bh_val_ = ha->val + i * k;
// local copy of heap entry 0, so the comparison in the scan does not have to
// re-read the heap array
327 hamdis_t bh_val_0 = bh_val_[0];
328 long * bh_ids_ = ha->ids + i * k;
330 for (j = 0; j < n2; j++, bs2_+= nwords) {
331 dis = popcount64 (bs1_ ^ *bs2_);
332 if (dis < bh_val_0) {
333 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
334 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
// the heap changed, so refresh the local copy of entry 0
335 bh_val_0 = bh_val_[0];
// Converts one float vector into a bit vector.
//   x : input vector of d floats
//   b : output buffer
//   d : number of input components
// The components are handled in groups of 8; how a float is mapped to a bit
// and how b is written are on lines not visible here.
// NOTE(review): i, j and nj are int while d is size_t, so the loop condition
// is a signed/unsigned comparison and d - i is narrowed to int.
354 void fvec2bitvec (
const float * x, uint8_t * b,
size_t d)
356 for (
int i = 0; i < d; i += 8) {
// number of components in this group: 8, or the remainder d - i for a final
// partial group
359 int nj = i + 8 <= d ? 8 : d - i;
360 for (
int j = 0; j < nj; j++) {
// Converts n float vectors into bit vectors by calling fvec2bitvec on each.
//   x : n input vectors of d floats each, stored back to back
//   b : output buffer, (d + 7) / 8 bytes per vector
//   d : number of components per vector
//   n : number of vectors
374 void fvecs2bitvecs (
const float * x, uint8_t * b,
size_t d,
size_t n)
// bytes per output code: d bits rounded up to whole bytes
376 const long ncodes = ((d + 7) / 8);
// each iteration reads its own input vector and writes its own output code
377 #pragma omp parallel for
378 for (
size_t i = 0; i < n; i++)
379 fvec2bitvec (x + i * d, b + i * ncodes, d);
// File-local helper taking and returning a 64-bit word; it loops over the 64
// bit positions. Going by the name it returns b with its bit order reversed
// (the loop body is not visible here).
384 static uint64_t uint64_reverse_bits (uint64_t b)
388 for (i = 0; i < 64; i++) {
// Prints the first d bits of the bit vector b to stdout as digits, working
// through it 64 bits at a time.
//   b : bit vector
//   d : number of bits to print
// NOTE(review): each chunk is read as a whole uint64_t through a pointer
// cast, i.e. 8 bytes are read even when fewer than 64 bits remain, and the
// read assumes b is suitably aligned. Confirm callers size and align b
// accordingly.
398 void bitvec_print (
const uint8_t * b,
size_t d)
// i is advanced by the inner loop only
401 for (i = 0; i < d; ) {
402 uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
// at most 64 bits per chunk, stopping early once d bits have been printed
403 for (j = 0; j < 64 && i < d; j++, i++) {
// prints the lowest bit of brev as 0 or 1
404 printf (
"%d", (
int) (brev & 1));
// Reinterprets a pointer as a pointer to 64-bit words (C-style cast, which
// also drops any const qualifier). The argument is parenthesised so that an
// expression such as C64(p + 8) casts the whole expression instead of casting
// p first and then advancing by 8 words.
#define C64(x) ((uint64_t *)(x))
// Fragment of the non-template all-pairs entry point (the start of this
// definition is not visible here): it forwards byte codes a and b to the
// fixed-size hammings<nbits> instantiation for 64, 128, 256 or 512 bits.
//   na, nb : number of codes in a and in b
//   dis    : output distances
// The conditions selecting each call are on lines not visible here.
427 size_t na,
size_t nb,
429 hamdis_t * __restrict dis)
// codes are processed as 64-bit words, so their size in bytes must be a
// multiple of 8
431 FAISS_ASSERT (ncodes % 8 == 0);
434 faiss::hammings <64> (C64(a), C64(b), na, nb, dis);
return;
436 faiss::hammings <128> (C64(a), C64(b), na, nb, dis);
return;
438 faiss::hammings <256> (C64(a), C64(b), na, nb, dis);
return;
440 faiss::hammings <512> (C64(a), C64(b), na, nb, dis);
return;
// Runs the nearest-neighbour search with the routine matching the code size:
// hammings_knn_1 for single-word codes, otherwise hammings_knn_hc with the
// HammingComputer16 / HammingComputer32 / HammingComputerM8 computers. The
// conditions selecting each call are on lines not visible here.
//   ha : heap array receiving the results
// Every call passes false, true as its last two arguments; the final one is
// init_heap, and the one before it is presumably order (no reordering here).
447 void hammings_knn_core (
448 int_maxheap_array_t * ha,
// asserted before any of the calls below: the code size in bytes must be a
// multiple of 8
454 FAISS_ASSERT (ncodes % 8 == 0);
457 hammings_knn_1 (ha, C64(a), C64(b), nb,
false,
true);
462 hammings_knn_hc<faiss::HammingComputer16>
463 (16, ha, a, b, nb,
false,
true);
466 hammings_knn_hc<faiss::HammingComputer32>
467 (32, ha, a, b, nb,
false,
true);
// generic path: the actual code size is passed instead of a fixed one
470 hammings_knn_hc<faiss::HammingComputerM8>
471 (ncodes, ha, a, b, nb,
false,
true);
// Fragment of the nearest-neighbour entry point (the start of this definition
// is not visible here): it picks the search routine for the code size and
// forwards the caller's order flag, always with init_heap = true. The
// conditions selecting each call are on lines not visible here.
485 hammings_knn_hc<faiss::HammingComputer4>
486 (4, ha, a, b, nb, order,
true);
489 hammings_knn_1 (ha, C64(a), C64(b), nb, order,
true);
494 hammings_knn_hc<faiss::HammingComputer16>
495 (16, ha, a, b, nb, order,
true);
498 hammings_knn_hc<faiss::HammingComputer32>
499 (32, ha, a, b, nb, order,
true);
// the generic computer is only used for code sizes that are a multiple of
// 8 bytes
502 FAISS_ASSERT (ncodes % 8 == 0);
503 hammings_knn_hc<faiss::HammingComputerM8>
504 (ncodes, ha, a, b, nb, order,
true);
// Non-template entry point: forwards to the hamming_count_thres<nbits>
// instantiation for 64, 128, 256 or 512 bits, reinterpreting both code
// pointers as 64-bit words via C64. The conditions selecting each call, and
// the remaining arguments, are on lines not visible here.
513 void hamming_count_thres (
524 faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
528 faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
532 faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
536 faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
// negating a string literal always yields false, so this assertion always
// fails: it reports a code size with no instantiation above
540 FAISS_ASSERT (!
"not-implemented for this number of bits");
// Non-template entry point: forwards (dbs, n, ht, nptr) to the
// crosshamming_count_thres<nbits> instantiation for 64, 128, 256 or 512 bits,
// reinterpreting dbs as 64-bit words via C64. The conditions selecting each
// call are on lines not visible here.
546 void crosshamming_count_thres (
555 faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr);
558 faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
561 faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
564 faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
// negating a string literal always yields false, so this assertion always
// fails: it reports a code size with no instantiation above
567 FAISS_ASSERT (!
"not-implemented for this number of bits");
// Non-template entry point: returns the result of the
// match_hamming_thres<nbits> instantiation for 64, 128, 256 or 512 bits,
// called with (bs1, bs2, n1, n2, ht, idx, dis) and both code pointers
// reinterpreted as 64-bit words via C64. The conditions selecting each call
// are on lines not visible here.
573 size_t match_hamming_thres (
585 return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
586 n1, n2, ht, idx, dis);
588 return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
589 n1, n2, ht, idx, dis);
591 return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
592 n1, n2, ht, idx, dis);
594 return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
595 n1, n2, ht, idx, dis);
// negating a string literal always yields false, so this assertion always
// fails: it reports a code size with no instantiation above
597 FAISS_ASSERT (!
"not-implemented for this number of bits");
// File-local scan of nb database codes against one query code, updating a
// single heap.
//   HammingComputer : constructed from (ca, code_size); its hamming(cb) gives
//                     the value compared against the heap
// ca, cb, nb, code_size, k, bh_val_ and bh_ids_ are used below but declared
// on lines not visible here, as is the way cb moves on to the next code.
612 template <
class HammingComputer>
613 static void hamming_dis_inner_loop (
// computer bound to the query code
623 HammingComputer hc (ca, code_size);
625 for (
size_t j = 0; j < nb; j++) {
626 int ndiff = hc.hamming (cb);
// entry 0 is compared against (the heap root, going by maxheap_pop/push); a
// strictly smaller value replaces it, with j recorded as the identifier
628 if (ndiff < bh_val_[0]) {
629 maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
630 maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
// Fragment of a per-query search loop (the start of the enclosing definition
// is not visible here): for each of the na query codes it runs
// hamming_dis_inner_loop over all nb codes of b with the GenHammingComputer
// matching the code size. The conditions selecting each call are on lines not
// visible here.
// each iteration only touches its own slice of ha->val / ha->ids
649 #pragma omp parallel for
650 for (
int i = 0; i < na; i++) {
// query code i; every query scans b from its first code
651 const uint8_t *ca = a + i * code_size;
652 const uint8_t *cb = b;
// heap i uses entries [i * k, (i + 1) * k) of ha->val and ha->ids
654 hamdis_t * bh_val_ = ha->
val + i * k;
655 long * bh_ids_ = ha->
ids + i * k;
659 hamming_dis_inner_loop<GenHammingComputer8>
660 (ca, cb, nb, 8, k, bh_val_, bh_ids_);
663 hamming_dis_inner_loop<GenHammingComputer16>
664 (ca, cb, nb, 16, k, bh_val_, bh_ids_);
667 hamming_dis_inner_loop<GenHammingComputer32>
668 (ca, cb, nb, 32, k, bh_val_, bh_ids_);
// generic path: the actual code size is passed instead of a fixed one
671 hamming_dis_inner_loop<GenHammingComputerM8>
672 (ca, cb, nb, code_size, k, bh_val_, bh_ids_);
size_t k
allocated size per heap
void generalized_hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)
void reorder()
reorder all the heaps
void hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
TI * ids
identifiers (size nh * k)
void heapify()
prepare all the heaps before adding
void hammings(const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)
T * val
values (distances or similarities), size nh * k