38 #include "FaissAssert.h"
/* Block-size constant; it is not referenced in the visible part of this listing. */
40 static const size_t BLOCKSIZE_QUERY = 8192;
/* Number of database codes handled per batch by the k-NN loops below
 * (hammings_knn_hc and hammings_knn_mc copy it into their block_size).
 * It is a plain non-const global. */
45 size_t hamming_batch_size = 65536;
/* Per-byte bit-count table: entry v holds the number of 1 bits in the byte
 * value v (entry 0 is 0, entry 255 is 8). The byte-wise hamming() below
 * indexes it with the XOR of two code bytes. */
47 static const uint8_t hamdis_tab_ham_bytes[256] = {
48 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
49 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
50 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
51 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
52 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
53 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
54 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
56 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
57 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
58 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
59 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
60 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
61 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
62 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
63 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
/* Byte-wise Hamming distance between two codes of nbits bits.
 *   nbits  code size in bits, fixed at compile time; nbits / 8 bytes are visited
 *   T      return type; every table entry is cast to T before being added
 * The accumulator h is declared on a line not shown in this listing. */
68 template <
size_t nbits,
typename T>
69 T hamming (
const uint8_t *bs1,
72 const size_t nbytes = nbits / 8;
// XOR marks the differing bits of each byte pair; the table converts that byte into a bit count.
75 for (i = 0; i < nbytes; i++)
76 h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]];
/* Word-wise Hamming distance between two codes of nbits bits stored as
 * 64-bit words: nbits / 64 words are XOR-ed and their set bits summed
 * with popcount64 (defined outside this listing). */
82 template <
size_t nbits>
83 hamdis_t hamming (
const uint64_t * bs1,
const uint64_t * bs2)
85 const size_t nwords = nbits / 64;
88 for (i = 0; i < nwords; i++)
89 h += popcount64 (bs1[i] ^ bs2[i]);
/* 64-bit variant: one word per code, so a single XOR + popcount64 with no loop. */
97 hamdis_t hamming<64> (
const uint64_t * pa,
const uint64_t * pb)
99 return popcount64 (pa[0] ^ pb[0]);
/* 128-bit variant: two words per code, written out without a loop. */
104 hamdis_t hamming<128> (
const uint64_t *pa,
const uint64_t *pb)
106 return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
/* 256-bit variant: four words per code, written out without a loop. */
111 hamdis_t hamming<256> (
const uint64_t * pa,
const uint64_t * pb)
113 return popcount64 (pa[0] ^ pb[0])
114 + popcount64 (pa[1] ^ pb[1])
115 + popcount64 (pa[2] ^ pb[2])
116 + popcount64 (pa[3] ^ pb[3]);
122 const uint64_t * bs1,
123 const uint64_t * bs2,
/* Sums popcount64 over the XOR of nwords word pairs.
 * NOTE(review): the signature line is not visible here; this is presumably
 * the overload that takes nwords as a run-time argument, the one called as
 * hamming (bs1_, bs2+j, nwords) further down. Confirm against the full file. */
128 for (i = 0; i < nwords; i++)
129 h += popcount64 (bs1[i] ^ bs2[i]);
/* All-pairs Hamming distances between n1 codes in bs1 and n2 codes in bs2,
 * each code being nbits / 64 consecutive 64-bit words.
 * Output is row-major: the distance between code i of bs1 and code j of bs2
 * is written to dis[i * n2 + j]. */
135 template <
size_t nbits>
137 const uint64_t * bs1,
138 const uint64_t * bs2,
139 size_t n1,
size_t n2,
144 const size_t nwords = nbits / 64;
145 for (i = 0; i < n1; i++) {
// Start of code i in bs1 and of output row i.
146 const uint64_t * __restrict bs1_ = bs1 + i * nwords;
147 hamdis_t * __restrict dis_ = dis + i * n2;
148 for (j = 0; j < n2; j++)
149 dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
156 const uint64_t * bs1,
157 const uint64_t * bs2,
161 hamdis_t * __restrict dis)
/* Run-time word-count variant: i and j advance in steps of nwords, so they
 * are word offsets into bs1 / bs2 rather than code indices (the loop bounds
 * n1 / n2 are presumably pre-scaled by nwords on lines not shown).
 * NOTE(review): the store below uses dis[j], which does not depend on i and
 * is a word offset, not a code index. Unless dis is advanced per row on a
 * line missing from this listing, every outer iteration overwrites the same
 * slots and the layout differs from the templated hammings<nbits>
 * (dis[i * n2 + j]). Confirm against the full file. */
166 for (i = 0; i < n1; i+=nwords) {
167 const uint64_t * bs1_ = bs1+i;
168 for (j = 0; j < n2; j+=nwords)
169 dis[j] = hamming (bs1_, bs2+j, nwords);
/* Visits every (i, j) pair of an n1 x n2 comparison and tests whether the
 * Hamming distance of the current codes is <= ht.
 * posm starts at 0 and is presumably the match counter; the increment and
 * the pointer advances of bs1 / bs2 sit on lines missing from this listing. */
177 template <
size_t nbits>
178 void hamming_count_thres (
179 const uint64_t * bs1,
180 const uint64_t * bs2,
186 const size_t nwords = nbits / 64;
187 size_t i, j, posm = 0;
// Remembers the start of the second set, presumably to rewind bs2 for each i (TODO confirm).
188 const uint64_t * bs2_ = bs2;
190 for (i = 0; i < n1; i++) {
192 for (j = 0; j < n2; j++) {
194 if (hamming <nbits> (bs1, bs2) <= ht)
/* Compares codes of a single set dbs against each other: the inner index j
 * starts at i + 1, so each unordered pair is visited once, and the distance
 * is tested against the threshold ht.
 * posm starts at 0 and is presumably the match counter; the increment and
 * the pointer advances sit on lines missing from this listing. */
204 template <
size_t nbits>
205 void crosshamming_count_thres (
206 const uint64_t * dbs,
211 const size_t nwords = nbits / 64;
212 size_t i, j, posm = 0;
213 const uint64_t * bs1 = dbs;
214 for (i = 0; i < n; i++) {
// NOTE(review): the offset to the next code is hard-coded as 2 words, while a
// code spans nwords = nbits / 64 words. That only matches for nbits == 128;
// `bs1 + nwords` looks like the intended expression. Confirm before changing.
215 const uint64_t * bs2 = bs1 + 2;
216 for (j = i + 1; j < n; j++) {
218 if (hamming <nbits> (bs1, bs2) <= ht)
/* Visits every (i, j) pair of an n1 x n2 comparison and computes the Hamming
 * distance h of the current codes. Returns a size_t; judging by posm this is
 * presumably the number of matches kept, but the threshold test, the stores
 * of results and the pointer advances sit on lines missing from this listing. */
228 template <
size_t nbits>
229 size_t match_hamming_thres (
230 const uint64_t * bs1,
231 const uint64_t * bs2,
238 const size_t nwords = nbits / 64;
239 size_t i, j, posm = 0;
// Remembers the start of the second set, presumably to rewind bs2 for each i (TODO confirm).
241 const uint64_t * bs2_ = bs2;
242 for (i = 0; i < n1; i++) {
244 for (j = 0; j < n2; j++) {
246 h = hamming <nbits> (bs1, bs2);
/* k-nearest-neighbour search with one max-heap per query.
 *   HammingComputer  built from a query code and bytes_per_code; its
 *                    hamming(code) call returns the distance to one database code
 *   ha               heap array; query i owns val / ids slots [i*k, (i+1)*k)
 *   init_heap        when true, ha->heapify() prepares the heaps first
 * The database (n2 codes in bs2) is scanned in batches of hamming_batch_size
 * codes; within a batch the ha->nh queries are processed by an OpenMP
 * parallel loop. If order is set, ha->reorder() is called at the end. */
266 template <
class HammingComputer>
268 void hammings_knn_hc (
270 int_maxheap_array_t * ha,
275 bool init_heap =
true)
278 if (init_heap) ha->heapify ();
280 const size_t block_size = hamming_batch_size;
281 for (
size_t j0 = 0; j0 < n2; j0 += block_size) {
// The last batch may be shorter than block_size.
282 const size_t j1 = std::min(j0 + block_size, n2);
283 #pragma omp parallel for
284 for (
size_t i = 0; i < ha->nh; i++) {
285 HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
// First database code of the current batch.
287 const uint8_t * bs2_ = bs2 + j0 * bytes_per_code;
289 hamdis_t * __restrict bh_val_ = ha->val + i * k;
290 long * __restrict bh_ids_ = ha->ids + i * k;
292 for (j = j0; j < j1; j++, bs2_+= bytes_per_code) {
293 dis = hc.hamming (bs2_);
// bh_val_[0] is the heap root; a strictly smaller distance replaces it (pop, then push).
294 if (dis < bh_val_[0]) {
295 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
296 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
301 if (order) ha->reorder ();
/* k-nearest-neighbour search by counting: instead of a heap, each query
 * keeps one bucket per possible distance value.
 * Phase 1 feeds every database code to the query's HCounterState (defined
 * outside this listing) in batches of hamming_batch_size codes, with the
 * queries processed by an OpenMP parallel loop.
 * Phase 2 walks the buckets in increasing distance and copies ids into
 * labels / distances until k results are written. */
305 template <
class HammingComputer>
307 void hammings_knn_mc (
// Distances range over 0 .. 8 * bytes_per_code, hence the + 1.
317 const int nBuckets = bytes_per_code * 8 + 1;
// Per query: nBuckets counters (zero-initialised) and nBuckets * k id slots.
318 std::vector<int> all_counters(na * nBuckets, 0);
319 std::unique_ptr<long[]> all_ids_per_dis(
new long[na * nBuckets * k]);
321 std::vector<HCounterState<HammingComputer>> cs;
// One state per query, each pointing at its own slice of the two buffers above.
322 for (
size_t i = 0; i < na; ++i) {
323 cs.push_back(HCounterState<HammingComputer>(
324 all_counters.data() + i * nBuckets,
325 all_ids_per_dis.get() + i * nBuckets * k,
326 a + i * bytes_per_code,
332 const size_t block_size = hamming_batch_size;
333 for (
size_t j0 = 0; j0 < nb; j0 += block_size) {
334 const size_t j1 = std::min(j0 + block_size, nb);
335 #pragma omp parallel for
336 for (
size_t i = 0; i < na; ++i) {
337 for (
size_t j = j0; j < j1; ++j) {
338 cs[i].update_counter(b + j * bytes_per_code, j);
343 for (
size_t i = 0; i < na; ++i) {
344 HCounterState<HammingComputer>& csi = cs[i];
// Note: the loop variable b below shadows the database pointer b used above;
// here b is a bucket index, i.e. a distance value.
347 for (
int b = 0; b < nBuckets && nres < k; b++) {
348 for (
int l = 0; l < csi.counters[b] && nres < k; l++) {
349 labels[i * k + nres] = csi.ids_per_dis[b * k + l];
350 distances[i * k + nres] = b;
// Sentinel values; presumably used to pad slots when fewer than k results
// were found (the enclosing loop is not visible in this listing).
355 labels[i * k + nres] = -1;
356 distances[i * k + nres] = std::numeric_limits<int32_t>::max();
/* k-NN search for codes that are exactly one 64-bit word (nwords is 1):
 * the query word is loaded once and each distance is a single XOR + popcount64.
 * Query i owns heap slots [i*k, (i+1)*k) of ha->val / ha->ids; the ha->nh
 * queries are processed by an OpenMP parallel loop. */
366 void hammings_knn_hc_1 (
367 int_maxheap_array_t * ha,
368 const uint64_t * bs1,
369 const uint64_t * bs2,
372 bool init_heap =
true)
374 const size_t nwords = 1;
382 #pragma omp parallel for
383 for (
size_t i = 0; i < ha->nh; i++) {
384 const uint64_t bs1_ = bs1 [i];
385 const uint64_t * bs2_ = bs2;
387 hamdis_t * bh_val_ = ha->val + i * k;
// Local copy of the heap root, so the comparison in the loop reads a local
// rather than bh_val_[0]; it is refreshed after every push.
388 hamdis_t bh_val_0 = bh_val_[0];
389 long * bh_ids_ = ha->ids + i * k;
391 for (j = 0; j < n2; j++, bs2_+= nwords) {
392 dis = popcount64 (bs1_ ^ *bs2_);
393 if (dis < bh_val_0) {
394 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
395 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
396 bh_val_0 = bh_val_[0];
/* Converts one float vector x of dimension d into a bit vector b.
 * Components are consumed in groups of 8 (presumably one output byte per
 * group); the rule that turns a float into a bit is on lines missing from
 * this listing.
 * Note: i, j and nj are int while d is size_t, so `i < d` and `d - i` mix
 * signed and unsigned operands. */
415 void fvec2bitvec (
const float * x, uint8_t * b,
size_t d)
417 for (
int i = 0; i < d; i += 8) {
// Number of components in this group: 8, or fewer for a trailing partial group.
420 int nj = i + 8 <= d ? 8 : d - i;
421 for (
int j = 0; j < nj; j++) {
/* Batch version of fvec2bitvec: converts n float vectors of dimension d,
 * stored contiguously in x, into n bit vectors stored contiguously in b.
 * The vectors are converted by an OpenMP parallel loop. */
435 void fvecs2bitvecs (
const float * x, uint8_t * b,
size_t d,
size_t n)
// Bytes per output code: d bits rounded up to a whole byte.
437 const long ncodes = ((d + 7) / 8);
438 #pragma omp parallel for
439 for (
size_t i = 0; i < n; i++)
440 fvec2bitvec (x + i * d, b + i * ncodes, d);
/* Takes and returns a 64-bit word, iterating once per bit position (64 times).
 * Going by the name it reverses the bit order, but the loop body is not
 * visible in this listing. */
445 static uint64_t uint64_reverse_bits (uint64_t b)
449 for (i = 0; i < 64; i++) {
/* Prints the first d bits of the bit vector b as '0' / '1' characters on
 * stdout, handling up to 64 bits per outer iteration. */
459 void bitvec_print (
const uint8_t * b,
size_t d)
462 for (i = 0; i < d; ) {
// NOTE(review): this reads 8 bytes through a uint64_t cast, whatever the
// number of bits left. If d is not a multiple of 64 the load may extend past
// the bytes that hold the d bits, and b is not guaranteed to be 8-byte
// aligned. Confirm the callers' buffer sizes.
463 uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
464 for (j = 0; j < 64 && i < d; j++, i++) {
465 printf (
"%d", (
int) (brev & 1));
/* Reinterprets a pointer as uint64_t* so byte-oriented code arrays can be
 * passed to the word-based templates above.
 * The C-style cast also drops const (the callers below pass const uint8_t*
 * arguments), and x is not parenthesised, so pass plain identifiers only. */
481 #define C64(x) ((uint64_t *)x)
488 size_t na,
size_t nb,
490 hamdis_t * __restrict dis)
/* Byte-pointer entry point for the all-pairs distance computation.
 * Codes are read as 64-bit words, so ncodes must be a multiple of 8;
 * otherwise FAISS_THROW_IF_NOT raises. */
492 FAISS_THROW_IF_NOT (ncodes % 8 == 0);
// Dispatch to the fixed-size instantiation for 64 / 128 / 256 / 512-bit codes.
// The selecting switch/case lines are missing from this listing.
495 faiss::hammings <64> (C64(a), C64(b), na, nb, dis);
return;
497 faiss::hammings <128> (C64(a), C64(b), na, nb, dis);
return;
499 faiss::hammings <256> (C64(a), C64(b), na, nb, dis);
return;
501 faiss::hammings <512> (C64(a), C64(b), na, nb, dis);
return;
508 int_maxheap_array_t *ha,
// Thin forwarder: hands all its arguments to hammings_knn_hc unchanged.
// The name of this wrapper is on a line missing from this listing.
515 hammings_knn_hc(ha, a, b, nb, ncodes, order);
/* Non-template entry point of the heap-based k-NN search: picks a
 * HammingComputer matched to the code size and calls the templated
 * hammings_knn_hc with init_heap = true. The switch/case lines selecting
 * each branch are missing from this listing. */
517 void hammings_knn_hc (
527 hammings_knn_hc<faiss::HammingComputer4>
528 (4, ha, a, b, nb, order,
true);
// Different path from the other sizes: a dedicated single-word routine
// (hammings_knn_hc_1) taking uint64_t pointers; presumably the 8-byte case.
531 hammings_knn_hc_1 (ha, C64(a), C64(b), nb, order,
true);
536 hammings_knn_hc<faiss::HammingComputer16>
537 (16, ha, a, b, nb, order,
true);
540 hammings_knn_hc<faiss::HammingComputer32>
541 (32, ha, a, b, nb, order,
true);
// Remaining sizes: the word-based computer when the size is a multiple of
// 8 bytes, otherwise the generic one.
544 if(ncodes % 8 == 0) {
545 hammings_knn_hc<faiss::HammingComputerM8>
546 (ncodes, ha, a, b, nb, order,
true);
548 hammings_knn_hc<faiss::HammingComputerDefault>
549 (ncodes, ha, a, b, nb, order,
true);
/* Non-template entry point of the counting-based k-NN search: picks a
 * HammingComputer matched to the code size (4, 8, 16, 32 bytes, else M8 for
 * multiples of 8 bytes, else the default) and forwards to the templated
 * hammings_knn_mc. The switch/case lines selecting each branch are missing
 * from this listing. */
555 void hammings_knn_mc(
567 hammings_knn_mc<faiss::HammingComputer4>(
568 4, a, b, na, nb, k, distances, labels
574 hammings_knn_mc<faiss::HammingComputer8>(
575 8, a, b, na, nb, k, distances, labels
579 hammings_knn_mc<faiss::HammingComputer16>(
580 16, a, b, na, nb, k, distances, labels
584 hammings_knn_mc<faiss::HammingComputer32>(
585 32, a, b, na, nb, k, distances, labels
589 if(ncodes % 8 == 0) {
590 hammings_knn_mc<faiss::HammingComputerM8>(
591 ncodes, a, b, na, nb, k, distances, labels
594 hammings_knn_mc<faiss::HammingComputerDefault>(
595 ncodes, a, b, na, nb, k, distances, labels
/* Non-template entry point: dispatches to the 64 / 128 / 256 / 512-bit
 * instantiation of faiss::hamming_count_thres after casting the code
 * pointers with C64, and raises via FAISS_THROW_FMT for any other size.
 * The selecting switch/case lines and the trailing call arguments are
 * missing from this listing. */
605 void hamming_count_thres (
616 faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
620 faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
624 faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
628 faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
// NOTE(review): the message says "bits"; check whether ncodes is a bit count
// or a byte count at this point.
632 FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
/* Non-template entry point: dispatches to the 64 / 128 / 256 / 512-bit
 * instantiation of faiss::crosshamming_count_thres with (dbs, n, ht, nptr),
 * and raises via FAISS_THROW_FMT for any other size. The selecting
 * switch/case lines are missing from this listing. */
638 void crosshamming_count_thres (
647 faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr);
650 faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
653 faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
656 faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
// NOTE(review): the message says "bits"; check whether ncodes is a bit count
// or a byte count at this point.
659 FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
/* Non-template entry point: returns the result of the 64 / 128 / 256 /
 * 512-bit instantiation of faiss::match_hamming_thres, and raises via
 * FAISS_THROW_FMT for any other size. The selecting switch/case lines are
 * missing from this listing. */
665 size_t match_hamming_thres (
677 return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
678 n1, n2, ht, idx, dis);
680 return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
681 n1, n2, ht, idx, dis);
683 return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
684 n1, n2, ht, idx, dis);
686 return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
687 n1, n2, ht, idx, dis);
// NOTE(review): the message says "bits"; check whether ncodes is a bit count
// or a byte count at this point.
689 FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
/* Scans nb database codes for a single query code ca and keeps the best
 * candidates in that query's max-heap (bh_val_ / bh_ids_, k slots).
 * HammingComputer is built once from (ca, code_size); hc.hamming(cb) gives
 * the distance to the current database code. The advance of cb between
 * iterations is on a line missing from this listing. */
705 template <
class HammingComputer>
706 static void hamming_dis_inner_loop (
716 HammingComputer hc (ca, code_size);
718 for (
size_t j = 0; j < nb; j++) {
719 int ndiff = hc.hamming (cb);
// bh_val_[0] is the heap root; a strictly smaller distance replaces it (pop, then push).
721 if (ndiff < bh_val_[0]) {
722 maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
723 maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
// Per-query loop (OpenMP parallel) of what is presumably
// generalized_hammings_knn_hc; the function header is missing from this listing.
// Each query i gets its code pointer ca, the start of the database cb and its
// own k-slot heap slice, then runs hamming_dis_inner_loop with a
// GenHammingComputer chosen by code size (8, 16, 32 bytes, else the M8 variant).
// The selecting switch/case lines are missing too.
742 #pragma omp parallel for
743 for (
int i = 0; i < na; i++) {
744 const uint8_t *ca = a + i * code_size;
745 const uint8_t *cb = b;
747 hamdis_t * bh_val_ = ha->
val + i * k;
748 long * bh_ids_ = ha->
ids + i * k;
752 hamming_dis_inner_loop<GenHammingComputer8>
753 (ca, cb, nb, 8, k, bh_val_, bh_ids_);
756 hamming_dis_inner_loop<GenHammingComputer16>
757 (ca, cb, nb, 16, k, bh_val_, bh_ids_);
760 hamming_dis_inner_loop<GenHammingComputer32>
761 (ca, cb, nb, 32, k, bh_val_, bh_ids_);
764 hamming_dis_inner_loop<GenHammingComputerM8>
765 (ca, cb, nb, code_size, k, bh_val_, bh_ids_);
size_t k
allocated size per heap
void reorder()
reorder all the heaps
TI * ids
identifiers (size nh * k)
void heapify()
prepare all the heaps before adding
void hammings(const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)
T * val
values (distances or similarities), size nh * k
void generalized_hammings_knn_hc(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)