39 #include "FaissAssert.h"
// Block-size constant for queries; no use of it is visible in this excerpt.
41 static const size_t BLOCKSIZE_QUERY = 8192;
// Number of database codes processed per outer-loop block in the kNN
// routines of this file (read there into `block_size`). It is a non-const,
// non-static global, so other translation units can modify it.
46 size_t hamming_batch_size = 65536;
// 256-entry lookup table: entry v is the number of set bits in the byte
// value v (0 -> 0, 1 -> 1, 3 -> 2, ..., 255 -> 8). The byte-wise hamming()
// in this file indexes it with the XOR of two code bytes.
48 static const uint8_t hamdis_tab_ham_bytes[256] = {
49 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
50 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
51 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
52 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
53 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
54 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
56 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
57 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
58 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
59 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
60 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
61 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
62 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
63 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
64 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
// Byte-wise Hamming distance between two codes of nbits bits.
// nbits / 8 bytes are compared: for each byte position the XOR of the two
// input bytes is looked up in hamdis_tab_ham_bytes and added to h, cast to
// the accumulator type T.
// NOTE(review): only part of this definition is visible in this excerpt
// (the bs2 parameter, the declarations of i and h, and the return are not
// shown).
69 template <
size_t nbits,
typename T>
70 T hamming (
const uint8_t *bs1,
// Integer division: any trailing bits beyond a whole byte are not compared.
73 const size_t nbytes = nbits / 8;
76 for (i = 0; i < nbytes; i++)
77 h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]];
// Word-wise Hamming distance between two codes of nbits bits, read as
// arrays of 64-bit words. Each pair of words is XORed and the set bits of
// the result are counted with popcount64, accumulating into h.
// NOTE(review): the declarations of i and h and the return statement are not
// visible in this excerpt.
83 template <
size_t nbits>
84 hamdis_t hamming (
const uint64_t * bs1,
const uint64_t * bs2)
// Integer division: nbits is expected to be a multiple of 64 for the whole
// code to be covered.
86 const size_t nwords = nbits / 64;
89 for (i = 0; i < nwords; i++)
90 h += popcount64 (bs1[i] ^ bs2[i]);
// 64-bit case of hamming<nbits>: a single XOR + popcount64 on word 0 of each
// code, with no loop.
98 hamdis_t hamming<64> (
const uint64_t * pa,
const uint64_t * pb)
100 return popcount64 (pa[0] ^ pb[0]);
// 128-bit case of hamming<nbits>: the sum of the popcounts of the XOR of
// words 0 and 1, written out without a loop.
105 hamdis_t hamming<128> (
const uint64_t *pa,
const uint64_t *pb)
107 return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
// 256-bit case of hamming<nbits>: the sum of the popcounts of the XOR of
// words 0..3, written out without a loop.
112 hamdis_t hamming<256> (
const uint64_t * pa,
const uint64_t * pb)
114 return popcount64 (pa[0] ^ pb[0])
115 + popcount64 (pa[1] ^ pb[1])
116 + popcount64 (pa[2] ^ pb[2])
117 + popcount64 (pa[3] ^ pb[3]);
// Fragment of a Hamming-distance routine whose word count is a run-time
// value (nwords) rather than a template parameter: XOR + popcount64 per
// 64-bit word, accumulated into h.
// NOTE(review): the function header, the nwords parameter and the return are
// not visible in this excerpt; presumably this is the overload called as
// hamming(bs1_, bs2 + j, nwords) further down - confirm.
123 const uint64_t * bs1,
124 const uint64_t * bs2,
129 for (i = 0; i < nwords; i++)
130 h += popcount64 (bs1[i] ^ bs2[i]);
// All-pairs Hamming distances between n1 codes in bs1 and n2 codes in bs2,
// each code being nbits / 64 consecutive 64-bit words.
// The result for pair (i, j) is written to dis[i * n2 + j], i.e. row i of the
// output holds the distances from code i of bs1 to every code of bs2.
// NOTE(review): the function name line and the dis parameter declaration are
// not visible in this excerpt.
136 template <
size_t nbits>
138 const uint64_t * bs1,
139 const uint64_t * bs2,
140 size_t n1,
size_t n2,
145 const size_t nwords = nbits / 64;
146 for (i = 0; i < n1; i++) {
// Start of code i in bs1 and start of output row i.
147 const uint64_t * __restrict bs1_ = bs1 + i * nwords;
148 hamdis_t * __restrict dis_ = dis + i * n2;
149 for (j = 0; j < n2; j++)
150 dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
// Fragment of the all-pairs Hamming routine for a run-time word count.
// Here i and j are word offsets (they advance by nwords), so the loop bounds
// n1 and n2 are presumably pre-multiplied by nwords in lines not visible in
// this excerpt - confirm.
// NOTE(review): the output is indexed as dis[j] with j a word offset, and no
// per-row advance of dis is visible. Unless dis is advanced in the hidden
// lines, every row i overwrites the same entries and only every nwords-th
// slot is written. Compare with the templated variant, which writes
// dis[i * n2 + j].
157 const uint64_t * bs1,
158 const uint64_t * bs2,
162 hamdis_t * __restrict dis)
167 for (i = 0; i < n1; i+=nwords) {
168 const uint64_t * bs1_ = bs1+i;
169 for (j = 0; j < n2; j+=nwords)
170 dis[j] = hamming (bs1_, bs2+j, nwords);
// Threshold test over pairs of codes taken from bs1 (n1 codes) and bs2
// (n2 codes): each pair's Hamming distance is compared against ht with <=.
// posm is initialised to 0 and is presumably the running count of pairs that
// pass; the increment and the final store are not visible in this excerpt.
178 template <
size_t nbits>
179 void hamming_count_thres (
180 const uint64_t * bs1,
181 const uint64_t * bs2,
187 const size_t nwords = nbits / 64;
188 size_t i, j, posm = 0;
// Remember the start of the second set; presumably used to rewind bs2 at the
// top of each outer iteration (the rewind itself is not visible here).
189 const uint64_t * bs2_ = bs2;
191 for (i = 0; i < n1; i++) {
193 for (j = 0; j < n2; j++) {
195 if (hamming <nbits> (bs1, bs2) <= ht)
// Threshold test over all unordered pairs (i, j), i < j, of the n codes stored
// contiguously in dbs, each code being nbits / 64 64-bit words. A pair's
// Hamming distance is compared against ht with <=; posm starts at 0 and is
// presumably the running count of passing pairs (the increment, the pointer
// advances and the final store are not visible in this excerpt).
205 template <
size_t nbits>
206 void crosshamming_count_thres (
207 const uint64_t * dbs,
212 const size_t nwords = nbits / 64;
213 size_t i, j, posm = 0;
214 const uint64_t * bs1 = dbs;
215 for (i = 0; i < n; i++) {
// The inner loop starts at j = i + 1, so bs2 must start exactly one code
// (nwords words) after bs1. A hard-coded offset of 2 words is only correct
// for 128-bit codes: with 64-bit codes it skips code i + 1 and runs one code
// past the end, and with 256/512-bit codes it points into the middle of
// code i.
216 const uint64_t * bs2 = bs1 + nwords;
217 for (j = i + 1; j < n; j++) {
219 if (hamming <nbits> (bs1, bs2) <= ht)
// Pairwise scan of n1 codes in bs1 against n2 codes in bs2 that computes the
// Hamming distance h of each pair. Returns a size_t; posm starts at 0 and is
// presumably the number of matches recorded (the threshold test, the writes
// to the output arrays and the return are not visible in this excerpt).
229 template <
size_t nbits>
230 size_t match_hamming_thres (
231 const uint64_t * bs1,
232 const uint64_t * bs2,
239 const size_t nwords = nbits / 64;
240 size_t i, j, posm = 0;
// Saved start of the second set, presumably to rewind bs2 for each i.
242 const uint64_t * bs2_ = bs2;
243 for (i = 0; i < n1; i++) {
245 for (j = 0; j < n2; j++) {
247 h = hamming <nbits> (bs1, bs2);
// k-nearest-neighbour search in Hamming space using one max-heap per query.
// HammingComputer is constructed from a query code and its byte length and
// must provide hamming(const uint8_t *) returning the distance to a database
// code.
//   ha        heap array: ha->nh queries, heap i stored at ha->val + i * k and
//             ha->ids + i * k
//   init_heap when true (the default) the heaps are heapify()'d first
// The database is walked in blocks of hamming_batch_size codes; within a
// block the queries are distributed over OpenMP threads, each iteration
// touching only its own heap slice.
// NOTE(review): several parameters (bytes_per_code, bs1, bs2, n2, order) and
// the declarations of j, dis and k are not visible in this excerpt.
267 template <
class HammingComputer>
269 void hammings_knn_hc (
271 int_maxheap_array_t * ha,
276 bool init_heap =
true)
279 if (init_heap) ha->heapify ();
281 const size_t block_size = hamming_batch_size;
282 for (
size_t j0 = 0; j0 < n2; j0 += block_size) {
// Last block may be shorter than block_size.
283 const size_t j1 = std::min(j0 + block_size, n2);
284 #pragma omp parallel for
285 for (
size_t i = 0; i < ha->nh; i++) {
286 HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
// First database code of the current block.
288 const uint8_t * bs2_ = bs2 + j0 * bytes_per_code;
290 hamdis_t * __restrict bh_val_ = ha->val + i * k;
291 long * __restrict bh_ids_ = ha->ids + i * k;
293 for (j = j0; j < j1; j++, bs2_+= bytes_per_code) {
294 dis = hc.hamming (bs2_);
// Replace the heap top only when the new distance is strictly smaller than
// bh_val_[0] (presumably the largest distance currently kept).
295 if (dis < bh_val_[0]) {
296 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
297 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
302 if (order) ha->reorder ();
// k-nearest-neighbour search in Hamming space using per-distance buckets
// ("counting") instead of heaps.
// For each of the na queries there are nBuckets = bytes_per_code * 8 + 1
// buckets, one per possible distance value 0 .. number of bits, each with
// room for k ids. An HCounterState per query is fed every database code via
// update_counter(); afterwards the buckets are read in increasing distance
// order to fill labels / distances (k entries per query).
// NOTE(review): the parameter list and the declaration of nres are not
// visible in this excerpt.
306 template <
class HammingComputer>
308 void hammings_knn_mc (
318 const int nBuckets = bytes_per_code * 8 + 1;
// Flat per-query storage: counters are zero-initialised, the id storage is
// left uninitialised.
319 std::vector<int> all_counters(na * nBuckets, 0);
320 std::unique_ptr<long[]> all_ids_per_dis(
new long[na * nBuckets * k]);
322 std::vector<HCounterState<HammingComputer>> cs;
323 for (
size_t i = 0; i < na; ++i) {
// State i gets its own slice of the counter and id arrays plus query code i.
324 cs.push_back(HCounterState<HammingComputer>(
325 all_counters.data() + i * nBuckets,
326 all_ids_per_dis.get() + i * nBuckets * k,
327 a + i * bytes_per_code,
333 const size_t block_size = hamming_batch_size;
334 for (
size_t j0 = 0; j0 < nb; j0 += block_size) {
335 const size_t j1 = std::min(j0 + block_size, nb);
336 #pragma omp parallel for
337 for (
size_t i = 0; i < na; ++i) {
338 for (
size_t j = j0; j < j1; ++j) {
339 cs[i].update_counter(b + j * bytes_per_code, j);
344 for (
size_t i = 0; i < na; ++i) {
345 HCounterState<HammingComputer>& csi = cs[i];
// Walk buckets from distance 0 upwards until k results are collected.
// NOTE(review): the loop variable `b` shadows the database pointer `b` used
// in update_counter above; harmless here but easy to misread.
348 for (
int b = 0; b < nBuckets && nres < k; b++) {
349 for (
int l = 0; l < csi.counters[b] && nres < k; l++) {
350 labels[i * k + nres] = csi.ids_per_dis[b * k + l];
351 distances[i * k + nres] = b;
// Padding values for result slots that could not be filled (the enclosing
// loop is not visible in this excerpt).
356 labels[i * k + nres] = -1;
357 distances[i * k + nres] = std::numeric_limits<int32_t>::max();
// Heap-based kNN for codes that are exactly one 64-bit word (nwords = 1).
// Each query word is XORed with every database word and the set bits are
// counted with popcount64; query i uses the heap at ha->val + i * k /
// ha->ids + i * k. Queries are distributed over OpenMP threads.
// NOTE(review): the n2 and order parameters, the declarations of j, dis and k,
// and the heapify / reorder calls are not visible in this excerpt.
367 void hammings_knn_hc_1 (
368 int_maxheap_array_t * ha,
369 const uint64_t * bs1,
370 const uint64_t * bs2,
373 bool init_heap =
true)
375 const size_t nwords = 1;
383 #pragma omp parallel for
384 for (
size_t i = 0; i < ha->nh; i++) {
// The whole query code fits in one word, so it is copied by value.
385 const uint64_t bs1_ = bs1 [i];
386 const uint64_t * bs2_ = bs2;
388 hamdis_t * bh_val_ = ha->val + i * k;
// Local copy of the heap top, refreshed after each replacement, so the
// comparison in the inner loop does not re-read bh_val_[0].
389 hamdis_t bh_val_0 = bh_val_[0];
390 long * bh_ids_ = ha->ids + i * k;
392 for (j = 0; j < n2; j++, bs2_+= nwords) {
393 dis = popcount64 (bs1_ ^ *bs2_);
394 if (dis < bh_val_0) {
395 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
396 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
397 bh_val_0 = bh_val_[0];
// Converts one float vector x of dimension d into a bit vector written to b.
// The components are processed in groups of 8 (one output byte per group,
// presumably); the last group holds d - i components when d is not a multiple
// of 8. The per-component test and the byte store are not visible in this
// excerpt.
// NOTE(review): i is an int compared against the size_t d, so the comparison
// is done in unsigned arithmetic and i would overflow for d > INT_MAX.
416 void fvec2bitvec (
const float * x, uint8_t * b,
size_t d)
418 for (
int i = 0; i < d; i += 8) {
// Number of valid components in this group of 8.
421 int nj = i + 8 <= d ? 8 : d - i;
422 for (
int j = 0; j < nj; j++) {
// Converts n float vectors of dimension d (stored contiguously in x) into bit
// vectors by calling fvec2bitvec on each one. Output vector i starts at
// b + i * ncodes, where ncodes = ceil(d / 8) bytes. The n conversions are
// distributed over OpenMP threads.
436 void fvecs2bitvecs (
const float * x, uint8_t * b,
size_t d,
size_t n)
// Bytes per output code: d bits rounded up to a whole byte.
438 const long ncodes = ((d + 7) / 8);
439 #pragma omp parallel for
440 for (
size_t i = 0; i < n; i++)
441 fvec2bitvec (x + i * d, b + i * ncodes, d);
// File-local helper taking and returning a 64-bit word; it iterates over all
// 64 bit positions. Judging by its name it returns b with the bit order
// reversed - the loop body is not visible in this excerpt, so confirm.
446 static uint64_t uint64_reverse_bits (uint64_t b)
450 for (i = 0; i < 64; i++) {
// Prints the first d bits of the bit vector b to stdout as '0' / '1'
// characters, 64 bits at a time: each 64-bit chunk is passed through
// uint64_reverse_bits and its low bit is printed d-bounded.
// NOTE(review): the chunk is read through a uint64_t * cast of a uint8_t *,
// which reads 8 bytes regardless of d and assumes b is suitably aligned. The
// advance of b and the shift of brev are not visible in this excerpt.
460 void bitvec_print (
const uint8_t * b,
size_t d)
// i is advanced by the inner loop only.
463 for (i = 0; i < d; ) {
464 uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
465 for (j = 0; j < 64 && i < d; j++, i++) {
466 printf (
"%d", (
int) (brev & 1));
// Reinterprets a pointer expression as a pointer to 64-bit words.
// The argument is parenthesised so that a compound expression such as
// C64(p + 8) casts the whole expression; without the parentheses the cast
// binds to `p` alone and the offset is then applied in units of 8 bytes.
// Note that the C-style cast also drops any const qualifier of the pointee.
#define C64(x) ((uint64_t *)(x))
// Fragment of the byte-pointer entry point for all-pairs Hamming distances.
// It requires ncodes to be a multiple of 8 (FAISS_THROW_IF_NOT otherwise) and
// forwards to the templated faiss::hammings for 64, 128, 256 or 512 bits,
// reinterpreting a and b as 64-bit words via C64.
// NOTE(review): the function header, the selection logic between the four
// calls and any fallback are not visible in this excerpt.
489 size_t na,
size_t nb,
491 hamdis_t * __restrict dis)
493 FAISS_THROW_IF_NOT (ncodes % 8 == 0);
496 faiss::hammings <64> (C64(a), C64(b), na, nb, dis);
return;
498 faiss::hammings <128> (C64(a), C64(b), na, nb, dis);
return;
500 faiss::hammings <256> (C64(a), C64(b), na, nb, dis);
return;
502 faiss::hammings <512> (C64(a), C64(b), na, nb, dis);
return;
// Fragment of a wrapper (its name is not visible in this excerpt) that takes
// a heap array and forwards all its arguments unchanged to hammings_knn_hc.
509 int_maxheap_array_t *ha,
516 hammings_knn_hc(ha, a, b, nb, ncodes, order);
// Non-template entry point for heap-based Hamming kNN. It dispatches to a
// specialised implementation and always passes init_heap = true:
//   - HammingComputer4 / 16 / 32 with a fixed code size of 4 / 16 / 32 bytes,
//   - hammings_knn_hc_1, which works on single 64-bit words,
//   - HammingComputerM8 when ncodes is a multiple of 8,
//   - HammingComputerDefault otherwise.
// NOTE(review): the parameter list and the switch / case labels selecting
// between the fixed-size variants are not visible in this excerpt.
518 void hammings_knn_hc (
528 hammings_knn_hc<faiss::HammingComputer4>
529 (4, ha, a, b, nb, order,
true);
532 hammings_knn_hc_1 (ha, C64(a), C64(b), nb, order,
true);
537 hammings_knn_hc<faiss::HammingComputer16>
538 (16, ha, a, b, nb, order,
true);
541 hammings_knn_hc<faiss::HammingComputer32>
542 (32, ha, a, b, nb, order,
true);
545 if(ncodes % 8 == 0) {
546 hammings_knn_hc<faiss::HammingComputerM8>
547 (ncodes, ha, a, b, nb, order,
true);
549 hammings_knn_hc<faiss::HammingComputerDefault>
550 (ncodes, ha, a, b, nb, order,
true);
// Non-template entry point for counting-based Hamming kNN. It dispatches to
// the templated hammings_knn_mc with HammingComputer4 / 8 / 16 / 32 for the
// corresponding fixed code sizes in bytes, HammingComputerM8 when ncodes is a
// multiple of 8, and HammingComputerDefault otherwise.
// NOTE(review): the parameter list and the switch / case labels are not
// visible in this excerpt.
556 void hammings_knn_mc(
568 hammings_knn_mc<faiss::HammingComputer4>(
569 4, a, b, na, nb, k, distances, labels
575 hammings_knn_mc<faiss::HammingComputer8>(
576 8, a, b, na, nb, k, distances, labels
580 hammings_knn_mc<faiss::HammingComputer16>(
581 16, a, b, na, nb, k, distances, labels
585 hammings_knn_mc<faiss::HammingComputer32>(
586 32, a, b, na, nb, k, distances, labels
590 if(ncodes % 8 == 0) {
591 hammings_knn_mc<faiss::HammingComputerM8>(
592 ncodes, a, b, na, nb, k, distances, labels
595 hammings_knn_mc<faiss::HammingComputerDefault>(
596 ncodes, a, b, na, nb, k, distances, labels
// Non-template entry point that forwards to the templated
// faiss::hamming_count_thres for 64, 128, 256 or 512 bits, reinterpreting bs1
// and bs2 as 64-bit words via C64. Any other size raises FAISS_THROW_FMT.
// NOTE(review): the error text says "bits" but prints ncodes, which elsewhere
// in this file is compared against byte counts - confirm the unit.
// The parameter list, the case labels and the remaining call arguments are
// not visible in this excerpt.
606 void hamming_count_thres (
617 faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
621 faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
625 faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
629 faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
633 FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
// Non-template entry point that forwards (dbs as 64-bit words, n, ht, nptr)
// to the templated faiss::crosshamming_count_thres for 64, 128, 256 or 512
// bits. Any other size raises FAISS_THROW_FMT.
// NOTE(review): the error text says "bits" but prints ncodes - confirm the
// unit. The parameter list and the case labels are not visible in this
// excerpt.
639 void crosshamming_count_thres (
648 faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr);
651 faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
654 faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
657 faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
660 FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
// Non-template entry point that returns the result of the templated
// faiss::match_hamming_thres for 64, 128, 256 or 512 bits, passing bs1 and
// bs2 as 64-bit words via C64 together with n1, n2, ht, idx and dis. Any
// other size raises FAISS_THROW_FMT.
// NOTE(review): the error text says "bits" but prints ncodes - confirm the
// unit. The parameter list and the case labels are not visible in this
// excerpt.
666 size_t match_hamming_thres (
678 return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
679 n1, n2, ht, idx, dis);
681 return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
682 n1, n2, ht, idx, dis);
684 return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
685 n1, n2, ht, idx, dis);
687 return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
688 n1, n2, ht, idx, dis);
690 FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
// File-local inner loop of a heap-based kNN for one query code.
// A HammingComputer is built from the query code ca and code_size, then each
// of the nb database codes is compared via hc.hamming(cb). A result strictly
// smaller than bh_val_[0] replaces the top of the max-heap (bh_val_, bh_ids_)
// of size k, with the database index j as id.
// NOTE(review): the parameter list and the advance of cb between iterations
// are not visible in this excerpt.
706 template <
class HammingComputer>
707 static void hamming_dis_inner_loop (
717 HammingComputer hc (ca, code_size);
719 for (
size_t j = 0; j < nb; j++) {
720 int ndiff = hc.hamming (cb);
722 if (ndiff < bh_val_[0]) {
723 maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
724 maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
// Fragment of a heap-based kNN driver (its header is not visible in this
// excerpt). The na queries are distributed over OpenMP threads; query i uses
// the code at a + i * code_size and the heap slice at ha->val + i * k /
// ha->ids + i * k, and runs hamming_dis_inner_loop over the nb database codes
// with GenHammingComputer8 / 16 / 32 for those code sizes or
// GenHammingComputerM8 with the run-time code_size.
// NOTE(review): the loop index is an int while other OpenMP loops in this
// file use size_t; the type of na and the switch selecting the computer are
// not visible here.
743 #pragma omp parallel for
744 for (
int i = 0; i < na; i++) {
745 const uint8_t *ca = a + i * code_size;
746 const uint8_t *cb = b;
748 hamdis_t * bh_val_ = ha->
val + i * k;
749 long * bh_ids_ = ha->
ids + i * k;
753 hamming_dis_inner_loop<GenHammingComputer8>
754 (ca, cb, nb, 8, k, bh_val_, bh_ids_);
757 hamming_dis_inner_loop<GenHammingComputer16>
758 (ca, cb, nb, 16, k, bh_val_, bh_ids_);
761 hamming_dis_inner_loop<GenHammingComputer32>
762 (ca, cb, nb, 32, k, bh_val_, bh_ids_);
765 hamming_dis_inner_loop<GenHammingComputerM8>
766 (ca, cb, nb, code_size, k, bh_val_, bh_ids_);
size_t k
allocated size per heap
void reorder()
reorder all the heaps
TI * ids
identifiers (size nh * k)
void heapify()
prepare all the heaps before adding
void hammings(const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)
T * val
values (distances or similarities), size nh * k
void generalized_hammings_knn_hc(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)