Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
hamming.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 /* Copyright 2004-present Facebook. All Rights Reserved.
11  *
12  * Implementation of Hamming related functions (distances, smallest distance
13  * selection with regular heap|radix and probabilistic heap|radix).
14  *
15  * IMPLEMENTATION NOTES
16  * Bitvectors are generally assumed to be multiples of 64 bits.
17  *
18  * hamdis_t is used for distances because at this time
19  * it is not clear how we will need to balance
20  * - flexibility in vector size (unclear more than 2^16 or even 2^8 bitvectors)
21  * - memory usage
22  * - cache-misses when dealing with large volumes of data (lower bits is better)
23  *
24  * The hamdis_t should optimally be compatible with one of the Torch Storage
25  * (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes
26 */
27 
28 #include "hamming.h"
29 
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <math.h>
33 #include <assert.h>
34 #include <limits.h>
35 
36 #include "Heap.h"
37 #include "FaissAssert.h"
38 
/* Presumably the number of queries handled per block (8192).
 * NOTE(review): not referenced in the visible part of this file -- confirm
 * it is still needed. */
static const size_t BLOCKSIZE_QUERY = 8 * 1024;
40 
41 
42 namespace faiss {
43 
/* Lookup table: entry v holds the number of bits set in the byte value v. */
static const uint8_t hamdis_tab_ham_bytes[256] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};


/* Elementary (unoptimized) Hamming distance between two byte codes.
 *
 * Compares the first nbits / 8 bytes of bs1 and bs2 (a remainder of nbits
 * below 8 is ignored) and sums, via the byte lookup table, the number of
 * differing bits. The sum is accumulated in and returned as type T. */
template <size_t nbits, typename T>
T hamming (const uint8_t *bs1,
           const uint8_t *bs2)
{
    T total = 0;
    const uint8_t *p1 = bs1;
    const uint8_t *p2 = bs2;
    const uint8_t *const end1 = bs1 + nbits / 8;
    while (p1 != end1) {
        total += (T) hamdis_tab_ham_bytes[*p1 ^ *p2];
        ++p1;
        ++p2;
    }
    return total;
}
76 
77 
78 /* Hamming distances for multiples of 64 bits */
79 template <size_t nbits>
80 hamdis_t hamming (const uint64_t * bs1, const uint64_t * bs2)
81 {
82  const size_t nwords = nbits / 64;
83  size_t i;
84  hamdis_t h = 0;
85  for (i = 0; i < nwords; i++)
86  h += popcount64 (bs1[i] ^ bs2[i]);
87  return h;
88 }
89 
90 
91 
92 /* specialized (optimized) functions */
93 template <>
94 hamdis_t hamming<64> (const uint64_t * pa, const uint64_t * pb)
95 {
96  return popcount64 (pa[0] ^ pb[0]);
97 }
98 
99 
100 template <>
101 hamdis_t hamming<128> (const uint64_t *pa, const uint64_t *pb)
102 {
103  return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
104 }
105 
106 
107 template <>
108 hamdis_t hamming<256> (const uint64_t * pa, const uint64_t * pb)
109 {
110  return popcount64 (pa[0] ^ pb[0])
111  + popcount64 (pa[1] ^ pb[1])
112  + popcount64 (pa[2] ^ pb[2])
113  + popcount64 (pa[3] ^ pb[3]);
114 }
115 
116 
117 /* Hamming distances for multiple of 64 bits */
118 hamdis_t hamming (
119  const uint64_t * bs1,
120  const uint64_t * bs2,
121  size_t nwords)
122 {
123  size_t i;
124  hamdis_t h = 0;
125  for (i = 0; i < nwords; i++)
126  h += popcount64 (bs1[i] ^ bs2[i]);
127  return h;
128 }
129 
130 
131 
132 template <size_t nbits>
133 void hammings (
134  const uint64_t * bs1,
135  const uint64_t * bs2,
136  size_t n1, size_t n2,
137  hamdis_t * dis)
138 
139 {
140  size_t i, j;
141  const size_t nwords = nbits / 64;
142  for (i = 0; i < n1; i++) {
143  const uint64_t * __restrict bs1_ = bs1 + i * nwords;
144  hamdis_t * __restrict dis_ = dis + i * n2;
145  for (j = 0; j < n2; j++)
146  dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
147  }
148 }
149 
150 
151 
152 void hammings (
153  const uint64_t * bs1,
154  const uint64_t * bs2,
155  size_t n1,
156  size_t n2,
157  size_t nwords,
158  hamdis_t * __restrict dis)
159 {
160  size_t i, j;
161  n1 *= nwords;
162  n2 *= nwords;
163  for (i = 0; i < n1; i+=nwords) {
164  const uint64_t * bs1_ = bs1+i;
165  for (j = 0; j < n2; j+=nwords)
166  dis[j] = hamming (bs1_, bs2+j, nwords);
167  }
168 }
169 
170 
171 
172 
173 /* Count number of matches given a max threshold */
174 template <size_t nbits>
175 void hamming_count_thres (
176  const uint64_t * bs1,
177  const uint64_t * bs2,
178  size_t n1,
179  size_t n2,
180  hamdis_t ht,
181  size_t * nptr)
182 {
183  const size_t nwords = nbits / 64;
184  size_t i, j, posm = 0;
185  const uint64_t * bs2_ = bs2;
186 
187  for (i = 0; i < n1; i++) {
188  bs2 = bs2_;
189  for (j = 0; j < n2; j++) {
190  /* collect the match only if this satisfies the threshold */
191  if (hamming <nbits> (bs1, bs2) <= ht)
192  posm++;
193  bs2 += nwords;
194  }
195  bs1 += nwords; /* next signature */
196  }
197  *nptr = posm;
198 }
199 
200 
201 template <size_t nbits>
202 void crosshamming_count_thres (
203  const uint64_t * dbs,
204  size_t n,
205  int ht,
206  size_t * nptr)
207 {
208  const size_t nwords = nbits / 64;
209  size_t i, j, posm = 0;
210  const uint64_t * bs1 = dbs;
211  for (i = 0; i < n; i++) {
212  const uint64_t * bs2 = bs1 + 2;
213  for (j = i + 1; j < n; j++) {
214  /* collect the match only if this satisfies the threshold */
215  if (hamming <nbits> (bs1, bs2) <= ht)
216  posm++;
217  bs2 += nwords;
218  }
219  bs1 += nwords;
220  }
221  *nptr = posm;
222 }
223 
224 
225 template <size_t nbits>
226 size_t match_hamming_thres (
227  const uint64_t * bs1,
228  const uint64_t * bs2,
229  size_t n1,
230  size_t n2,
231  int ht,
232  long * idx,
233  hamdis_t * hams)
234 {
235  const size_t nwords = nbits / 64;
236  size_t i, j, posm = 0;
237  hamdis_t h;
238  const uint64_t * bs2_ = bs2;
239  for (i = 0; i < n1; i++) {
240  bs2 = bs2_;
241  for (j = 0; j < n2; j++) {
242  /* Here perform the real work of computing the distance */
243  h = hamming <nbits> (bs1, bs2);
244 
245  /* collect the match only if this satisfies the threshold */
246  if (h <= ht) {
247  /* Enough space to store another match ? */
248  *idx = i; idx++;
249  *idx = j; idx++;
250  *hams = h;
251  hams++;
252  posm++;
253  }
254  bs2+=nwords; /* next signature */
255  }
256  bs1+=nwords;
257  }
258  return posm;
259 }
260 
261 
262 /* Return closest neighbors w.r.t Hamming distance */
263 template <class HammingComputer>
264 static
265 void hammings_knn_hc (
266  int bytes_per_code,
267  int_maxheap_array_t * ha,
268  const uint8_t * bs1,
269  const uint8_t * bs2,
270  size_t n2,
271  bool order = true,
272  bool init_heap = true)
273 {
274  size_t k = ha->k;
275 
276 
277  if (init_heap) ha->heapify ();
278 
279  /* The computation here does not involved any blockization, which
280  is suboptimal for many queries in parallel. */
281 #pragma omp parallel for
282  for (size_t i = 0; i < ha->nh; i++) {
283  HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
284 
285  const uint8_t * bs2_ = bs2;
286  hamdis_t dis;
287  hamdis_t * __restrict bh_val_ = ha->val + i * k;
288  long * __restrict bh_ids_ = ha->ids + i * k;
289  size_t j;
290  for (j = 0; j < n2; j++, bs2_+= bytes_per_code) {
291  dis = hc.hamming (bs2_);
292  if (dis < bh_val_[0]) {
293  faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
294  faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
295  }
296  }
297  }
298  if (order) ha->reorder ();
299  }
300 
301 
302 
303 // works faster than the template version
304 static
305 void hammings_knn_1 (
306  int_maxheap_array_t * ha,
307  const uint64_t * bs1,
308  const uint64_t * bs2,
309  size_t n2,
310  bool order = true,
311  bool init_heap = true)
312 {
313  const size_t nwords = 1;
314  size_t k = ha->k;
315 
316 
317  if (init_heap) {
318  ha->heapify ();
319  }
320 
321 #pragma omp parallel for
322  for (size_t i = 0; i < ha->nh; i++) {
323  const uint64_t bs1_ = bs1 [i];
324  const uint64_t * bs2_ = bs2;
325  hamdis_t dis;
326  hamdis_t * bh_val_ = ha->val + i * k;
327  hamdis_t bh_val_0 = bh_val_[0];
328  long * bh_ids_ = ha->ids + i * k;
329  size_t j;
330  for (j = 0; j < n2; j++, bs2_+= nwords) {
331  dis = popcount64 (bs1_ ^ *bs2_);
332  if (dis < bh_val_0) {
333  faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
334  faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
335  bh_val_0 = bh_val_[0];
336  }
337  }
338  }
339  if (order) {
340  ha->reorder ();
341  }
342 }
343 
344 
345 
346 
347 /* Functions to map vectors to bits. Assume proper allocation done beforehand,
348  meaning that b should be able to receive as many bits as x may produce. */
349 
/*
 * Binarize one float vector by sign: a bit is set when the corresponding
 * component is >= 0 (so NaN components yield a 0 bit).
 *
 * dimension 0 corresponds to the least significant bit of b[0], or
 * equivalently to the lsb of the first byte that is stored.
 *
 * x: input vector, d components
 * b: output, receives (d + 7) / 8 bytes; when d is not a multiple of 8 the
 *    unused high bits of the last byte are 0
 *
 * Indices are size_t, like d, so the loop bounds cannot overflow or suffer
 * from signed/unsigned comparison when d is large.
 */
void fvec2bitvec (const float * x, uint8_t * b, size_t d)
{
    for (size_t i = 0; i < d; i += 8) {
        /* number of components packed into this byte */
        const size_t nj = (d - i < 8) ? d - i : 8;
        uint8_t w = 0;
        for (size_t j = 0; j < nj; j++) {
            if (x[i + j] >= 0)
                w |= (uint8_t) (1u << j);
        }
        *b = w;
        b++;
    }
}
369 
370 
371 
372 /* Same but for n vectors.
373  Ensure that the ouptut b is byte-aligned (pad with 0s). */
374 void fvecs2bitvecs (const float * x, uint8_t * b, size_t d, size_t n)
375 {
376  const long ncodes = ((d + 7) / 8);
377 #pragma omp parallel for
378  for (size_t i = 0; i < n; i++)
379  fvec2bitvec (x + i * d, b + i * ncodes, d);
380 }
381 
382 
/* Reverse the bit order of a 64-bit word: bit i of the input becomes
   bit 63 - i of the result (NOT an optimized function, only used for
   printing purposes). */
static uint64_t uint64_reverse_bits (uint64_t b)
{
    uint64_t revb = 0;
    for (int shift = 63; shift >= 0; shift--) {
        revb |= (b & 1) << shift;
        b >>= 1;
    }
    return revb;
}
395 
396 
397 /* print the bit vector */
398 void bitvec_print (const uint8_t * b, size_t d)
399 {
400  size_t i, j;
401  for (i = 0; i < d; ) {
402  uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
403  for (j = 0; j < 64 && i < d; j++, i++) {
404  printf ("%d", (int) (brev & 1));
405  brev >>= 1;
406  }
407  b += 8;
408  printf (" ");
409  }
410 }
411 
412 
413 
414 
415 
416 /*----------------------------------------*/
417 /* Hamming distance computation and k-nn */
418 
419 
/* Reinterpret a byte-code pointer as a pointer to 64-bit words.
 * The argument is parenthesized so that expressions such as C64(p + 8) cast
 * the whole expression, and the cast keeps the pointee const: every use in
 * this file passes a const uint8_t pointer to a const uint64_t parameter. */
#define C64(x) ((const uint64_t *)(x))
421 
422 
423 /* Compute a set of Hamming distances */
424 void hammings (
425  const uint8_t * a,
426  const uint8_t * b,
427  size_t na, size_t nb,
428  size_t ncodes,
429  hamdis_t * __restrict dis)
430 {
431  FAISS_ASSERT (ncodes % 8 == 0);
432  switch (ncodes) {
433  case 8:
434  faiss::hammings <64> (C64(a), C64(b), na, nb, dis); return;
435  case 16:
436  faiss::hammings <128> (C64(a), C64(b), na, nb, dis); return;
437  case 32:
438  faiss::hammings <256> (C64(a), C64(b), na, nb, dis); return;
439  case 64:
440  faiss::hammings <512> (C64(a), C64(b), na, nb, dis); return;
441  default:
442  faiss::hammings (C64(a), C64(b), na, nb, ncodes * 8, dis); return;
443  }
444 }
445 
446 
447 void hammings_knn_core (
448  int_maxheap_array_t * ha,
449  const uint8_t * a,
450  const uint8_t * b,
451  size_t nb,
452  size_t ncodes)
453 {
454  FAISS_ASSERT (ncodes % 8 == 0);
455 
456  switch (ncodes) {
457  hammings_knn_1 (ha, C64(a), C64(b), nb, false, true);
458  // hammings_knn_hc<faiss::HammingComputer8>
459  // (8, ha, a, b, nb, false, true);
460  break;
461  case 16:
462  hammings_knn_hc<faiss::HammingComputer16>
463  (16, ha, a, b, nb, false, true);
464  break;
465  case 32:
466  hammings_knn_hc<faiss::HammingComputer32>
467  (32, ha, a, b, nb, false, true);
468  break;
469  default:
470  hammings_knn_hc<faiss::HammingComputerM8>
471  (ncodes, ha, a, b, nb, false, true);
472  }
473 }
474 
476  int_maxheap_array_t * ha,
477  const uint8_t * a,
478  const uint8_t * b,
479  size_t nb,
480  size_t ncodes,
481  int order)
482 {
483  switch (ncodes) {
484  case 4:
485  hammings_knn_hc<faiss::HammingComputer4>
486  (4, ha, a, b, nb, order, true);
487  break;
488  case 8:
489  hammings_knn_1 (ha, C64(a), C64(b), nb, order, true);
490  // hammings_knn_hc<faiss::HammingComputer8>
491  // (8, ha, a, b, nb, order, true);
492  break;
493  case 16:
494  hammings_knn_hc<faiss::HammingComputer16>
495  (16, ha, a, b, nb, order, true);
496  break;
497  case 32:
498  hammings_knn_hc<faiss::HammingComputer32>
499  (32, ha, a, b, nb, order, true);
500  break;
501  default:
502  FAISS_ASSERT (ncodes % 8 == 0);
503  hammings_knn_hc<faiss::HammingComputerM8>
504  (ncodes, ha, a, b, nb, order, true);
505 
506  }
507 }
508 
509 
510 
511 
512 /* Count number of matches given a max threshold */
513 void hamming_count_thres (
514  const uint8_t * bs1,
515  const uint8_t * bs2,
516  size_t n1,
517  size_t n2,
518  hamdis_t ht,
519  size_t ncodes,
520  size_t * nptr)
521 {
522  switch (ncodes) {
523  case 8:
524  faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
525  n1, n2, ht, nptr);
526  return;
527  case 16:
528  faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
529  n1, n2, ht, nptr);
530  return;
531  case 32:
532  faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
533  n1, n2, ht, nptr);
534  return;
535  case 64:
536  faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
537  n1, n2, ht, nptr);
538  return;
539  default:
540  FAISS_ASSERT (!"not-implemented for this number of bits");
541  }
542 }
543 
544 
545 /* Count number of cross-matches given a threshold */
546 void crosshamming_count_thres (
547  const uint8_t * dbs,
548  size_t n,
549  hamdis_t ht,
550  size_t ncodes,
551  size_t * nptr)
552 {
553  switch (ncodes) {
554  case 8:
555  faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr);
556  return;
557  case 16:
558  faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
559  return;
560  case 32:
561  faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
562  return;
563  case 64:
564  faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
565  return;
566  default:
567  FAISS_ASSERT (!"not-implemented for this number of bits");
568  }
569 }
570 
571 
572 /* Returns all matches given a threshold */
573 size_t match_hamming_thres (
574  const uint8_t * bs1,
575  const uint8_t * bs2,
576  size_t n1,
577  size_t n2,
578  hamdis_t ht,
579  size_t ncodes,
580  long * idx,
581  hamdis_t * dis)
582 {
583  switch (ncodes) {
584  case 8:
585  return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
586  n1, n2, ht, idx, dis);
587  case 16:
588  return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
589  n1, n2, ht, idx, dis);
590  case 32:
591  return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
592  n1, n2, ht, idx, dis);
593  case 64:
594  return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
595  n1, n2, ht, idx, dis);
596  default:
597  FAISS_ASSERT (!"not-implemented for this number of bits");
598  }
599 }
600 
601 
602 #undef C64
603 
604 
605 
606 /*************************************
607  * generalized Hamming distances
608  ************************************/
609 
610 
611 
612 template <class HammingComputer>
613 static void hamming_dis_inner_loop (
614  const uint8_t *ca,
615  const uint8_t *cb,
616  size_t nb,
617  size_t code_size,
618  int k,
619  hamdis_t * bh_val_,
620  long * bh_ids_)
621 {
622 
623  HammingComputer hc (ca, code_size);
624 
625  for (size_t j = 0; j < nb; j++) {
626  int ndiff = hc.hamming (cb);
627  cb += code_size;
628  if (ndiff < bh_val_[0]) {
629  maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
630  maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
631  }
632  }
633 }
634 
636  int_maxheap_array_t * ha,
637  const uint8_t * a,
638  const uint8_t * b,
639  size_t nb,
640  size_t code_size,
641  int ordered)
642 {
643  int na = ha->nh;
644  int k = ha->k;
645 
646  if (ordered)
647  ha->heapify ();
648 
649 #pragma omp parallel for
650  for (int i = 0; i < na; i++) {
651  const uint8_t *ca = a + i * code_size;
652  const uint8_t *cb = b;
653 
654  hamdis_t * bh_val_ = ha->val + i * k;
655  long * bh_ids_ = ha->ids + i * k;
656 
657  switch (code_size) {
658  case 8:
659  hamming_dis_inner_loop<GenHammingComputer8>
660  (ca, cb, nb, 8, k, bh_val_, bh_ids_);
661  break;
662  case 16:
663  hamming_dis_inner_loop<GenHammingComputer16>
664  (ca, cb, nb, 16, k, bh_val_, bh_ids_);
665  break;
666  case 32:
667  hamming_dis_inner_loop<GenHammingComputer32>
668  (ca, cb, nb, 32, k, bh_val_, bh_ids_);
669  break;
670  default:
671  hamming_dis_inner_loop<GenHammingComputerM8>
672  (ca, cb, nb, code_size, k, bh_val_, bh_ids_);
673  break;
674  }
675  }
676 
677  if (ordered)
678  ha->reorder ();
679 
680 }
681 
682 
683 
684 
685 
686 } // namespace faiss
size_t k
allocated size per heap
Definition: Heap.h:356
void generalized_hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)
Definition: hamming.cpp:635
void reorder()
reorder all the heaps
Definition: Heap.cpp:35
void hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
Definition: hamming.cpp:475
TI * ids
identifiers (size nh * k)
Definition: Heap.h:357
void heapify()
prepare all the heaps before adding
Definition: Heap.cpp:27
void hammings(const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)
T * val
values (distances or similarities), size nh * k
Definition: Heap.h:358
size_t nh
number of heaps
Definition: Heap.h:355