Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/hamming.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the CC-by-NC license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 /* Copyright 2004-present Facebook. All Rights Reserved.
10  *
11  * Implementation of Hamming related functions (distances, smallest distance
12  * selection with regular heap|radix and probabilistic heap|radix.
13  *
14  * IMPLEMENTATION NOTES
15  * Bitvectors are generally assumed to be multiples of 64 bits.
16  *
17  * hamdis_t is used for distances because at this time
18  * it is not clear how we will need to balance
19  * - flexibility in vector size (unclear more than 2^16 or even 2^8 bitvectors)
20  * - memory usage
21  * - cache-misses when dealing with large volumes of data (lower bits is better)
22  *
23  * The hamdis_t should optimally be compatibe with one of the Torch Storage
24  * (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes
25 */
26 
27 #include "hamming.h"
28 
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <math.h>
32 #include <assert.h>
33 #include <limits.h>
34 
35 #include "Heap.h"
36 #include "FaissAssert.h"
37 
38 static const size_t BLOCKSIZE_QUERY = 8192;
39 
40 
41 namespace faiss {
42 
43 static const uint8_t hamdis_tab_ham_bytes[256] = {
44  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
45  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
46  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
47  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
48  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
49  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
50  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
51  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
52  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
53  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
54  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
56  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
57  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
58  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
59  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
60 };
61 
62 
63 /* Elementary Hamming distance computation: unoptimized */
64 template <size_t nbits, typename T>
65 T hamming (const uint8_t *bs1,
66  const uint8_t *bs2)
67 {
68  const size_t nbytes = nbits / 8;
69  size_t i;
70  T h = 0;
71  for (i = 0; i < nbytes; i++)
72  h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]];
73  return h;
74 }
75 
76 
77 /* Hamming distances for multiples of 64 bits */
78 template <size_t nbits>
79 hamdis_t hamming (const uint64_t * bs1, const uint64_t * bs2)
80 {
81  const size_t nwords = nbits / 64;
82  size_t i;
83  hamdis_t h = 0;
84  for (i = 0; i < nwords; i++)
85  h += popcount64 (bs1[i] ^ bs2[i]);
86  return h;
87 }
88 
89 
90 
91 /* specialized (optimized) functions */
92 template <>
93 hamdis_t hamming<64> (const uint64_t * pa, const uint64_t * pb)
94 {
95  return popcount64 (pa[0] ^ pb[0]);
96 }
97 
98 
99 template <>
100 hamdis_t hamming<128> (const uint64_t *pa, const uint64_t *pb)
101 {
102  return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
103 }
104 
105 
106 template <>
107 hamdis_t hamming<256> (const uint64_t * pa, const uint64_t * pb)
108 {
109  return popcount64 (pa[0] ^ pb[0])
110  + popcount64 (pa[1] ^ pb[1])
111  + popcount64 (pa[2] ^ pb[2])
112  + popcount64 (pa[3] ^ pb[3]);
113 }
114 
115 
116 /* Hamming distances for multiple of 64 bits */
117 hamdis_t hamming (
118  const uint64_t * bs1,
119  const uint64_t * bs2,
120  size_t nwords)
121 {
122  size_t i;
123  hamdis_t h = 0;
124  for (i = 0; i < nwords; i++)
125  h += popcount64 (bs1[i] ^ bs2[i]);
126  return h;
127 }
128 
129 
130 
131 template <size_t nbits>
132 void hammings (
133  const uint64_t * bs1,
134  const uint64_t * bs2,
135  size_t n1, size_t n2,
136  hamdis_t * dis)
137 
138 {
139  size_t i, j;
140  const size_t nwords = nbits / 64;
141  for (i = 0; i < n1; i++) {
142  const uint64_t * __restrict bs1_ = bs1 + i * nwords;
143  hamdis_t * __restrict dis_ = dis + i * n2;
144  for (j = 0; j < n2; j++)
145  dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
146  }
147 }
148 
149 
150 
151 void hammings (
152  const uint64_t * bs1,
153  const uint64_t * bs2,
154  size_t n1,
155  size_t n2,
156  size_t nwords,
157  hamdis_t * __restrict dis)
158 {
159  size_t i, j;
160  n1 *= nwords;
161  n2 *= nwords;
162  for (i = 0; i < n1; i+=nwords) {
163  const uint64_t * bs1_ = bs1+i;
164  for (j = 0; j < n2; j+=nwords)
165  dis[j] = hamming (bs1_, bs2+j, nwords);
166  }
167 }
168 
169 
170 
171 
172 /* Count number of matches given a max threshold */
173 template <size_t nbits>
174 void hamming_count_thres (
175  const uint64_t * bs1,
176  const uint64_t * bs2,
177  size_t n1,
178  size_t n2,
179  hamdis_t ht,
180  size_t * nptr)
181 {
182  const size_t nwords = nbits / 64;
183  size_t i, j, posm = 0;
184  const uint64_t * bs2_ = bs2;
185 
186  for (i = 0; i < n1; i++) {
187  bs2 = bs2_;
188  for (j = 0; j < n2; j++) {
189  /* collect the match only if this satisfies the threshold */
190  if (hamming <nbits> (bs1, bs2) <= ht)
191  posm++;
192  bs2 += nwords;
193  }
194  bs1 += nwords; /* next signature */
195  }
196  *nptr = posm;
197 }
198 
199 
200 template <size_t nbits>
201 void crosshamming_count_thres (
202  const uint64_t * dbs,
203  size_t n,
204  int ht,
205  size_t * nptr)
206 {
207  const size_t nwords = nbits / 64;
208  size_t i, j, posm = 0;
209  const uint64_t * bs1 = dbs;
210  for (i = 0; i < n; i++) {
211  const uint64_t * bs2 = bs1 + 2;
212  for (j = i + 1; j < n; j++) {
213  /* collect the match only if this satisfies the threshold */
214  if (hamming <nbits> (bs1, bs2) <= ht)
215  posm++;
216  bs2 += nwords;
217  }
218  bs1 += nwords;
219  }
220  *nptr = posm;
221 }
222 
223 
224 template <size_t nbits>
225 size_t match_hamming_thres (
226  const uint64_t * bs1,
227  const uint64_t * bs2,
228  size_t n1,
229  size_t n2,
230  int ht,
231  long * idx,
232  hamdis_t * hams)
233 {
234  const size_t nwords = nbits / 64;
235  size_t i, j, posm = 0;
236  hamdis_t h;
237  const uint64_t * bs2_ = bs2;
238  for (i = 0; i < n1; i++) {
239  bs2 = bs2_;
240  for (j = 0; j < n2; j++) {
241  /* Here perform the real work of computing the distance */
242  h = hamming <nbits> (bs1, bs2);
243 
244  /* collect the match only if this satisfies the threshold */
245  if (h <= ht) {
246  /* Enough space to store another match ? */
247  *idx = i; idx++;
248  *idx = j; idx++;
249  *hams = h;
250  hams++;
251  posm++;
252  }
253  bs2+=nwords; /* next signature */
254  }
255  bs1+=nwords;
256  }
257  return posm;
258 }
259 
260 
261 /* Return closest neighbors w.r.t Hamming distance */
262 template <class HammingComputer>
263 static
264 void hammings_knn_hc (
265  int bytes_per_code,
266  int_maxheap_array_t * ha,
267  const uint8_t * bs1,
268  const uint8_t * bs2,
269  size_t n2,
270  bool order = true,
271  bool init_heap = true)
272 {
273  size_t k = ha->k;
274 
275 
276  if (init_heap) ha->heapify ();
277 
278  /* The computation here does not involved any blockization, which
279  is suboptimal for many queries in parallel. */
280 #pragma omp parallel for
281  for (size_t i = 0; i < ha->nh; i++) {
282  HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
283 
284  const uint8_t * bs2_ = bs2;
285  hamdis_t dis;
286  hamdis_t * __restrict bh_val_ = ha->val + i * k;
287  long * __restrict bh_ids_ = ha->ids + i * k;
288  size_t j;
289  for (j = 0; j < n2; j++, bs2_+= bytes_per_code) {
290  dis = hc.hamming (bs2_);
291  if (dis < bh_val_[0]) {
292  faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
293  faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
294  }
295  }
296  }
297  if (order) ha->reorder ();
298  }
299 
300 
301 
302 // works faster than the template version
303 static
304 void hammings_knn_1 (
305  int_maxheap_array_t * ha,
306  const uint64_t * bs1,
307  const uint64_t * bs2,
308  size_t n2,
309  bool order = true,
310  bool init_heap = true)
311 {
312  const size_t nwords = 1;
313  size_t k = ha->k;
314 
315 
316  if (init_heap) {
317  ha->heapify ();
318  }
319 
320 #pragma omp parallel for
321  for (size_t i = 0; i < ha->nh; i++) {
322  const uint64_t bs1_ = bs1 [i];
323  const uint64_t * bs2_ = bs2;
324  hamdis_t dis;
325  hamdis_t * bh_val_ = ha->val + i * k;
326  hamdis_t bh_val_0 = bh_val_[0];
327  long * bh_ids_ = ha->ids + i * k;
328  size_t j;
329  for (j = 0; j < n2; j++, bs2_+= nwords) {
330  dis = popcount64 (bs1_ ^ *bs2_);
331  if (dis < bh_val_0) {
332  faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
333  faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
334  bh_val_0 = bh_val_[0];
335  }
336  }
337  }
338  if (order) {
339  ha->reorder ();
340  }
341 }
342 
343 
344 
345 
346 /* Functions to maps vectors to bits. Assume proper allocation done beforehand,
347  meaning that b should be be able to receive as many bits as x may produce. */
348 
349 /*
350  * dimension 0 corresponds to the least significant bit of b[0], or
351  * equivalently to the lsb of the first byte that is stored.
352  */
353 void fvec2bitvec (const float * x, uint8_t * b, size_t d)
354 {
355  for (int i = 0; i < d; i += 8) {
356  uint8_t w = 0;
357  uint8_t mask = 1;
358  int nj = i + 8 <= d ? 8 : d - i;
359  for (int j = 0; j < nj; j++) {
360  if (x[i + j] >= 0)
361  w |= mask;
362  mask <<= 1;
363  }
364  *b = w;
365  b++;
366  }
367 }
368 
369 
370 
371 /* Same but for n vectors.
372  Ensure that the ouptut b is byte-aligned (pad with 0s). */
373 void fvecs2bitvecs (const float * x, uint8_t * b, size_t d, size_t n)
374 {
375  const long ncodes = ((d + 7) / 8);
376 #pragma omp parallel for
377  for (size_t i = 0; i < n; i++)
378  fvec2bitvec (x + i * d, b + i * ncodes, d);
379 }
380 
381 
382 /* Reverse bit (NOT a optimized function, only used for print purpose) */
383 static uint64_t uint64_reverse_bits (uint64_t b)
384 {
385  int i;
386  uint64_t revb = 0;
387  for (i = 0; i < 64; i++) {
388  revb <<= 1;
389  revb |= b & 1;
390  b >>= 1;
391  }
392  return revb;
393 }
394 
395 
396 /* print the bit vector */
397 void bitvec_print (const uint8_t * b, size_t d)
398 {
399  size_t i, j;
400  for (i = 0; i < d; ) {
401  uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
402  for (j = 0; j < 64 && i < d; j++, i++) {
403  printf ("%d", (int) (brev & 1));
404  brev >>= 1;
405  }
406  b += 8;
407  printf (" ");
408  }
409 }
410 
411 
412 
413 
414 
415 /*----------------------------------------*/
416 /* Hamming distance computation and k-nn */
417 
418 
419 #define C64(x) ((uint64_t *)x)
420 
421 
422 /* Compute a set of Hamming distances */
423 void hammings (
424  const uint8_t * a,
425  const uint8_t * b,
426  size_t na, size_t nb,
427  size_t ncodes,
428  hamdis_t * __restrict dis)
429 {
430  FAISS_THROW_IF_NOT (ncodes % 8 == 0);
431  switch (ncodes) {
432  case 8:
433  faiss::hammings <64> (C64(a), C64(b), na, nb, dis); return;
434  case 16:
435  faiss::hammings <128> (C64(a), C64(b), na, nb, dis); return;
436  case 32:
437  faiss::hammings <256> (C64(a), C64(b), na, nb, dis); return;
438  case 64:
439  faiss::hammings <512> (C64(a), C64(b), na, nb, dis); return;
440  default:
441  faiss::hammings (C64(a), C64(b), na, nb, ncodes * 8, dis); return;
442  }
443 }
444 
445 
446 void hammings_knn_core (
447  int_maxheap_array_t * ha,
448  const uint8_t * a,
449  const uint8_t * b,
450  size_t nb,
451  size_t ncodes)
452 {
453  FAISS_THROW_IF_NOT (ncodes % 8 == 0);
454 
455  switch (ncodes) {
456  hammings_knn_1 (ha, C64(a), C64(b), nb, false, true);
457  // hammings_knn_hc<faiss::HammingComputer8>
458  // (8, ha, a, b, nb, false, true);
459  break;
460  case 16:
461  hammings_knn_hc<faiss::HammingComputer16>
462  (16, ha, a, b, nb, false, true);
463  break;
464  case 32:
465  hammings_knn_hc<faiss::HammingComputer32>
466  (32, ha, a, b, nb, false, true);
467  break;
468  default:
469  hammings_knn_hc<faiss::HammingComputerM8>
470  (ncodes, ha, a, b, nb, false, true);
471  }
472 }
473 
475  int_maxheap_array_t * ha,
476  const uint8_t * a,
477  const uint8_t * b,
478  size_t nb,
479  size_t ncodes,
480  int order)
481 {
482  switch (ncodes) {
483  case 4:
484  hammings_knn_hc<faiss::HammingComputer4>
485  (4, ha, a, b, nb, order, true);
486  break;
487  case 8:
488  hammings_knn_1 (ha, C64(a), C64(b), nb, order, true);
489  // hammings_knn_hc<faiss::HammingComputer8>
490  // (8, ha, a, b, nb, order, true);
491  break;
492  case 16:
493  hammings_knn_hc<faiss::HammingComputer16>
494  (16, ha, a, b, nb, order, true);
495  break;
496  case 32:
497  hammings_knn_hc<faiss::HammingComputer32>
498  (32, ha, a, b, nb, order, true);
499  break;
500  default:
501  FAISS_THROW_IF_NOT (ncodes % 8 == 0);
502  hammings_knn_hc<faiss::HammingComputerM8>
503  (ncodes, ha, a, b, nb, order, true);
504 
505  }
506 }
507 
508 
509 
510 
511 /* Count number of matches given a max threshold */
512 void hamming_count_thres (
513  const uint8_t * bs1,
514  const uint8_t * bs2,
515  size_t n1,
516  size_t n2,
517  hamdis_t ht,
518  size_t ncodes,
519  size_t * nptr)
520 {
521  switch (ncodes) {
522  case 8:
523  faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
524  n1, n2, ht, nptr);
525  return;
526  case 16:
527  faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
528  n1, n2, ht, nptr);
529  return;
530  case 32:
531  faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
532  n1, n2, ht, nptr);
533  return;
534  case 64:
535  faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
536  n1, n2, ht, nptr);
537  return;
538  default:
539  FAISS_THROW_FMT ("not implemented for %zu bits", ncodes);
540  }
541 }
542 
543 
544 /* Count number of cross-matches given a threshold */
545 void crosshamming_count_thres (
546  const uint8_t * dbs,
547  size_t n,
548  hamdis_t ht,
549  size_t ncodes,
550  size_t * nptr)
551 {
552  switch (ncodes) {
553  case 8:
554  faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr);
555  return;
556  case 16:
557  faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
558  return;
559  case 32:
560  faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
561  return;
562  case 64:
563  faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
564  return;
565  default:
566  FAISS_THROW_FMT ("not implemented for %zu bits", ncodes);
567  }
568 }
569 
570 
571 /* Returns all matches given a threshold */
572 size_t match_hamming_thres (
573  const uint8_t * bs1,
574  const uint8_t * bs2,
575  size_t n1,
576  size_t n2,
577  hamdis_t ht,
578  size_t ncodes,
579  long * idx,
580  hamdis_t * dis)
581 {
582  switch (ncodes) {
583  case 8:
584  return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
585  n1, n2, ht, idx, dis);
586  case 16:
587  return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
588  n1, n2, ht, idx, dis);
589  case 32:
590  return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
591  n1, n2, ht, idx, dis);
592  case 64:
593  return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
594  n1, n2, ht, idx, dis);
595  default:
596  FAISS_THROW_FMT ("not implemented for %zu bits", ncodes);
597  return 0;
598  }
599 }
600 
601 
602 #undef C64
603 
604 
605 
606 /*************************************
607  * generalized Hamming distances
608  ************************************/
609 
610 
611 
612 template <class HammingComputer>
613 static void hamming_dis_inner_loop (
614  const uint8_t *ca,
615  const uint8_t *cb,
616  size_t nb,
617  size_t code_size,
618  int k,
619  hamdis_t * bh_val_,
620  long * bh_ids_)
621 {
622 
623  HammingComputer hc (ca, code_size);
624 
625  for (size_t j = 0; j < nb; j++) {
626  int ndiff = hc.hamming (cb);
627  cb += code_size;
628  if (ndiff < bh_val_[0]) {
629  maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
630  maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
631  }
632  }
633 }
634 
636  int_maxheap_array_t * ha,
637  const uint8_t * a,
638  const uint8_t * b,
639  size_t nb,
640  size_t code_size,
641  int ordered)
642 {
643  int na = ha->nh;
644  int k = ha->k;
645 
646  if (ordered)
647  ha->heapify ();
648 
649 #pragma omp parallel for
650  for (int i = 0; i < na; i++) {
651  const uint8_t *ca = a + i * code_size;
652  const uint8_t *cb = b;
653 
654  hamdis_t * bh_val_ = ha->val + i * k;
655  long * bh_ids_ = ha->ids + i * k;
656 
657  switch (code_size) {
658  case 8:
659  hamming_dis_inner_loop<GenHammingComputer8>
660  (ca, cb, nb, 8, k, bh_val_, bh_ids_);
661  break;
662  case 16:
663  hamming_dis_inner_loop<GenHammingComputer16>
664  (ca, cb, nb, 16, k, bh_val_, bh_ids_);
665  break;
666  case 32:
667  hamming_dis_inner_loop<GenHammingComputer32>
668  (ca, cb, nb, 32, k, bh_val_, bh_ids_);
669  break;
670  default:
671  hamming_dis_inner_loop<GenHammingComputerM8>
672  (ca, cb, nb, code_size, k, bh_val_, bh_ids_);
673  break;
674  }
675  }
676 
677  if (ordered)
678  ha->reorder ();
679 
680 }
681 
682 
683 
684 
685 
686 } // namespace faiss
size_t k
allocated size per heap
Definition: Heap.h:355
void generalized_hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)
Definition: hamming.cpp:635
void reorder()
reorder all the heaps
Definition: Heap.cpp:34
void hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
Definition: hamming.cpp:474
TI * ids
identifiers (size nh * k)
Definition: Heap.h:356
void heapify()
prepare all the heaps before adding
Definition: Heap.cpp:26
void hammings(const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)
T * val
values (distances or similarities), size nh * k
Definition: Heap.h:357
size_t nh
number of heaps
Definition: Heap.h:354