Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/hamming.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 /* Copyright 2004-present Facebook. All Rights Reserved.
10  *
11  * Implementation of Hamming related functions (distances, smallest distance
12  * selection with regular heap|radix and probabilistic heap|radix.
13  *
14  * IMPLEMENTATION NOTES
15  * Bitvectors are generally assumed to be multiples of 64 bits.
16  *
17  * hamdis_t is used for distances because at this time
18  * it is not clear how we will need to balance
19  * - flexibility in vector size (unclear more than 2^16 or even 2^8 bitvectors)
20  * - memory usage
21  * - cache-misses when dealing with large volumes of data (lower bits is better)
22  *
23  * The hamdis_t should optimally be compatibe with one of the Torch Storage
24  * (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes
25 */
26 
27 #include "hamming.h"
28 
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <math.h>
32 #include <assert.h>
33 #include <limits.h>
34 
35 #include "Heap.h"
36 #include "FaissAssert.h"
37 
38 static const size_t BLOCKSIZE_QUERY = 8192;
39 
40 
41 namespace faiss {
42 
43 static const uint8_t hamdis_tab_ham_bytes[256] = {
44  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
45  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
46  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
47  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
48  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
49  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
50  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
51  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
52  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
53  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
54  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
56  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
57  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
58  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
59  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
60 };
61 
62 
63 /* Elementary Hamming distance computation: unoptimized */
64 template <size_t nbits, typename T>
65 T hamming (const uint8_t *bs1,
66  const uint8_t *bs2)
67 {
68  const size_t nbytes = nbits / 8;
69  size_t i;
70  T h = 0;
71  for (i = 0; i < nbytes; i++)
72  h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]];
73  return h;
74 }
75 
76 
77 /* Hamming distances for multiples of 64 bits */
78 template <size_t nbits>
79 hamdis_t hamming (const uint64_t * bs1, const uint64_t * bs2)
80 {
81  const size_t nwords = nbits / 64;
82  size_t i;
83  hamdis_t h = 0;
84  for (i = 0; i < nwords; i++)
85  h += popcount64 (bs1[i] ^ bs2[i]);
86  return h;
87 }
88 
89 
90 
91 /* specialized (optimized) functions */
92 template <>
93 hamdis_t hamming<64> (const uint64_t * pa, const uint64_t * pb)
94 {
95  return popcount64 (pa[0] ^ pb[0]);
96 }
97 
98 
99 template <>
100 hamdis_t hamming<128> (const uint64_t *pa, const uint64_t *pb)
101 {
102  return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
103 }
104 
105 
106 template <>
107 hamdis_t hamming<256> (const uint64_t * pa, const uint64_t * pb)
108 {
109  return popcount64 (pa[0] ^ pb[0])
110  + popcount64 (pa[1] ^ pb[1])
111  + popcount64 (pa[2] ^ pb[2])
112  + popcount64 (pa[3] ^ pb[3]);
113 }
114 
115 
116 /* Hamming distances for multiple of 64 bits */
117 hamdis_t hamming (
118  const uint64_t * bs1,
119  const uint64_t * bs2,
120  size_t nwords)
121 {
122  size_t i;
123  hamdis_t h = 0;
124  for (i = 0; i < nwords; i++)
125  h += popcount64 (bs1[i] ^ bs2[i]);
126  return h;
127 }
128 
129 
130 
131 template <size_t nbits>
132 void hammings (
133  const uint64_t * bs1,
134  const uint64_t * bs2,
135  size_t n1, size_t n2,
136  hamdis_t * dis)
137 
138 {
139  size_t i, j;
140  const size_t nwords = nbits / 64;
141  for (i = 0; i < n1; i++) {
142  const uint64_t * __restrict bs1_ = bs1 + i * nwords;
143  hamdis_t * __restrict dis_ = dis + i * n2;
144  for (j = 0; j < n2; j++)
145  dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
146  }
147 }
148 
149 
150 
151 void hammings (
152  const uint64_t * bs1,
153  const uint64_t * bs2,
154  size_t n1,
155  size_t n2,
156  size_t nwords,
157  hamdis_t * __restrict dis)
158 {
159  size_t i, j;
160  n1 *= nwords;
161  n2 *= nwords;
162  for (i = 0; i < n1; i+=nwords) {
163  const uint64_t * bs1_ = bs1+i;
164  for (j = 0; j < n2; j+=nwords)
165  dis[j] = hamming (bs1_, bs2+j, nwords);
166  }
167 }
168 
169 
170 
171 
172 /* Count number of matches given a max threshold */
173 template <size_t nbits>
174 void hamming_count_thres (
175  const uint64_t * bs1,
176  const uint64_t * bs2,
177  size_t n1,
178  size_t n2,
179  hamdis_t ht,
180  size_t * nptr)
181 {
182  const size_t nwords = nbits / 64;
183  size_t i, j, posm = 0;
184  const uint64_t * bs2_ = bs2;
185 
186  for (i = 0; i < n1; i++) {
187  bs2 = bs2_;
188  for (j = 0; j < n2; j++) {
189  /* collect the match only if this satisfies the threshold */
190  if (hamming <nbits> (bs1, bs2) <= ht)
191  posm++;
192  bs2 += nwords;
193  }
194  bs1 += nwords; /* next signature */
195  }
196  *nptr = posm;
197 }
198 
199 
200 template <size_t nbits>
201 void crosshamming_count_thres (
202  const uint64_t * dbs,
203  size_t n,
204  int ht,
205  size_t * nptr)
206 {
207  const size_t nwords = nbits / 64;
208  size_t i, j, posm = 0;
209  const uint64_t * bs1 = dbs;
210  for (i = 0; i < n; i++) {
211  const uint64_t * bs2 = bs1 + 2;
212  for (j = i + 1; j < n; j++) {
213  /* collect the match only if this satisfies the threshold */
214  if (hamming <nbits> (bs1, bs2) <= ht)
215  posm++;
216  bs2 += nwords;
217  }
218  bs1 += nwords;
219  }
220  *nptr = posm;
221 }
222 
223 
224 template <size_t nbits>
225 size_t match_hamming_thres (
226  const uint64_t * bs1,
227  const uint64_t * bs2,
228  size_t n1,
229  size_t n2,
230  int ht,
231  long * idx,
232  hamdis_t * hams)
233 {
234  const size_t nwords = nbits / 64;
235  size_t i, j, posm = 0;
236  hamdis_t h;
237  const uint64_t * bs2_ = bs2;
238  for (i = 0; i < n1; i++) {
239  bs2 = bs2_;
240  for (j = 0; j < n2; j++) {
241  /* Here perform the real work of computing the distance */
242  h = hamming <nbits> (bs1, bs2);
243 
244  /* collect the match only if this satisfies the threshold */
245  if (h <= ht) {
246  /* Enough space to store another match ? */
247  *idx = i; idx++;
248  *idx = j; idx++;
249  *hams = h;
250  hams++;
251  posm++;
252  }
253  bs2+=nwords; /* next signature */
254  }
255  bs1+=nwords;
256  }
257  return posm;
258 }
259 
260 
261 /* Return closest neighbors w.r.t Hamming distance */
262 template <class HammingComputer>
263 static
264 void hammings_knn_hc (
265  int bytes_per_code,
266  int_maxheap_array_t * ha,
267  const uint8_t * bs1,
268  const uint8_t * bs2,
269  size_t n2,
270  bool order = true,
271  bool init_heap = true)
272 {
273  size_t k = ha->k;
274 
275 
276  if (init_heap) ha->heapify ();
277 
278  /* The computation here does not involved any blockization, which
279  is suboptimal for many queries in parallel. */
280 #pragma omp parallel for
281  for (size_t i = 0; i < ha->nh; i++) {
282  HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
283 
284  const uint8_t * bs2_ = bs2;
285  hamdis_t dis;
286  hamdis_t * __restrict bh_val_ = ha->val + i * k;
287  long * __restrict bh_ids_ = ha->ids + i * k;
288  size_t j;
289  for (j = 0; j < n2; j++, bs2_+= bytes_per_code) {
290  dis = hc.hamming (bs2_);
291  if (dis < bh_val_[0]) {
292  faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
293  faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
294  }
295  }
296  }
297  if (order) ha->reorder ();
298  }
299 
300 
301 
302 // works faster than the template version
303 static
304 void hammings_knn_1 (
305  int_maxheap_array_t * ha,
306  const uint64_t * bs1,
307  const uint64_t * bs2,
308  size_t n2,
309  bool order = true,
310  bool init_heap = true)
311 {
312  const size_t nwords = 1;
313  size_t k = ha->k;
314 
315 
316  if (init_heap) {
317  ha->heapify ();
318  }
319 
320 #pragma omp parallel for
321  for (size_t i = 0; i < ha->nh; i++) {
322  const uint64_t bs1_ = bs1 [i];
323  const uint64_t * bs2_ = bs2;
324  hamdis_t dis;
325  hamdis_t * bh_val_ = ha->val + i * k;
326  hamdis_t bh_val_0 = bh_val_[0];
327  long * bh_ids_ = ha->ids + i * k;
328  size_t j;
329  for (j = 0; j < n2; j++, bs2_+= nwords) {
330  dis = popcount64 (bs1_ ^ *bs2_);
331  if (dis < bh_val_0) {
332  faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
333  faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
334  bh_val_0 = bh_val_[0];
335  }
336  }
337  }
338  if (order) {
339  ha->reorder ();
340  }
341 }
342 
343 
344 
345 
346 /* Functions to maps vectors to bits. Assume proper allocation done beforehand,
347  meaning that b should be be able to receive as many bits as x may produce. */
348 
349 /*
350  * dimension 0 corresponds to the least significant bit of b[0], or
351  * equivalently to the lsb of the first byte that is stored.
352  */
353 void fvec2bitvec (const float * x, uint8_t * b, size_t d)
354 {
355  for (int i = 0; i < d; i += 8) {
356  uint8_t w = 0;
357  uint8_t mask = 1;
358  int nj = i + 8 <= d ? 8 : d - i;
359  for (int j = 0; j < nj; j++) {
360  if (x[i + j] >= 0)
361  w |= mask;
362  mask <<= 1;
363  }
364  *b = w;
365  b++;
366  }
367 }
368 
369 
370 
371 /* Same but for n vectors.
372  Ensure that the ouptut b is byte-aligned (pad with 0s). */
373 void fvecs2bitvecs (const float * x, uint8_t * b, size_t d, size_t n)
374 {
375  const long ncodes = ((d + 7) / 8);
376 #pragma omp parallel for
377  for (size_t i = 0; i < n; i++)
378  fvec2bitvec (x + i * d, b + i * ncodes, d);
379 }
380 
381 
382 /* Reverse bit (NOT a optimized function, only used for print purpose) */
383 static uint64_t uint64_reverse_bits (uint64_t b)
384 {
385  int i;
386  uint64_t revb = 0;
387  for (i = 0; i < 64; i++) {
388  revb <<= 1;
389  revb |= b & 1;
390  b >>= 1;
391  }
392  return revb;
393 }
394 
395 
396 /* print the bit vector */
397 void bitvec_print (const uint8_t * b, size_t d)
398 {
399  size_t i, j;
400  for (i = 0; i < d; ) {
401  uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
402  for (j = 0; j < 64 && i < d; j++, i++) {
403  printf ("%d", (int) (brev & 1));
404  brev >>= 1;
405  }
406  b += 8;
407  printf (" ");
408  }
409 }
410 
411 
412 
413 
414 
415 /*----------------------------------------*/
416 /* Hamming distance computation and k-nn */
417 
418 
419 #define C64(x) ((uint64_t *)x)
420 
421 
422 /* Compute a set of Hamming distances */
423 void hammings (
424  const uint8_t * a,
425  const uint8_t * b,
426  size_t na, size_t nb,
427  size_t ncodes,
428  hamdis_t * __restrict dis)
429 {
430  FAISS_THROW_IF_NOT (ncodes % 8 == 0);
431  switch (ncodes) {
432  case 8:
433  faiss::hammings <64> (C64(a), C64(b), na, nb, dis); return;
434  case 16:
435  faiss::hammings <128> (C64(a), C64(b), na, nb, dis); return;
436  case 32:
437  faiss::hammings <256> (C64(a), C64(b), na, nb, dis); return;
438  case 64:
439  faiss::hammings <512> (C64(a), C64(b), na, nb, dis); return;
440  default:
441  faiss::hammings (C64(a), C64(b), na, nb, ncodes * 8, dis); return;
442  }
443 }
444 
445 
446 void hammings_knn_core (
447  int_maxheap_array_t * ha,
448  const uint8_t * a,
449  const uint8_t * b,
450  size_t nb,
451  size_t ncodes)
452 {
453  FAISS_THROW_IF_NOT (ncodes % 8 == 0);
454 
455  switch (ncodes) {
456  hammings_knn_1 (ha, C64(a), C64(b), nb, false, true);
457  // hammings_knn_hc<faiss::HammingComputer8>
458  // (8, ha, a, b, nb, false, true);
459  break;
460  case 16:
461  hammings_knn_hc<faiss::HammingComputer16>
462  (16, ha, a, b, nb, false, true);
463  break;
464  case 32:
465  hammings_knn_hc<faiss::HammingComputer32>
466  (32, ha, a, b, nb, false, true);
467  break;
468  default:
469  hammings_knn_hc<faiss::HammingComputerM8>
470  (ncodes, ha, a, b, nb, false, true);
471  }
472 }
473 
475  int_maxheap_array_t * ha,
476  const uint8_t * a,
477  const uint8_t * b,
478  size_t nb,
479  size_t ncodes,
480  int order)
481 {
482  switch (ncodes) {
483  case 4:
484  hammings_knn_hc<faiss::HammingComputer4>
485  (4, ha, a, b, nb, order, true);
486  break;
487  case 8:
488  hammings_knn_1 (ha, C64(a), C64(b), nb, order, true);
489  // hammings_knn_hc<faiss::HammingComputer8>
490  // (8, ha, a, b, nb, order, true);
491  break;
492  case 16:
493  hammings_knn_hc<faiss::HammingComputer16>
494  (16, ha, a, b, nb, order, true);
495  break;
496  case 32:
497  hammings_knn_hc<faiss::HammingComputer32>
498  (32, ha, a, b, nb, order, true);
499  break;
500  default:
501  if(ncodes % 8 == 0) {
502  hammings_knn_hc<faiss::HammingComputerM8>
503  (ncodes, ha, a, b, nb, order, true);
504  } else {
505  hammings_knn_hc<faiss::HammingComputerDefault>
506  (ncodes, ha, a, b, nb, order, true);
507 
508  }
509  }
510 }
511 
512 
513 
514 
515 /* Count number of matches given a max threshold */
516 void hamming_count_thres (
517  const uint8_t * bs1,
518  const uint8_t * bs2,
519  size_t n1,
520  size_t n2,
521  hamdis_t ht,
522  size_t ncodes,
523  size_t * nptr)
524 {
525  switch (ncodes) {
526  case 8:
527  faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
528  n1, n2, ht, nptr);
529  return;
530  case 16:
531  faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
532  n1, n2, ht, nptr);
533  return;
534  case 32:
535  faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
536  n1, n2, ht, nptr);
537  return;
538  case 64:
539  faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
540  n1, n2, ht, nptr);
541  return;
542  default:
543  FAISS_THROW_FMT ("not implemented for %zu bits", ncodes);
544  }
545 }
546 
547 
548 /* Count number of cross-matches given a threshold */
549 void crosshamming_count_thres (
550  const uint8_t * dbs,
551  size_t n,
552  hamdis_t ht,
553  size_t ncodes,
554  size_t * nptr)
555 {
556  switch (ncodes) {
557  case 8:
558  faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr);
559  return;
560  case 16:
561  faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
562  return;
563  case 32:
564  faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
565  return;
566  case 64:
567  faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
568  return;
569  default:
570  FAISS_THROW_FMT ("not implemented for %zu bits", ncodes);
571  }
572 }
573 
574 
575 /* Returns all matches given a threshold */
576 size_t match_hamming_thres (
577  const uint8_t * bs1,
578  const uint8_t * bs2,
579  size_t n1,
580  size_t n2,
581  hamdis_t ht,
582  size_t ncodes,
583  long * idx,
584  hamdis_t * dis)
585 {
586  switch (ncodes) {
587  case 8:
588  return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
589  n1, n2, ht, idx, dis);
590  case 16:
591  return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
592  n1, n2, ht, idx, dis);
593  case 32:
594  return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
595  n1, n2, ht, idx, dis);
596  case 64:
597  return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
598  n1, n2, ht, idx, dis);
599  default:
600  FAISS_THROW_FMT ("not implemented for %zu bits", ncodes);
601  return 0;
602  }
603 }
604 
605 
606 #undef C64
607 
608 
609 
610 /*************************************
611  * generalized Hamming distances
612  ************************************/
613 
614 
615 
616 template <class HammingComputer>
617 static void hamming_dis_inner_loop (
618  const uint8_t *ca,
619  const uint8_t *cb,
620  size_t nb,
621  size_t code_size,
622  int k,
623  hamdis_t * bh_val_,
624  long * bh_ids_)
625 {
626 
627  HammingComputer hc (ca, code_size);
628 
629  for (size_t j = 0; j < nb; j++) {
630  int ndiff = hc.hamming (cb);
631  cb += code_size;
632  if (ndiff < bh_val_[0]) {
633  maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
634  maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
635  }
636  }
637 }
638 
640  int_maxheap_array_t * ha,
641  const uint8_t * a,
642  const uint8_t * b,
643  size_t nb,
644  size_t code_size,
645  int ordered)
646 {
647  int na = ha->nh;
648  int k = ha->k;
649 
650  if (ordered)
651  ha->heapify ();
652 
653 #pragma omp parallel for
654  for (int i = 0; i < na; i++) {
655  const uint8_t *ca = a + i * code_size;
656  const uint8_t *cb = b;
657 
658  hamdis_t * bh_val_ = ha->val + i * k;
659  long * bh_ids_ = ha->ids + i * k;
660 
661  switch (code_size) {
662  case 8:
663  hamming_dis_inner_loop<GenHammingComputer8>
664  (ca, cb, nb, 8, k, bh_val_, bh_ids_);
665  break;
666  case 16:
667  hamming_dis_inner_loop<GenHammingComputer16>
668  (ca, cb, nb, 16, k, bh_val_, bh_ids_);
669  break;
670  case 32:
671  hamming_dis_inner_loop<GenHammingComputer32>
672  (ca, cb, nb, 32, k, bh_val_, bh_ids_);
673  break;
674  default:
675  hamming_dis_inner_loop<GenHammingComputerM8>
676  (ca, cb, nb, code_size, k, bh_val_, bh_ids_);
677  break;
678  }
679  }
680 
681  if (ordered)
682  ha->reorder ();
683 
684 }
685 
686 
687 
688 
689 
690 } // namespace faiss
size_t k
allocated size per heap
Definition: Heap.h:355
void generalized_hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)
Definition: hamming.cpp:639
void reorder()
reorder all the heaps
Definition: Heap.cpp:34
void hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
Definition: hamming.cpp:474
TI * ids
identifiers (size nh * k)
Definition: Heap.h:356
void heapify()
prepare all the heaps before adding
Definition: Heap.cpp:26
void hammings(const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)
T * val
values (distances or similarities), size nh * k
Definition: Heap.h:357
size_t nh
number of heaps
Definition: Heap.h:354