faiss/hamming.cpp

687 lines
17 KiB
C++

/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the CC-by-NC license found in the
* LICENSE file in the root directory of this source tree.
*/
/* Copyright 2004-present Facebook. All Rights Reserved.
*
* Implementation of Hamming related functions (distances, smallest distance
* selection with regular heap|radix and probabilistic heap|radix.
*
* IMPLEMENTATION NOTES
* Bitvectors are generally assumed to be multiples of 64 bits.
*
* hamdis_t is used for distances because at this time
* it is not clear how we will need to balance
* - flexibility in vector size (unclear more than 2^16 or even 2^8 bitvectors)
* - memory usage
* - cache-misses when dealing with large volumes of data (lower bits is better)
*
* The hamdis_t should optimally be compatibe with one of the Torch Storage
* (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes
*/
#include "hamming.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>
#include <limits.h>
#include "Heap.h"
#include "FaissAssert.h"
static const size_t BLOCKSIZE_QUERY = 8192;
namespace faiss {
static const uint8_t hamdis_tab_ham_bytes[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
/* Elementary Hamming distance computation: unoptimized */
template <size_t nbits, typename T>
T hamming (const uint8_t *bs1,
const uint8_t *bs2)
{
const size_t nbytes = nbits / 8;
size_t i;
T h = 0;
for (i = 0; i < nbytes; i++)
h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]];
return h;
}
/* Hamming distances for multiples of 64 bits */
template <size_t nbits>
hamdis_t hamming (const uint64_t * bs1, const uint64_t * bs2)
{
const size_t nwords = nbits / 64;
size_t i;
hamdis_t h = 0;
for (i = 0; i < nwords; i++)
h += popcount64 (bs1[i] ^ bs2[i]);
return h;
}
/* specialized (optimized) functions */
template <>
hamdis_t hamming<64> (const uint64_t * pa, const uint64_t * pb)
{
return popcount64 (pa[0] ^ pb[0]);
}
template <>
hamdis_t hamming<128> (const uint64_t *pa, const uint64_t *pb)
{
return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
}
template <>
hamdis_t hamming<256> (const uint64_t * pa, const uint64_t * pb)
{
return popcount64 (pa[0] ^ pb[0])
+ popcount64 (pa[1] ^ pb[1])
+ popcount64 (pa[2] ^ pb[2])
+ popcount64 (pa[3] ^ pb[3]);
}
/* Hamming distances for multiple of 64 bits */
hamdis_t hamming (
const uint64_t * bs1,
const uint64_t * bs2,
size_t nwords)
{
size_t i;
hamdis_t h = 0;
for (i = 0; i < nwords; i++)
h += popcount64 (bs1[i] ^ bs2[i]);
return h;
}
template <size_t nbits>
void hammings (
const uint64_t * bs1,
const uint64_t * bs2,
size_t n1, size_t n2,
hamdis_t * dis)
{
size_t i, j;
const size_t nwords = nbits / 64;
for (i = 0; i < n1; i++) {
const uint64_t * __restrict bs1_ = bs1 + i * nwords;
hamdis_t * __restrict dis_ = dis + i * n2;
for (j = 0; j < n2; j++)
dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
}
}
void hammings (
const uint64_t * bs1,
const uint64_t * bs2,
size_t n1,
size_t n2,
size_t nwords,
hamdis_t * __restrict dis)
{
size_t i, j;
n1 *= nwords;
n2 *= nwords;
for (i = 0; i < n1; i+=nwords) {
const uint64_t * bs1_ = bs1+i;
for (j = 0; j < n2; j+=nwords)
dis[j] = hamming (bs1_, bs2+j, nwords);
}
}
/* Count number of matches given a max threshold */
template <size_t nbits>
void hamming_count_thres (
const uint64_t * bs1,
const uint64_t * bs2,
size_t n1,
size_t n2,
hamdis_t ht,
size_t * nptr)
{
const size_t nwords = nbits / 64;
size_t i, j, posm = 0;
const uint64_t * bs2_ = bs2;
for (i = 0; i < n1; i++) {
bs2 = bs2_;
for (j = 0; j < n2; j++) {
/* collect the match only if this satisfies the threshold */
if (hamming <nbits> (bs1, bs2) <= ht)
posm++;
bs2 += nwords;
}
bs1 += nwords; /* next signature */
}
*nptr = posm;
}
template <size_t nbits>
void crosshamming_count_thres (
const uint64_t * dbs,
size_t n,
int ht,
size_t * nptr)
{
const size_t nwords = nbits / 64;
size_t i, j, posm = 0;
const uint64_t * bs1 = dbs;
for (i = 0; i < n; i++) {
const uint64_t * bs2 = bs1 + 2;
for (j = i + 1; j < n; j++) {
/* collect the match only if this satisfies the threshold */
if (hamming <nbits> (bs1, bs2) <= ht)
posm++;
bs2 += nwords;
}
bs1 += nwords;
}
*nptr = posm;
}
template <size_t nbits>
size_t match_hamming_thres (
const uint64_t * bs1,
const uint64_t * bs2,
size_t n1,
size_t n2,
int ht,
long * idx,
hamdis_t * hams)
{
const size_t nwords = nbits / 64;
size_t i, j, posm = 0;
hamdis_t h;
const uint64_t * bs2_ = bs2;
for (i = 0; i < n1; i++) {
bs2 = bs2_;
for (j = 0; j < n2; j++) {
/* Here perform the real work of computing the distance */
h = hamming <nbits> (bs1, bs2);
/* collect the match only if this satisfies the threshold */
if (h <= ht) {
/* Enough space to store another match ? */
*idx = i; idx++;
*idx = j; idx++;
*hams = h;
hams++;
posm++;
}
bs2+=nwords; /* next signature */
}
bs1+=nwords;
}
return posm;
}
/* Return closest neighbors w.r.t Hamming distance */
template <class HammingComputer>
static
void hammings_knn_hc (
int bytes_per_code,
int_maxheap_array_t * ha,
const uint8_t * bs1,
const uint8_t * bs2,
size_t n2,
bool order = true,
bool init_heap = true)
{
size_t k = ha->k;
if (init_heap) ha->heapify ();
/* The computation here does not involved any blockization, which
is suboptimal for many queries in parallel. */
#pragma omp parallel for
for (size_t i = 0; i < ha->nh; i++) {
HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
const uint8_t * bs2_ = bs2;
hamdis_t dis;
hamdis_t * __restrict bh_val_ = ha->val + i * k;
long * __restrict bh_ids_ = ha->ids + i * k;
size_t j;
for (j = 0; j < n2; j++, bs2_+= bytes_per_code) {
dis = hc.hamming (bs2_);
if (dis < bh_val_[0]) {
faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
}
}
}
if (order) ha->reorder ();
}
// works faster than the template version
static
void hammings_knn_1 (
int_maxheap_array_t * ha,
const uint64_t * bs1,
const uint64_t * bs2,
size_t n2,
bool order = true,
bool init_heap = true)
{
const size_t nwords = 1;
size_t k = ha->k;
if (init_heap) {
ha->heapify ();
}
#pragma omp parallel for
for (size_t i = 0; i < ha->nh; i++) {
const uint64_t bs1_ = bs1 [i];
const uint64_t * bs2_ = bs2;
hamdis_t dis;
hamdis_t * bh_val_ = ha->val + i * k;
hamdis_t bh_val_0 = bh_val_[0];
long * bh_ids_ = ha->ids + i * k;
size_t j;
for (j = 0; j < n2; j++, bs2_+= nwords) {
dis = popcount64 (bs1_ ^ *bs2_);
if (dis < bh_val_0) {
faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
bh_val_0 = bh_val_[0];
}
}
}
if (order) {
ha->reorder ();
}
}
/* Functions to maps vectors to bits. Assume proper allocation done beforehand,
meaning that b should be be able to receive as many bits as x may produce. */
/*
* dimension 0 corresponds to the least significant bit of b[0], or
* equivalently to the lsb of the first byte that is stored.
*/
void fvec2bitvec (const float * x, uint8_t * b, size_t d)
{
for (int i = 0; i < d; i += 8) {
uint8_t w = 0;
uint8_t mask = 1;
int nj = i + 8 <= d ? 8 : d - i;
for (int j = 0; j < nj; j++) {
if (x[i + j] >= 0)
w |= mask;
mask <<= 1;
}
*b = w;
b++;
}
}
/* Same but for n vectors.
Ensure that the ouptut b is byte-aligned (pad with 0s). */
void fvecs2bitvecs (const float * x, uint8_t * b, size_t d, size_t n)
{
const long ncodes = ((d + 7) / 8);
#pragma omp parallel for
for (size_t i = 0; i < n; i++)
fvec2bitvec (x + i * d, b + i * ncodes, d);
}
/* Reverse bit (NOT a optimized function, only used for print purpose) */
static uint64_t uint64_reverse_bits (uint64_t b)
{
int i;
uint64_t revb = 0;
for (i = 0; i < 64; i++) {
revb <<= 1;
revb |= b & 1;
b >>= 1;
}
return revb;
}
/* print the bit vector */
void bitvec_print (const uint8_t * b, size_t d)
{
size_t i, j;
for (i = 0; i < d; ) {
uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
for (j = 0; j < 64 && i < d; j++, i++) {
printf ("%d", (int) (brev & 1));
brev >>= 1;
}
b += 8;
printf (" ");
}
}
/*----------------------------------------*/
/* Hamming distance computation and k-nn */
#define C64(x) ((uint64_t *)x)
/* Compute a set of Hamming distances */
void hammings (
const uint8_t * a,
const uint8_t * b,
size_t na, size_t nb,
size_t ncodes,
hamdis_t * __restrict dis)
{
FAISS_ASSERT (ncodes % 8 == 0);
switch (ncodes) {
case 8:
faiss::hammings <64> (C64(a), C64(b), na, nb, dis); return;
case 16:
faiss::hammings <128> (C64(a), C64(b), na, nb, dis); return;
case 32:
faiss::hammings <256> (C64(a), C64(b), na, nb, dis); return;
case 64:
faiss::hammings <512> (C64(a), C64(b), na, nb, dis); return;
default:
faiss::hammings (C64(a), C64(b), na, nb, ncodes * 8, dis); return;
}
}
void hammings_knn_core (
int_maxheap_array_t * ha,
const uint8_t * a,
const uint8_t * b,
size_t nb,
size_t ncodes)
{
FAISS_ASSERT (ncodes % 8 == 0);
switch (ncodes) {
hammings_knn_1 (ha, C64(a), C64(b), nb, false, true);
// hammings_knn_hc<faiss::HammingComputer8>
// (8, ha, a, b, nb, false, true);
break;
case 16:
hammings_knn_hc<faiss::HammingComputer16>
(16, ha, a, b, nb, false, true);
break;
case 32:
hammings_knn_hc<faiss::HammingComputer32>
(32, ha, a, b, nb, false, true);
break;
default:
hammings_knn_hc<faiss::HammingComputerM8>
(ncodes, ha, a, b, nb, false, true);
}
}
void hammings_knn (
int_maxheap_array_t * ha,
const uint8_t * a,
const uint8_t * b,
size_t nb,
size_t ncodes,
int order)
{
switch (ncodes) {
case 4:
hammings_knn_hc<faiss::HammingComputer4>
(4, ha, a, b, nb, order, true);
break;
case 8:
hammings_knn_1 (ha, C64(a), C64(b), nb, order, true);
// hammings_knn_hc<faiss::HammingComputer8>
// (8, ha, a, b, nb, order, true);
break;
case 16:
hammings_knn_hc<faiss::HammingComputer16>
(16, ha, a, b, nb, order, true);
break;
case 32:
hammings_knn_hc<faiss::HammingComputer32>
(32, ha, a, b, nb, order, true);
break;
default:
FAISS_ASSERT (ncodes % 8 == 0);
hammings_knn_hc<faiss::HammingComputerM8>
(ncodes, ha, a, b, nb, order, true);
}
}
/* Count number of matches given a max threshold */
void hamming_count_thres (
const uint8_t * bs1,
const uint8_t * bs2,
size_t n1,
size_t n2,
hamdis_t ht,
size_t ncodes,
size_t * nptr)
{
switch (ncodes) {
case 8:
faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
n1, n2, ht, nptr);
return;
case 16:
faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
n1, n2, ht, nptr);
return;
case 32:
faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
n1, n2, ht, nptr);
return;
case 64:
faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
n1, n2, ht, nptr);
return;
default:
FAISS_ASSERT (!"not-implemented for this number of bits");
}
}
/* Count number of cross-matches given a threshold */
void crosshamming_count_thres (
const uint8_t * dbs,
size_t n,
hamdis_t ht,
size_t ncodes,
size_t * nptr)
{
switch (ncodes) {
case 8:
faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr);
return;
case 16:
faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
return;
case 32:
faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
return;
case 64:
faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
return;
default:
FAISS_ASSERT (!"not-implemented for this number of bits");
}
}
/* Returns all matches given a threshold */
size_t match_hamming_thres (
const uint8_t * bs1,
const uint8_t * bs2,
size_t n1,
size_t n2,
hamdis_t ht,
size_t ncodes,
long * idx,
hamdis_t * dis)
{
switch (ncodes) {
case 8:
return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
n1, n2, ht, idx, dis);
case 16:
return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
n1, n2, ht, idx, dis);
case 32:
return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
n1, n2, ht, idx, dis);
case 64:
return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
n1, n2, ht, idx, dis);
default:
FAISS_ASSERT (!"not-implemented for this number of bits");
}
}
#undef C64
/*************************************
* generalized Hamming distances
************************************/
template <class HammingComputer>
static void hamming_dis_inner_loop (
const uint8_t *ca,
const uint8_t *cb,
size_t nb,
size_t code_size,
int k,
hamdis_t * bh_val_,
long * bh_ids_)
{
HammingComputer hc (ca, code_size);
for (size_t j = 0; j < nb; j++) {
int ndiff = hc.hamming (cb);
cb += code_size;
if (ndiff < bh_val_[0]) {
maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
}
}
}
void generalized_hammings_knn (
int_maxheap_array_t * ha,
const uint8_t * a,
const uint8_t * b,
size_t nb,
size_t code_size,
int ordered)
{
int na = ha->nh;
int k = ha->k;
if (ordered)
ha->heapify ();
#pragma omp parallel for
for (int i = 0; i < na; i++) {
const uint8_t *ca = a + i * code_size;
const uint8_t *cb = b;
hamdis_t * bh_val_ = ha->val + i * k;
long * bh_ids_ = ha->ids + i * k;
switch (code_size) {
case 8:
hamming_dis_inner_loop<GenHammingComputer8>
(ca, cb, nb, 8, k, bh_val_, bh_ids_);
break;
case 16:
hamming_dis_inner_loop<GenHammingComputer16>
(ca, cb, nb, 16, k, bh_val_, bh_ids_);
break;
case 32:
hamming_dis_inner_loop<GenHammingComputer32>
(ca, cb, nb, 32, k, bh_val_, bh_ids_);
break;
default:
hamming_dis_inner_loop<GenHammingComputerM8>
(ca, cb, nb, code_size, k, bh_val_, bh_ids_);
break;
}
}
if (ordered)
ha->reorder ();
}
} // namespace faiss