/** * Copyright (c) Facebook, Inc. and its affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ namespace faiss { inline BitstringWriter::BitstringWriter(uint8_t *code, int code_size): code (code), code_size (code_size), i(0) { bzero (code, code_size); } inline void BitstringWriter::write(uint64_t x, int nbit) { assert (code_size * 8 >= nbit + i); // nb of available bits in i / 8 int na = 8 - (i & 7); if (nbit <= na) { code[i >> 3] |= x << (i & 7); i += nbit; return; } else { int j = i >> 3; code[j++] |= x << (i & 7); i += nbit; x >>= na; while (x != 0) { code[j++] |= x; x >>= 8; } } } inline BitstringReader::BitstringReader(const uint8_t *code, int code_size): code (code), code_size (code_size), i(0) {} inline uint64_t BitstringReader::read(int nbit) { assert (code_size * 8 >= nbit + i); // nb of available bits in i / 8 int na = 8 - (i & 7); // get available bits in current byte uint64_t res = code[i >> 3] >> (i & 7); if (nbit <= na) { res &= (1 << nbit) - 1; i += nbit; return res; } else { int ofs = na; int j = (i >> 3) + 1; i += nbit; nbit -= na; while (nbit > 8) { res |= ((uint64_t)code[j++]) << ofs; ofs += 8; nbit -= 8; // TODO remove nbit } uint64_t last_byte = code[j]; last_byte &= (1 << nbit) - 1; res |= last_byte << ofs; return res; } } /****************************************************************** * The HammingComputer series of classes compares a single code of * size 4 to 32 to incoming codes. They are intended for use as a * template class where it would be inefficient to switch on the code * size in the inner loop. Hopefully the compiler will inline the * hamming() functions and put the a0, a1, ... in registers. ******************************************************************/ struct HammingComputer4 { uint32_t a0; HammingComputer4 () {} HammingComputer4 (const uint8_t *a, int code_size) { set (a, code_size); } void set (const uint8_t *a, int code_size) { assert (code_size == 4); a0 = *(uint32_t *)a; } inline int hamming (const uint8_t *b) const { return popcount64 (*(uint32_t *)b ^ a0); } }; struct HammingComputer8 { uint64_t a0; HammingComputer8 () {} HammingComputer8 (const uint8_t *a, int code_size) { set (a, code_size); } void set (const uint8_t *a, int code_size) { assert (code_size == 8); a0 = *(uint64_t *)a; } inline int hamming (const uint8_t *b) const { return popcount64 (*(uint64_t *)b ^ a0); } }; struct HammingComputer16 { uint64_t a0, a1; HammingComputer16 () {} HammingComputer16 (const uint8_t *a8, int code_size) { set (a8, code_size); } void set (const uint8_t *a8, int code_size) { assert (code_size == 16); const uint64_t *a = (uint64_t *)a8; a0 = a[0]; a1 = a[1]; } inline int hamming (const uint8_t *b8) const { const uint64_t *b = (uint64_t *)b8; return popcount64 (b[0] ^ a0) + popcount64 (b[1] ^ a1); } }; // when applied to an array, 1/2 of the 64-bit accesses are unaligned. // This incurs a penalty of ~10% wrt. fully aligned accesses. struct HammingComputer20 { uint64_t a0, a1; uint32_t a2; HammingComputer20 () {} HammingComputer20 (const uint8_t *a8, int code_size) { set (a8, code_size); } void set (const uint8_t *a8, int code_size) { assert (code_size == 20); const uint64_t *a = (uint64_t *)a8; a0 = a[0]; a1 = a[1]; a2 = a[2]; } inline int hamming (const uint8_t *b8) const { const uint64_t *b = (uint64_t *)b8; return popcount64 (b[0] ^ a0) + popcount64 (b[1] ^ a1) + popcount64 (*(uint32_t*)(b + 2) ^ a2); } }; struct HammingComputer32 { uint64_t a0, a1, a2, a3; HammingComputer32 () {} HammingComputer32 (const uint8_t *a8, int code_size) { set (a8, code_size); } void set (const uint8_t *a8, int code_size) { assert (code_size == 32); const uint64_t *a = (uint64_t *)a8; a0 = a[0]; a1 = a[1]; a2 = a[2]; a3 = a[3]; } inline int hamming (const uint8_t *b8) const { const uint64_t *b = (uint64_t *)b8; return popcount64 (b[0] ^ a0) + popcount64 (b[1] ^ a1) + popcount64 (b[2] ^ a2) + popcount64 (b[3] ^ a3); } }; struct HammingComputer64 { uint64_t a0, a1, a2, a3, a4, a5, a6, a7; HammingComputer64 () {} HammingComputer64 (const uint8_t *a8, int code_size) { set (a8, code_size); } void set (const uint8_t *a8, int code_size) { assert (code_size == 64); const uint64_t *a = (uint64_t *)a8; a0 = a[0]; a1 = a[1]; a2 = a[2]; a3 = a[3]; a4 = a[4]; a5 = a[5]; a6 = a[6]; a7 = a[7]; } inline int hamming (const uint8_t *b8) const { const uint64_t *b = (uint64_t *)b8; return popcount64 (b[0] ^ a0) + popcount64 (b[1] ^ a1) + popcount64 (b[2] ^ a2) + popcount64 (b[3] ^ a3) + popcount64 (b[4] ^ a4) + popcount64 (b[5] ^ a5) + popcount64 (b[6] ^ a6) + popcount64 (b[7] ^ a7); } }; // very inefficient... struct HammingComputerDefault { const uint8_t *a; int n; HammingComputerDefault () {} HammingComputerDefault (const uint8_t *a8, int code_size) { set (a8, code_size); } void set (const uint8_t *a8, int code_size) { a = a8; n = code_size; } int hamming (const uint8_t *b8) const { int accu = 0; for (int i = 0; i < n; i++) accu += popcount64 (a[i] ^ b8[i]); return accu; } }; struct HammingComputerM8 { const uint64_t *a; int n; HammingComputerM8 () {} HammingComputerM8 (const uint8_t *a8, int code_size) { set (a8, code_size); } void set (const uint8_t *a8, int code_size) { assert (code_size % 8 == 0); a = (uint64_t *)a8; n = code_size / 8; } int hamming (const uint8_t *b8) const { const uint64_t *b = (uint64_t *)b8; int accu = 0; for (int i = 0; i < n; i++) accu += popcount64 (a[i] ^ b[i]); return accu; } }; // even more inefficient! struct HammingComputerM4 { const uint32_t *a; int n; HammingComputerM4 () {} HammingComputerM4 (const uint8_t *a4, int code_size) { set (a4, code_size); } void set (const uint8_t *a4, int code_size) { assert (code_size % 4 == 0); a = (uint32_t *)a4; n = code_size / 4; } int hamming (const uint8_t *b8) const { const uint32_t *b = (uint32_t *)b8; int accu = 0; for (int i = 0; i < n; i++) accu += popcount64 (a[i] ^ b[i]); return accu; } }; /*************************************************************************** * Equivalence with a template class when code size is known at compile time **************************************************************************/ // default template template struct HammingComputer: HammingComputerM8 { HammingComputer (const uint8_t *a, int code_size): HammingComputerM8(a, code_size) {} }; #define SPECIALIZED_HC(CODE_SIZE) \ template<> struct HammingComputer: \ HammingComputer ## CODE_SIZE { \ HammingComputer (const uint8_t *a): \ HammingComputer ## CODE_SIZE(a, CODE_SIZE) {} \ } SPECIALIZED_HC(4); SPECIALIZED_HC(8); SPECIALIZED_HC(16); SPECIALIZED_HC(20); SPECIALIZED_HC(32); SPECIALIZED_HC(64); #undef SPECIALIZED_HC /*************************************************************************** * generalized Hamming = number of bytes that are different between * two codes. ***************************************************************************/ inline int generalized_hamming_64 (uint64_t a) { a |= a >> 1; a |= a >> 2; a |= a >> 4; a &= 0x0101010101010101UL; return popcount64 (a); } struct GenHammingComputer8 { uint64_t a0; GenHammingComputer8 (const uint8_t *a, int code_size) { assert (code_size == 8); a0 = *(uint64_t *)a; } inline int hamming (const uint8_t *b) const { return generalized_hamming_64 (*(uint64_t *)b ^ a0); } }; struct GenHammingComputer16 { uint64_t a0, a1; GenHammingComputer16 (const uint8_t *a8, int code_size) { assert (code_size == 16); const uint64_t *a = (uint64_t *)a8; a0 = a[0]; a1 = a[1]; } inline int hamming (const uint8_t *b8) const { const uint64_t *b = (uint64_t *)b8; return generalized_hamming_64 (b[0] ^ a0) + generalized_hamming_64 (b[1] ^ a1); } }; struct GenHammingComputer32 { uint64_t a0, a1, a2, a3; GenHammingComputer32 (const uint8_t *a8, int code_size) { assert (code_size == 32); const uint64_t *a = (uint64_t *)a8; a0 = a[0]; a1 = a[1]; a2 = a[2]; a3 = a[3]; } inline int hamming (const uint8_t *b8) const { const uint64_t *b = (uint64_t *)b8; return generalized_hamming_64 (b[0] ^ a0) + generalized_hamming_64 (b[1] ^ a1) + generalized_hamming_64 (b[2] ^ a2) + generalized_hamming_64 (b[3] ^ a3); } }; struct GenHammingComputerM8 { const uint64_t *a; int n; GenHammingComputerM8 (const uint8_t *a8, int code_size) { assert (code_size % 8 == 0); a = (uint64_t *)a8; n = code_size / 8; } int hamming (const uint8_t *b8) const { const uint64_t *b = (uint64_t *)b8; int accu = 0; for (int i = 0; i < n; i++) accu += generalized_hamming_64 (a[i] ^ b[i]); return accu; } }; /** generalized Hamming distances (= count number of code bytes that are the same) */ void generalized_hammings_knn_hc ( int_maxheap_array_t * ha, const uint8_t * a, const uint8_t * b, size_t nb, size_t code_size, int ordered = true); /** This class maintains a list of best distances seen so far. * * Since the distances are in a limited range (0 to nbit), the * object maintains one list per possible distance, and fills * in only the n-first lists, such that the sum of sizes of the * n lists is below k. */ template struct HCounterState { int *counters; int64_t *ids_per_dis; HammingComputer hc; int thres; int count_lt; int count_eq; int k; HCounterState(int *counters, int64_t *ids_per_dis, const uint8_t *x, int d, int k) : counters(counters), ids_per_dis(ids_per_dis), hc(x, d / 8), thres(d + 1), count_lt(0), count_eq(0), k(k) {} void update_counter(const uint8_t *y, size_t j) { int32_t dis = hc.hamming(y); if (dis <= thres) { if (dis < thres) { ids_per_dis[dis * k + counters[dis]++] = j; ++count_lt; while (count_lt == k && thres > 0) { --thres; count_eq = counters[thres]; count_lt -= count_eq; } } else if (count_eq < k) { ids_per_dis[dis * k + count_eq++] = j; counters[dis] = count_eq; } } } }; } // namespace faiss