/** * Copyright (c) Facebook, Inc. and its affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include #include #include #include using namespace faiss; // These implementations are currently slower than HammingComputerDefault so // they are not in the main faiss anymore. struct HammingComputerM8 { const uint64_t* a; int n; HammingComputerM8() = default; HammingComputerM8(const uint8_t* a8, int code_size) { set(a8, code_size); } void set(const uint8_t* a8, int code_size) { assert(code_size % 8 == 0); a = (uint64_t*)a8; n = code_size / 8; } int hamming(const uint8_t* b8) const { const uint64_t* b = (uint64_t*)b8; int accu = 0; for (int i = 0; i < n; i++) accu += popcount64(a[i] ^ b[i]); return accu; } inline int get_code_size() const { return n * 8; } }; struct HammingComputerM4 { const uint32_t* a; int n; HammingComputerM4() = default; HammingComputerM4(const uint8_t* a4, int code_size) { set(a4, code_size); } void set(const uint8_t* a4, int code_size) { assert(code_size % 4 == 0); a = (uint32_t*)a4; n = code_size / 4; } int hamming(const uint8_t* b8) const { const uint32_t* b = (uint32_t*)b8; int accu = 0; for (int i = 0; i < n; i++) accu += popcount64(a[i] ^ b[i]); return accu; } inline int get_code_size() const { return n * 4; } }; template void hamming_cpt_test( int code_size, uint8_t* data1, uint8_t* data2, int n, int* rst) { T computer(data1, code_size); for (int i = 0; i < n; i++) { rst[i] = computer.hamming(data2); data2 += code_size; } } template void hamming_func_test( const uint8_t* const x1, const uint8_t* const x2, const size_t n1, const size_t n2, uint64_t& sumv, uint64_t& xorv) { constexpr size_t CODE_SIZE_IN_BYTES = CODE_SIZE_IN_BITS / 8; double t0 = faiss::getmillisecs(); uint64_t sumx = 0; uint64_t xorx = 0; const size_t nruns = 10; for (size_t irun = 0; irun < 10; irun++) { #pragma omp parallel reduction(+ : sumx, xorx) { #pragma omp for for (size_t i = 0; i < n1; i++) { uint64_t local_sum = 0; uint64_t local_xor = 0; const uint64_t* data1_ptr = (const uint64_t*)(x1 + i * CODE_SIZE_IN_BYTES); for (size_t j = 0; j < n2; j++) { const uint64_t* data2_ptr = (const uint64_t*)(x2 + j * CODE_SIZE_IN_BYTES); uint64_t code = faiss::hamming( data1_ptr, data2_ptr); local_sum += code; local_xor ^= code; } sumx += local_sum; xorx ^= local_xor; } } } sumv = sumx; xorv = xorx; double t1 = faiss::getmillisecs(); printf("hamming<%d>: %.3f msec, %" PRIX64 ", %" PRIX64 "\n", CODE_SIZE_IN_BITS, (t1 - t0) / nruns, sumx, xorx); } template void hamming_computer_test( const uint8_t* const x1, const uint8_t* const x2, const size_t n1, const size_t n2, uint64_t& sumv, uint64_t& xorv) { constexpr size_t CODE_SIZE_IN_BYTES = CODE_SIZE_IN_BITS / 8; double t0 = faiss::getmillisecs(); uint64_t sumx = 0; uint64_t xorx = 0; const size_t nruns = 10; for (size_t irun = 0; irun < nruns; irun++) { sumx = 0; xorx = 0; #pragma omp parallel reduction(+ : sumx, xorx) { #pragma omp for for (size_t i = 0; i < n1; i++) { uint64_t local_sum = 0; uint64_t local_xor = 0; const uint8_t* data1_ptr = x1 + i * CODE_SIZE_IN_BYTES; HammingComputerT hc(data1_ptr, CODE_SIZE_IN_BYTES); for (size_t j = 0; j < n2; j++) { const uint8_t* data2_ptr = x2 + j * CODE_SIZE_IN_BYTES; uint64_t code = hc.hamming(data2_ptr); local_sum += code; local_xor ^= code; } sumx += local_sum; xorx ^= local_xor; } } } sumv = sumx; xorv = xorx; double t1 = faiss::getmillisecs(); printf("HammingComputer<%zd>: %.3f msec, %" PRIX64 ", %" PRIX64 "\n", CODE_SIZE_IN_BYTES, (t1 - t0) / nruns, sumx, xorx); } int main() { size_t n = 4 * 1000 * 1000; std::vector code_size = {128, 256, 512, 1000}; std::vector x(n * code_size.back()); byte_rand(x.data(), n, 12345); int nrun = 100; for (size_t cs : code_size) { printf("benchmark with code_size=%zd n=%zd nrun=%d\n", cs, n, nrun); double tot_t1 = 0, tot_t2 = 0, tot_t3 = 0; #pragma omp parallel reduction(+ : tot_t1, tot_t2, tot_t3) { std::vector rst_m4(n); std::vector rst_m8(n); std::vector rst_default(n); #pragma omp for for (int run = 0; run < nrun; run++) { double t0, t1, t2, t3; t0 = getmillisecs(); // new implem from Zilliz hamming_cpt_test( cs, x.data(), x.data(), n, rst_default.data()); t1 = getmillisecs(); // M8 hamming_cpt_test( cs, x.data(), x.data(), n, rst_m8.data()); t2 = getmillisecs(); // M4 hamming_cpt_test( cs, x.data(), x.data(), n, rst_m4.data()); t3 = getmillisecs(); tot_t1 += t1 - t0; tot_t2 += t2 - t1; tot_t3 += t3 - t2; } for (int i = 0; i < n; i++) { FAISS_THROW_IF_NOT_FMT( (rst_m4[i] == rst_m8[i] && rst_m4[i] == rst_default[i]), "wrong result i=%d, m4 %d m8 %d default %d", i, rst_m4[i], rst_m8[i], rst_default[i]); } } printf("Hamming_Dft implem: %.3f ms\n", tot_t1 / nrun); printf("Hamming_M8 implem: %.3f ms\n", tot_t2 / nrun); printf("Hamming_M4 implem: %.3f ms\n", tot_t3 / nrun); } // evaluate various hamming<>() function calls const size_t MAX_HAMMING_FUNC_CODE_SIZE = 512; const size_t n1 = 65536; const size_t n2 = 16384; std::vector x1(n1 * MAX_HAMMING_FUNC_CODE_SIZE / 8); std::vector x2(n2 * MAX_HAMMING_FUNC_CODE_SIZE / 8); byte_rand(x1.data(), x1.size(), 12345); byte_rand(x2.data(), x2.size(), 23456); // These two values serve as a kind of CRC. uint64_t sumx = 0; uint64_t xorx = 0; hamming_func_test<64>(x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_func_test<128>(x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_func_test<256>(x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_func_test<384>(x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_func_test<512>(x1.data(), x2.data(), n1, n2, sumx, xorx); // evaluate various HammingComputerXX hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); // evaluate various GenHammingDistanceComputerXX hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); hamming_computer_test( x1.data(), x2.data(), n1, n2, sumx, xorx); return 0; }