/** * Copyright (c) Facebook, Inc. and its affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include size_t nMismatches( const std::vector& ref, const std::vector& candidate) { size_t count = 0; for (size_t i = 0; i < count; i++) { double abs = std::abs(ref[i] - candidate[i]); if (abs >= 1e-5) { count += 1; } } return count; } void test( // dimensionality of the data const size_t dim, // number of subquantizers const size_t subq, // bits per subquantizer const size_t nbits, // number of codes to process const size_t n) { FAISS_THROW_IF_NOT(nbits == 8); // remove if benchmarking is needed omp_set_num_threads(1); // rng std::minstd_rand rng(123); std::uniform_int_distribution u(0, 255); std::uniform_real_distribution uf(0, 1); // initialize lookup std::vector lookup(256 * subq, 0); for (size_t i = 0; i < lookup.size(); i++) { lookup[i] = uf(rng); } // initialize codes std::vector codes(n * subq); #pragma omp parallel { std::minstd_rand rng0(123); std::uniform_int_distribution u1(0, 255); #pragma omp for schedule(guided) for (size_t i = 0; i < codes.size(); i++) { codes[i] = u1(rng0); } } // warmup. compute reference results std::vector resultsRef(n, 0); for (size_t k = 0; k < 10; k++) { #pragma omp parallel for schedule(guided) for (size_t i = 0; i < n; i++) { resultsRef[i] = faiss::distance_single_code_generic( subq, 8, lookup.data(), codes.data() + subq * i); } } // generic, 1 code per step std::vector resultsNewGeneric1x(n, 0); double generic1xMsec = 0; { const auto startingTimepoint = std::chrono::steady_clock::now(); for (size_t k = 0; k < 1000; k++) { #pragma omp parallel for schedule(guided) for (size_t i = 0; i < n; i++) { resultsNewGeneric1x[i] = faiss::distance_single_code_generic( subq, 8, lookup.data(), codes.data() + subq * i); } } const auto endingTimepoint = std::chrono::steady_clock::now(); std::chrono::duration duration = endingTimepoint - startingTimepoint; generic1xMsec = (duration.count() * 1000.0); } // generic, 4 codes per step std::vector resultsNewGeneric4x(n, 0); double generic4xMsec = 0; { const auto startingTimepoint = std::chrono::steady_clock::now(); for (size_t k = 0; k < 1000; k++) { #pragma omp parallel for schedule(guided) for (size_t i = 0; i < n; i += 4) { faiss::distance_four_codes_generic( subq, 8, lookup.data(), codes.data() + subq * (i + 0), codes.data() + subq * (i + 1), codes.data() + subq * (i + 2), codes.data() + subq * (i + 3), resultsNewGeneric4x[i + 0], resultsNewGeneric4x[i + 1], resultsNewGeneric4x[i + 2], resultsNewGeneric4x[i + 3]); } } const auto endingTimepoint = std::chrono::steady_clock::now(); std::chrono::duration duration = endingTimepoint - startingTimepoint; generic4xMsec = (duration.count() * 1000.0); } // generic, 1 code per step std::vector resultsNewCustom1x(n, 0); double custom1xMsec = 0; { const auto startingTimepoint = std::chrono::steady_clock::now(); for (size_t k = 0; k < 1000; k++) { #pragma omp parallel for schedule(guided) for (size_t i = 0; i < n; i++) { resultsNewCustom1x[i] = faiss::distance_single_code( subq, 8, lookup.data(), codes.data() + subq * i); } } const auto endingTimepoint = std::chrono::steady_clock::now(); std::chrono::duration duration = endingTimepoint - startingTimepoint; custom1xMsec = (duration.count() * 1000.0); } // generic, 4 codes per step std::vector resultsNewCustom4x(n, 0); double custom4xMsec = 0; { const auto startingTimepoint = std::chrono::steady_clock::now(); for (size_t k = 0; k < 1000; k++) { #pragma omp parallel for schedule(guided) for (size_t i = 0; i < n; i += 4) { faiss::distance_four_codes( subq, 8, lookup.data(), codes.data() + subq * (i + 0), codes.data() + subq * (i + 1), codes.data() + subq * (i + 2), codes.data() + subq * (i + 3), resultsNewCustom4x[i + 0], resultsNewCustom4x[i + 1], resultsNewCustom4x[i + 2], resultsNewCustom4x[i + 3]); } } const auto endingTimepoint = std::chrono::steady_clock::now(); std::chrono::duration duration = endingTimepoint - startingTimepoint; custom4xMsec = (duration.count() * 1000.0); } const size_t nMismatchesG1 = nMismatches(resultsRef, resultsNewGeneric1x); const size_t nMismatchesG4 = nMismatches(resultsRef, resultsNewGeneric4x); const size_t nMismatchesCustom1 = nMismatches(resultsRef, resultsNewCustom1x); const size_t nMismatchesCustom4 = nMismatches(resultsRef, resultsNewCustom4x); std::cout << "Dim = " << dim << ", subq = " << subq << ", nbits = " << nbits << ", n = " << n << std::endl; std::cout << "Generic 1x code: " << generic1xMsec << " msec, " << nMismatchesG1 << " mismatches" << std::endl; std::cout << "Generic 4x code: " << generic4xMsec << " msec, " << nMismatchesG4 << " mismatches" << std::endl; std::cout << "custom 1x code: " << custom1xMsec << " msec, " << nMismatchesCustom1 << " mismatches" << std::endl; std::cout << "custom 4x code: " << custom4xMsec << " msec, " << nMismatchesCustom4 << " mismatches" << std::endl; std::cout << std::endl; ASSERT_EQ(nMismatchesG1, 0); ASSERT_EQ(nMismatchesG4, 0); ASSERT_EQ(nMismatchesCustom1, 0); ASSERT_EQ(nMismatchesCustom4, 0); } // this test can be used as a benchmark. // 1. Increase the value of NELEMENTS // 2. Remove omp_set_num_threads() constexpr size_t NELEMENTS = 10000; TEST(TestCodeDistance, SUBQ4_NBITS8) { test(256, 4, 8, NELEMENTS); } TEST(TestCodeDistance, SUBQ8_NBITS8) { test(256, 8, 8, NELEMENTS); } TEST(TestCodeDistance, SUBQ16_NBITS8) { test(256, 16, 8, NELEMENTS); } TEST(TestCodeDistance, SUBQ32_NBITS8) { test(256, 32, 8, NELEMENTS); }