more refactor and add encode/decode steps to benchmark (#3825)

Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3825

- Previously, a single benchmark measured the reconstruction error and then the performance of the distance computation for the scalar quantizer. It is now split into a distance benchmark and an accuracy benchmark.
- add performance benchmarks for encode and decode as well
- refactor the benchmarks to accept `n` and `d` as command line arguments. We run the benchmarks with `n` = 2000 and `d` = 128 to start. Happy to expand to `d` = 256 and a higher `n` if we think it's better.
- refactor the targets file so we can create servicelab experiments based on different parameters

Planning to use the benchmarks here to test my simd refactor changes (and expand the benchmarks when necessary).

Reviewed By: mnorris11

Differential Revision: D62049857

fbshipit-source-id: 7e4cbfe27af6da09616b2e7c82d77480c8ddecd6
pull/3826/head
Mengdi Lin 2024-09-03 13:56:59 -07:00 committed by Facebook GitHub Bot
parent 4683cc119f
commit 501a8be55c
7 changed files with 294 additions and 36 deletions

View File

@ -19,6 +19,7 @@ using namespace faiss;
static void bench(benchmark::State& state) {
int d = 128;
int n = 2000;
state.SetLabel(faiss::get_compile_options());
std::vector<float> x(d * n);

View File

@ -5,7 +5,9 @@
* LICENSE file in the root directory of this source tree.
*/
#include <omp.h>
#include <faiss/perf_tests/utils.h>
#include <fmt/format.h>
#include <gflags/gflags.h>
#include <cstdio>
#include <map>
@ -16,13 +18,16 @@
#include <faiss/utils/utils.h>
using namespace faiss;
// Command-line flags (gflags) that size the benchmark problem.
// d: dimension of each vector; n: how many vectors are generated;
// iterations: value handed to the benchmark's Iterations() setting.
DEFINE_uint32(d, 128, "dimension");
DEFINE_uint32(n, 2000, "number of vectors");
DEFINE_uint32(iterations, 20, "iterations");
static void bench(
static void bench_reconstruction_error(
benchmark::State& state,
ScalarQuantizer::QuantizerType type) {
int d = 128;
int n = 2000;
ScalarQuantizer::QuantizerType type,
int d,
int n) {
state.SetLabel(faiss::get_compile_options());
std::vector<float> x(d * n);
float_rand(x.data(), d * n, 12345);
@ -30,8 +35,6 @@ static void bench(
// make sure it's idempotent
ScalarQuantizer sq(d, type);
omp_set_num_threads(1);
sq.train(n, x.data());
size_t code_size = sq.code_size;
@ -61,40 +64,28 @@ static void bench(
state.counters["ndiff_for_idempotence"] = ndiff;
state.counters["code_size_two"] = codes.size();
std::unique_ptr<ScalarQuantizer::SQDistanceComputer> dc(
sq.get_distance_computer());
dc->codes = codes.data();
dc->code_size = sq.code_size;
state.counters["code_size_three"] = dc->code_size;
for (auto _ : state) {
float sum_dis = 0;
for (int i = 0; i < n; i++) {
dc->set_query(&x[i * d]);
for (int j = 0; j < n; j++) {
benchmark::DoNotOptimize(sum_dis += (*dc)(j));
}
}
}
state.SetLabel(faiss::get_compile_options());
}
int main(int argc, char** argv) {
int iterations = 20;
std::map<std::string, ScalarQuantizer::QuantizerType> benchs = {
{"QT_8bit", ScalarQuantizer::QT_8bit},
{"QT_4bit", ScalarQuantizer::QT_4bit},
{"QT_8bit_uniform", ScalarQuantizer::QT_8bit_uniform},
{"QT_4bit_uniform", ScalarQuantizer::QT_4bit_uniform},
{"QT_fp16", ScalarQuantizer::QT_fp16},
{"QT_8bit_direct", ScalarQuantizer::QT_8bit_direct},
{"QT_6bit", ScalarQuantizer::QT_6bit},
{"QT_bf16", ScalarQuantizer::QT_bf16},
{"QT_8bit_direct_signed", ScalarQuantizer::QT_8bit_direct_signed}};
benchmark::Initialize(&argc, argv);
gflags::AllowCommandLineReparsing();
gflags::ParseCommandLineFlags(&argc, &argv, true);
int iterations = FLAGS_iterations;
int d = FLAGS_d;
int n = FLAGS_n;
auto benchs = ::perf_tests::sq_types();
for (auto& [bench_name, quantizer_type] : benchs) {
benchmark::RegisterBenchmark(bench_name.c_str(), bench, quantizer_type)
benchmark::RegisterBenchmark(
fmt::format("{}_{}d_{}n", bench_name, d, n).c_str(),
bench_reconstruction_error,
quantizer_type,
d,
n)
->Iterations(iterations);
}
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
}

View File

@ -0,0 +1,77 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/perf_tests/utils.h>
#include <fmt/format.h>
#include <gflags/gflags.h>
#include <omp.h>
#include <cstdio>
#include <map>
#include <benchmark/benchmark.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/utils/random.h>
#include <faiss/utils/utils.h>
using namespace faiss;
// Command-line flags (gflags) that size the benchmark problem.
// d: dimension of each vector; n: how many vectors are generated;
// iterations: value handed to the benchmark's Iterations() setting.
DEFINE_uint32(d, 128, "dimension");
DEFINE_uint32(n, 2000, "number of vectors");
DEFINE_uint32(iterations, 20, "iterations");
/**
 * Benchmark ScalarQuantizer::decode for one quantizer type.
 *
 * Untimed setup: fill n vectors of dimension d with float_rand (seed 12345),
 * train the quantizer on them and encode them. The timed loop only decodes
 * the n codes back into a float buffer.
 *
 * @param state  google-benchmark state that drives the timed loop
 * @param type   scalar quantizer type under test
 * @param d      dimension of each vector
 * @param n      number of vectors
 */
static void bench_decode(
        benchmark::State& state,
        ScalarQuantizer::QuantizerType type,
        int d,
        int n) {
    state.SetLabel(faiss::get_compile_options());

    // Compute the element count in size_t: d and n come from the command
    // line, and the int product d * n overflows for large problem sizes.
    const size_t total = static_cast<size_t>(d) * static_cast<size_t>(n);
    std::vector<float> x(total);
    float_rand(x.data(), total, 12345);

    ScalarQuantizer sq(d, type);

    // Restrict OpenMP to one thread for the training and the measurement.
    omp_set_num_threads(1);

    sq.train(n, x.data());

    size_t code_size = sq.code_size;
    state.counters["code_size"] = sq.code_size;

    // encode (setup only, not timed)
    std::vector<uint8_t> codes(code_size * n);
    sq.compute_codes(x.data(), codes.data(), n);

    std::vector<float> x2(total);
    for (auto _ : state) {
        // decode
        sq.decode(codes.data(), x2.data(), n);
    }
}
int main(int argc, char** argv) {
benchmark::Initialize(&argc, argv);
gflags::AllowCommandLineReparsing();
gflags::ParseCommandLineFlags(&argc, &argv, true);
int iterations = FLAGS_iterations;
int d = FLAGS_d;
int n = FLAGS_n;
auto benchs = ::perf_tests::sq_types();
for (auto& [bench_name, quantizer_type] : benchs) {
benchmark::RegisterBenchmark(
fmt::format("{}_{}d_{}n", bench_name, d, n).c_str(),
bench_decode,
quantizer_type,
d,
n)
->Iterations(iterations);
}
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
}

View File

@ -0,0 +1,87 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <fmt/format.h>
#include <gflags/gflags.h>
#include <omp.h>
#include <cstdio>
#include <map>
#include <benchmark/benchmark.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/perf_tests/utils.h>
#include <faiss/utils/random.h>
#include <faiss/utils/utils.h>
using namespace faiss;
// Command-line flags (gflags) that size the benchmark problem.
// d: dimension of each vector; n: how many vectors are generated;
// iterations: value handed to the benchmark's Iterations() setting.
DEFINE_uint32(d, 128, "dimension");
DEFINE_uint32(n, 2000, "number of vectors");
DEFINE_uint32(iterations, 20, "iterations");
/**
 * Benchmark the scalar quantizer distance computer for one quantizer type.
 *
 * Untimed setup: fill n vectors of dimension d with float_rand (seed 12345),
 * train the quantizer, encode the vectors and build a distance computer over
 * the codes. The timed loop uses every vector as a query and evaluates its
 * distance to every stored code (n * n evaluations per iteration).
 *
 * NOTE: n comes before d in this signature; both are plain ints, so callers
 * must pass them in exactly that order.
 *
 * @param state  google-benchmark state that drives the timed loop
 * @param type   scalar quantizer type under test
 * @param n      number of vectors
 * @param d      dimension of each vector
 */
static void bench_distance(
        benchmark::State& state,
        ScalarQuantizer::QuantizerType type,
        int n,
        int d) {
    state.SetLabel(faiss::get_compile_options());

    // Compute sizes and offsets in size_t: n and d come from the command
    // line, and int products such as d * n overflow for large problem sizes.
    const size_t dim = static_cast<size_t>(d);
    const size_t total = dim * static_cast<size_t>(n);
    std::vector<float> x(total);
    float_rand(x.data(), total, 12345);

    ScalarQuantizer sq(d, type);

    // Restrict OpenMP to one thread for the training and the measurement.
    omp_set_num_threads(1);

    sq.train(n, x.data());

    size_t code_size = sq.code_size;
    state.counters["code_size"] = sq.code_size;

    // encode (setup only, not timed)
    std::vector<uint8_t> codes(code_size * n);
    sq.compute_codes(x.data(), codes.data(), n);

    std::unique_ptr<ScalarQuantizer::SQDistanceComputer> dc(
            sq.get_distance_computer());
    dc->codes = codes.data();
    dc->code_size = sq.code_size;

    for (auto _ : state) {
        float sum_dis = 0;
        for (int i = 0; i < n; i++) {
            dc->set_query(&x[static_cast<size_t>(i) * dim]);
            for (int j = 0; j < n; j++) {
                // Accumulate inside DoNotOptimize so the distance
                // evaluations cannot be discarded by the compiler.
                benchmark::DoNotOptimize(sum_dis += (*dc)(j));
            }
        }
    }
}
/**
 * Entry point of the distance benchmark: registers one bench_distance run per
 * scalar quantizer type, sized by the --d / --n / --iterations flags.
 */
int main(int argc, char** argv) {
    // google-benchmark sees the command line first, then gflags parses it.
    benchmark::Initialize(&argc, argv);
    gflags::AllowCommandLineReparsing();
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    int iterations = static_cast<int>(FLAGS_iterations);
    int d = static_cast<int>(FLAGS_d);
    int n = static_cast<int>(FLAGS_n);
    auto benchs = ::perf_tests::sq_types();
    for (auto& [bench_name, quantizer_type] : benchs) {
        // bench_distance is declared as (state, type, n, d), so n must be
        // forwarded before d. Passing (d, n) would silently swap the vector
        // count and the dimension. The benchmark name keeps the
        // "<quantizer>_<d>d_<n>n" layout.
        benchmark::RegisterBenchmark(
                fmt::format("{}_{}d_{}n", bench_name, d, n).c_str(),
                bench_distance,
                quantizer_type,
                n,
                d)
                ->Iterations(iterations);
    }
    benchmark::RunSpecifiedBenchmarks();
    benchmark::Shutdown();
}

View File

@ -0,0 +1,71 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <fmt/format.h>
#include <gflags/gflags.h>
#include <omp.h>
#include <cstdio>
#include <map>
#include <benchmark/benchmark.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/perf_tests/utils.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/random.h>
#include <faiss/utils/utils.h>
using namespace faiss;
// Command-line flags (gflags) that size the benchmark problem.
// d: dimension of each vector; n: how many vectors are generated;
// iterations: value handed to the benchmark's Iterations() setting.
DEFINE_uint32(d, 128, "dimension");
DEFINE_uint32(n, 2000, "number of vectors");
DEFINE_uint32(iterations, 20, "iterations");
/**
 * Benchmark ScalarQuantizer::compute_codes (encoding) for one quantizer type.
 *
 * Untimed setup: fill n vectors of dimension d with float_rand (seed 12345)
 * and train the quantizer on them. The timed loop encodes all n vectors into
 * a preallocated code buffer.
 *
 * @param state  google-benchmark state that drives the timed loop
 * @param type   scalar quantizer type under test
 * @param d      dimension of each vector
 * @param n      number of vectors
 */
static void bench_encode(
        benchmark::State& state,
        ScalarQuantizer::QuantizerType type,
        int d,
        int n) {
    state.SetLabel(faiss::get_compile_options());

    // Compute the element count in size_t: d and n come from the command
    // line, and the int product d * n overflows for large problem sizes.
    const size_t total = static_cast<size_t>(d) * static_cast<size_t>(n);
    std::vector<float> x(total);
    float_rand(x.data(), total, 12345);

    ScalarQuantizer sq(d, type);

    // Restrict OpenMP to one thread for the training and the measurement.
    omp_set_num_threads(1);

    // The code size is read before training, as in the original setup.
    size_t code_size = sq.code_size;

    sq.train(n, x.data());
    state.counters["code_size"] = sq.code_size;

    std::vector<uint8_t> codes(code_size * n);
    for (auto _ : state) {
        // encode
        sq.compute_codes(x.data(), codes.data(), n);
    }
}
int main(int argc, char** argv) {
benchmark::Initialize(&argc, argv);
gflags::AllowCommandLineReparsing();
gflags::ParseCommandLineFlags(&argc, &argv, true);
int iterations = FLAGS_iterations;
int d = FLAGS_d;
int n = FLAGS_n;
auto benchs = ::perf_tests::sq_types();
for (auto& [bench_name, quantizer_type] : benchs) {
benchmark::RegisterBenchmark(
fmt::format("{}_{}d_{}n", bench_name, d, n).c_str(),
bench_encode,
quantizer_type,
d,
n)
->Iterations(iterations);
}
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
}

View File

@ -0,0 +1,20 @@
#include <faiss/perf_tests/utils.h>
namespace faiss::perf_tests {
std::map<std::string, faiss::ScalarQuantizer::QuantizerType> sq_types() {
static std::map<std::string, faiss::ScalarQuantizer::QuantizerType>
sq_types = {
{"QT_8bit", faiss::ScalarQuantizer::QT_8bit},
{"QT_4bit", faiss::ScalarQuantizer::QT_4bit},
{"QT_8bit_uniform",
faiss::ScalarQuantizer::QT_8bit_uniform},
{"QT_4bit_uniform",
faiss::ScalarQuantizer::QT_4bit_uniform},
{"QT_fp16", faiss::ScalarQuantizer::QT_fp16},
{"QT_8bit_direct", faiss::ScalarQuantizer::QT_8bit_direct},
{"QT_6bit", faiss::ScalarQuantizer::QT_6bit},
{"QT_bf16", faiss::ScalarQuantizer::QT_bf16},
{"QT_8bit_direct_signed",
faiss::ScalarQuantizer::QT_8bit_direct_signed}};
return sq_types;
}
} // namespace faiss::perf_tests

View File

@ -0,0 +1,11 @@
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
#pragma once
#include <faiss/impl/ScalarQuantizer.h>
#include <map>
namespace faiss::perf_tests {
/// Returns the scalar quantizer types covered by the perf tests, keyed by the
/// string label of each type (e.g. "QT_8bit"). The map is returned by value.
std::map<std::string, faiss::ScalarQuantizer::QuantizerType> sq_types();
} // namespace faiss::perf_tests