mirror of
https://github.com/facebookresearch/faiss.git
synced 2025-06-03 21:54:02 +08:00
Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3154 Using the benchmark to find Pareto optimal indices, in this case on BigANN as an example. Separately optimize the coarse quantizer and the vector codec and use Pareto optimal configurations to construct IVF indices, which are then retested at various scales. See `optimize()` in `optimize.py` as the main function driving the process. The results can be interpreted with `bench_fw_notebook.ipynb`, which allows: * filtering by maximum code size * maximum time * minimum accuracy * space or time Pareto optimal options * and visualize the results and output them as a table. This version is intentionally limited to IVF(Flat|HNSW),PQ|SQ indices... Reviewed By: mdouze Differential Revision: D51781670 fbshipit-source-id: 2c0f800d374ea845255934f519cc28095c00a51f
59 lines
1.6 KiB
Python
59 lines
1.6 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
|
|
from bench_fw.benchmark_io import BenchmarkIO
|
|
from bench_fw.descriptors import DatasetDescriptor
|
|
from bench_fw.optimize import Optimizer
|
|
|
|
# Configure the root logger at import time so that INFO-level (and more
# severe) messages are emitted while the script runs.
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
def bigann(bio):
    """Run the index optimization on the BigANN datasets.

    Builds an ``Optimizer`` for the L2 metric (32 threads, ``run_local=False``),
    attaches ``bio`` to it, and calls ``optimize()`` with the BigANN
    training, database (1M and 10M) and query descriptors, requiring a
    minimum accuracy of 0.85.

    Args:
        bio: I/O object handed to ``Optimizer.set_io`` and used here to load
            the query dataset via ``get_dataset``.
    """
    opt = Optimizer(distance_metric="L2", num_threads=32, run_local=False)
    opt.set_io(bio)

    # The query set is loaded only to read its second shape entry, which is
    # passed on as ``d`` (presumably the vector dimensionality -- confirm
    # against ``Optimizer.optimize``).
    queries = DatasetDescriptor(namespace="std_q", tablename="bigann1M")
    query_data = bio.get_dataset(queries)
    dim = query_data.shape[1]

    training = DatasetDescriptor(
        namespace="std_t",
        tablename="bigann1M",
        num_vectors=2_000_000,
    )
    databases = [
        DatasetDescriptor(namespace="std_d", tablename=table)
        for table in ("bigann1M", "bigann10M")
    ]

    opt.optimize(
        d=dim,
        training_vectors=training,
        database_vectors_list=databases,
        query_vectors=queries,
        min_accuracy=0.85,
    )
|
|
|
|
|
|
def main():
    """Command-line entry point: run one named optimization experiment.

    Positional arguments:
        experiment: name of the experiment to run; must be one of the keys
            of the ``experiments`` table below.
        path: existing directory under which a sub-directory named after
            the experiment is created (if missing) and passed to
            ``BenchmarkIO``.

    Invalid arguments are reported through ``parser.error`` (usage message
    and exit status 2) rather than ``assert``, which is stripped when Python
    runs with ``-O``.
    """
    # Dispatch table: experiment name -> function taking the BenchmarkIO.
    experiments = {"bigann": bigann}

    parser = argparse.ArgumentParser()
    # Restricting the choices rejects unknown names up front instead of
    # silently creating a directory and doing nothing.
    parser.add_argument("experiment", choices=sorted(experiments))
    parser.add_argument("path")
    args = parser.parse_args()

    if not os.path.isdir(args.path):
        parser.error(f"path is not an existing directory: {args.path}")

    path = os.path.join(args.path, args.experiment)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(path, exist_ok=True)

    bio = BenchmarkIO(
        path=path,
    )
    experiments[args.experiment](bio)


if __name__ == "__main__":
    main()
|