# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import faiss  # @manual=//faiss/python:pyfaiss_gpu

from .benchmark_io import BenchmarkIO
from .utils import timer

logger = logging.getLogger(__name__)


@dataclass
class IndexDescriptorClassic:
    bucket: Optional[str] = None
    # either path or factory should be set,
    # but not both at the same time.
    path: Optional[str] = None
    factory: Optional[str] = None
    codec_alias: Optional[str] = None
    construction_params: Optional[List[Dict[str, int]]] = None
    search_params: Optional[Dict[str, int]] = None
    # range metric definitions
    # key: name
    # value: one of the following:
    #
    # radius
    #    [0..radius) -> 1
    #    [radius..inf) -> 0
    #
    # [[radius1, score1], ...]
    #    [0..radius1) -> score1
    #    [radius1..radius2) -> score2
    #
    # [[radius1_from, radius1_to, score1], ...]
    #    [radius1_from, radius1_to) -> score1,
    #    [radius2_from, radius2_to) -> score2
    range_metrics: Optional[Dict[str, Any]] = None
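    # Illustrative example (not part of the original source) of the second
    # form above:
    #   range_metrics = {"weighted": [[0.2, 1.0], [0.5, 0.5]]}
    # scores results with distance in [0, 0.2) as 1.0 and in [0.2, 0.5) as 0.5.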
    radius: Optional[float] = None
    training_size: Optional[int] = None

    def __hash__(self):
        return hash(str(self))


@dataclass
class DatasetDescriptor:
    # namespace possible values:
    # 1. a hive namespace
    # 2. 'std_t', 'std_d', 'std_q' for the standard datasets
    #    via faiss.contrib.datasets.dataset_from_name()
    #    t - training, d - database, q - queries
    #    eg. "std_t"
    # 3. 'syn' for synthetic data
    # 4. None for local files
    namespace: Optional[str] = None

    # tablename possible values, corresponding to the
    # namespace value above:
    # 1. a hive table name
    # 2. name of the standard dataset as recognized
    #    by faiss.contrib.datasets.dataset_from_name()
    #    eg. "bigann1M"
    # 3. d_seed, eg. 128_1234 for 128 dimensional vectors
    #    with seed 1234
    # 4. a local file name (relative to benchmark_io.path)
    tablename: Optional[str] = None

    # partition names and values for hive
    # eg. ["ds=2021-09-01"]
    partitions: Optional[List[str]] = None

    # number of vectors to load from the dataset
    num_vectors: Optional[int] = None

    embedding_column: Optional[str] = None

    sampling_rate: Optional[float] = None

    # sampling column for xdb
    sampling_column: Optional[str] = None
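
    # Illustrative examples (not part of the original source) of the namespace
    # and tablename conventions documented above:
    #   DatasetDescriptor(namespace="std_d", tablename="bigann1M")  # standard database vectors
    #   DatasetDescriptor(namespace="syn", tablename="128_1234")    # synthetic, d=128, seed=1234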

    def __hash__(self):
        return hash(self.get_filename())

    def get_filename(
        self,
        prefix: Optional[str] = None,
    ) -> str:
        filename = ""
        if prefix is not None:
            filename += prefix + "_"
        if self.namespace is not None:
            filename += self.namespace + "_"
        assert self.tablename is not None
        filename += self.tablename
        if self.partitions is not None:
            filename += "_" + "_".join(self.partitions).replace("=", "_")
        if self.num_vectors is not None:
            filename += f"_{self.num_vectors}"
        filename += "."
        return filename
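
    # Illustrative example (not part of the original source):
    #   DatasetDescriptor(namespace="std_d", tablename="bigann1M",
    #                     num_vectors=10**6).get_filename("xb")
    # returns "xb_std_d_bigann1M_1000000."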

    def k_means(self, io, k, dry_run):
        logger.info(f"k_means {k} {self}")
        kmeans_vectors = DatasetDescriptor(
            tablename=f"{self.get_filename()}kmeans_{k}.npy"
        )
        meta_filename = kmeans_vectors.tablename + ".json"
        if not io.file_exist(kmeans_vectors.tablename) or not io.file_exist(
            meta_filename
        ):
            if dry_run:
                return None, None, kmeans_vectors.tablename
            x = io.get_dataset(self)
            kmeans = faiss.Kmeans(d=x.shape[1], k=k, gpu=True)
            _, t, _ = timer("k_means", lambda: kmeans.train(x))
            io.write_nparray(kmeans.centroids, kmeans_vectors.tablename)
            io.write_json({"k_means_time": t}, meta_filename)
        else:
            t = io.read_json(meta_filename)["k_means_time"]
        return kmeans_vectors, t, None
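
    # Illustrative usage (not part of the original source), assuming `io` is a
    # BenchmarkIO instance:
    #   centroids_ds, train_time, _ = dataset_desc.k_means(io, k=1024, dry_run=False)
    # The centroids are cached as "<get_filename()>kmeans_1024.npy" with a
    # ".json" sidecar holding the training time, so later runs reuse the cache.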


@dataclass
class IndexBaseDescriptor:
    d: int
    metric: str
    desc_name: Optional[str] = None
    flat_desc_name: Optional[str] = None
    bucket: Optional[str] = None
    path: Optional[str] = None
    num_threads: int = 1

    def get_name(self) -> str:
        raise NotImplementedError()

    def get_path(self, benchmark_io: BenchmarkIO) -> Optional[str]:
        if self.path is not None:
            return self.path
        self.path = benchmark_io.get_remote_filepath(self.desc_name)
        return self.path

    @staticmethod
    def param_dict_list_to_name(param_dict_list):
        if not param_dict_list:
            return ""
        l = 0
        n = ""
        for param_dict in param_dict_list:
            n += IndexBaseDescriptor.param_dict_to_name(param_dict, f"cp{l}")
            l += 1
        return n

    @staticmethod
    def param_dict_to_name(param_dict, prefix="sp"):
        if not param_dict:
            return ""
        n = prefix
        for name, val in param_dict.items():
            if name == "snap":
                continue
            if name == "lsq_gpu" and val == 0:
                continue
            if name == "use_beam_LUT" and val == 0:
                continue
            n += f"_{name}_{val}"
        if n == prefix:
            return ""
        n += "."
        return n
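
    # Illustrative examples (not part of the original source):
    #   param_dict_to_name({"nprobe": 64}) -> "sp_nprobe_64."
    #   param_dict_list_to_name([{"nlist": 1024}, {"nprobe": 64}])
    #     -> "cp0_nlist_1024.cp1_nprobe_64."
    # "snap", and "lsq_gpu" / "use_beam_LUT" set to 0, are skipped so that
    # default settings do not change the generated name.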


@dataclass
class CodecDescriptor(IndexBaseDescriptor):
    # either path or factory should be set,
    # but not both at the same time.
    factory: Optional[str] = None
    construction_params: Optional[List[Dict[str, int]]] = None
    training_vectors: Optional[DatasetDescriptor] = None

    def __post_init__(self):
        self.get_name()

    def is_trained(self):
        return self.factory is None and self.path is not None

    def is_valid(self):
        return self.factory is not None or self.path is not None

    def get_name(self) -> str:
        if self.desc_name is not None:
            return self.desc_name
        if self.factory is not None:
            self.desc_name = self.name_from_factory()
            return self.desc_name
        if self.path is not None:
            self.desc_name = self.name_from_path()
            return self.desc_name
        raise ValueError("name, factory or path must be set")

    def flat_name(self) -> str:
        if self.flat_desc_name is not None:
            return self.flat_desc_name
        self.flat_desc_name = f"Flat.d_{self.d}.{self.metric.upper()}."
        return self.flat_desc_name

    def path(self, benchmark_io) -> str:
        if self.path is not None:
            return self.path
        return benchmark_io.get_remote_filepath(self.get_name())

    def name_from_factory(self) -> str:
        assert self.factory is not None
        name = f"{self.factory.replace(',', '_')}."
        assert self.d is not None
        assert self.metric is not None
        name += f"d_{self.d}.{self.metric.upper()}."
        if self.factory != "Flat":
            assert self.training_vectors is not None
            name += self.training_vectors.get_filename("xt")
        name += IndexBaseDescriptor.param_dict_list_to_name(self.construction_params)
        return name
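
    # Illustrative example (not part of the original source): with
    # factory="IVF1024,Flat", d=128, metric="L2" and training vectors described
    # by DatasetDescriptor(namespace="std_t", tablename="bigann1M"), the codec
    # name would start with "IVF1024_Flat.d_128.L2.xt_std_t_bigann1M."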

    def name_from_path(self):
        assert self.path is not None
        filename = os.path.basename(self.path)
        ext = filename.split(".")[-1]
        if filename.endswith(ext):
            name = filename[:-len(ext)]
        else:  # should never be hit; consider raising a ValueError instead
            name = filename
        return name

    def alias(self, benchmark_io: BenchmarkIO):
        if hasattr(benchmark_io, "bucket"):
            return CodecDescriptor(
                desc_name=self.get_name(),
                bucket=benchmark_io.bucket,
                path=self.get_path(benchmark_io),
                d=self.d,
                metric=self.metric,
            )
        return CodecDescriptor(desc_name=self.get_name(), d=self.d, metric=self.metric)


@dataclass
class IndexDescriptor(IndexBaseDescriptor):
    codec_desc: Optional[CodecDescriptor] = None
    database_desc: Optional[DatasetDescriptor] = None

    def __hash__(self):
        return hash(str(self))

    def __post_init__(self):
        self.get_name()

    def is_built(self):
        return self.codec_desc is None and self.database_desc is None

    def get_name(self) -> str:
        if self.desc_name is None:
            self.desc_name = self.codec_desc.get_name() + self.database_desc.get_filename(prefix="xb")
        return self.desc_name

    def flat_name(self):
        if self.flat_desc_name is not None:
            return self.flat_desc_name
        self.flat_desc_name = self.codec_desc.flat_name() + self.database_desc.get_filename(prefix="xb")
        return self.flat_desc_name

    # alias is used to refer to this index once it has been uploaded to the
    # blobstore and is referenced again
    def alias(self, benchmark_io: BenchmarkIO):
        if hasattr(benchmark_io, "bucket"):
            return IndexDescriptor(
                desc_name=self.get_name(),
                bucket=benchmark_io.bucket,
                path=self.get_path(benchmark_io),
                d=self.d,
                metric=self.metric,
            )
        return IndexDescriptor(desc_name=self.get_name(), d=self.d, metric=self.metric)


@dataclass
class KnnDescriptor(IndexBaseDescriptor):
    index_desc: Optional[IndexDescriptor] = None
    gt_index_desc: Optional[IndexDescriptor] = None
    query_dataset: Optional[DatasetDescriptor] = None
    search_params: Optional[Dict[str, int]] = None
    reconstruct: bool = False
    # range metric definitions
    # key: name
    # value: one of the following:
    #
    # radius
    #    [0..radius) -> 1
    #    [radius..inf) -> 0
    #
    # [[radius1, score1], ...]
    #    [0..radius1) -> score1
    #    [radius1..radius2) -> score2
    #
    # [[radius1_from, radius1_to, score1], ...]
    #    [radius1_from, radius1_to) -> score1,
    #    [radius2_from, radius2_to) -> score2
    range_metrics: Optional[Dict[str, Any]] = None
    radius: Optional[float] = None
    k: int = 1

    range_ref_index_desc: Optional[str] = None

    def __hash__(self):
        return hash(str(self))

    def get_name(self):
        name = self.index_desc.get_name()
        name += IndexBaseDescriptor.param_dict_to_name(self.search_params)
        name += self.query_dataset.get_filename("q")
        name += f"k_{self.k}."
        name += f"t_{self.num_threads}."
        if self.reconstruct:
            name += "rec."
        else:
            name += "knn."
        return name
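
    # Illustrative example (not part of the original source): with
    # search_params={"nprobe": 64}, queries described by
    # DatasetDescriptor(namespace="std_q", tablename="bigann1M"), k=10 and
    # num_threads=16, get_name() appends
    #   "sp_nprobe_64.q_std_q_bigann1M.k_10.t_16.knn."
    # to the underlying index name.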

    def flat_name(self):
        if self.flat_desc_name is not None:
            return self.flat_desc_name
        name = self.index_desc.flat_name()
        name += self.query_dataset.get_filename("q")
        name += f"k_{self.k}."
        name += f"t_{self.num_threads}."
        if self.reconstruct:
            name += "rec."
        else:
            name += "knn."
        self.flat_desc_name = name
        return name