Mirror of https://github.com/facebookresearch/faiss.git (synced 2025-06-03 21:54:02 +08:00)
Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3154

Using the benchmark to find Pareto-optimal indices, in this case on BigANN as an example. Separately optimize the coarse quantizer and the vector codec, and use Pareto-optimal configurations to construct IVF indices, which are then retested at various scales. See `optimize()` in `optimize.py` as the main function driving the process.

The results can be interpreted with `bench_fw_notebook.ipynb`, which allows:
* filtering by maximum code size
* filtering by maximum time
* filtering by minimum accuracy
* selecting space- or time-Pareto-optimal options
and visualizing the results and outputting them as a table.

This version is intentionally limited to IVF(Flat|HNSW),PQ|SQ indices...

Reviewed By: mdouze

Differential Revision: D51781670

fbshipit-source-id: 2c0f800d374ea845255934f519cc28095c00a51f
249 lines
6.7 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import functools
import logging
from enum import Enum
from multiprocessing.pool import ThreadPool
from time import perf_counter

import faiss  # @manual=//faiss/python:pyfaiss_gpu
import numpy as np

from faiss.contrib.evaluation import (  # @manual=//faiss/contrib:faiss_contrib_gpu
    OperatingPoints,
)

logger = logging.getLogger(__name__)


def timer(name, func, once=False):
    """Run func() and measure its wall-clock time with perf_counter.

    If once is False and the first call takes less than a second, the call is
    repeated several times and the time averaged. Returns
    (result, seconds per call, number of repeats).
    """
    logger.info(f"Measuring {name}")
    t1 = perf_counter()
    res = func()
    t2 = perf_counter()
    t = t2 - t1
    repeat = 1
    if not once and t < 1.0:
        repeat = int(2.0 // t)
        logger.info(
            f"Time for {name}: {t:.3f} seconds, repeating {repeat} times"
        )
        t1 = perf_counter()
        for _ in range(repeat):
            res = func()
        t2 = perf_counter()
        t = (t2 - t1) / repeat
    logger.info(f"Time for {name}: {t:.3f} seconds")
    return res, t, repeat


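# Usage sketch (illustrative only, kept as a comment so it is not executed on
# import; the dataset sizes and the flat index are arbitrary choices, not part
# of this module):
#
#     xb = np.random.rand(10_000, 64).astype("float32")
#     xq = np.random.rand(100, 64).astype("float32")
#     index = faiss.IndexFlatL2(64)
#     _, t_add, _ = timer("add", lambda: index.add(xb), once=True)
#     (D, I), t_search, repeat = timer("search", lambda: index.search(xq, 10))

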
def refine_distances_knn(
    xq: np.ndarray,
    xb: np.ndarray,
    I: np.ndarray,
    metric,
):
    """Recompute distances between xq[i] and xb[I[i, :]]"""
    nq, k = I.shape
    xq = np.ascontiguousarray(xq, dtype="float32")
    nq2, d = xq.shape
    xb = np.ascontiguousarray(xb, dtype="float32")
    nb, d2 = xb.shape
    I = np.ascontiguousarray(I, dtype="int64")
    assert nq2 == nq
    assert d2 == d
    D = np.empty(I.shape, dtype="float32")
    D[:] = np.inf
    if metric == faiss.METRIC_L2:
        faiss.fvec_L2sqr_by_idx(
            faiss.swig_ptr(D),
            faiss.swig_ptr(xq),
            faiss.swig_ptr(xb),
            faiss.swig_ptr(I),
            d,
            nq,
            k,
        )
    else:
        faiss.fvec_inner_products_by_idx(
            faiss.swig_ptr(D),
            faiss.swig_ptr(xq),
            faiss.swig_ptr(xb),
            faiss.swig_ptr(I),
            d,
            nq,
            k,
        )
    return D


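# Usage sketch (illustrative only, not executed; random data with arbitrary
# sizes): refine the distances of a k-NN result against the original vectors.
#
#     rs = np.random.RandomState(123)
#     xb = rs.rand(1000, 32).astype("float32")
#     xq = rs.rand(20, 32).astype("float32")
#     index = faiss.IndexFlatL2(32)
#     index.add(xb)
#     _, I = index.search(xq, 10)
#     D_refined = refine_distances_knn(xq, xb, I, faiss.METRIC_L2)

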
def refine_distances_range(
    lims: np.ndarray,
    D: np.ndarray,
    I: np.ndarray,
    xq: np.ndarray,
    xb: np.ndarray,
    metric,
):
    """Recompute distances between xq[i] and xb[I[lims[i]:lims[i + 1]]]
    for range search results."""
    with ThreadPool(32) as pool:
        R = pool.map(
            lambda i: (
                np.sum(np.square(xq[i] - xb[I[lims[i] : lims[i + 1]]]), axis=1)
                if metric == faiss.METRIC_L2
                else np.tensordot(
                    xq[i], xb[I[lims[i] : lims[i + 1]]], axes=(0, 1)
                )
            )
            if lims[i + 1] > lims[i]
            else [],
            range(len(lims) - 1),
        )
    return np.hstack(R)


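# Usage sketch (illustrative only, not executed; continues the k-NN example
# above, with an arbitrary search radius):
#
#     lims, D, I = index.range_search(xq, 2.0)
#     R = refine_distances_range(lims, D, I, xq, xb, faiss.METRIC_L2)

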
def distance_ratio_measure(I, R, D_GT, metric):
    """Ratio between the summed result distances R and the summed ground-truth
    distances D_GT over valid result ids (I >= 0), oriented so that higher is
    better for both metrics."""
    sum_of_R = np.sum(np.where(I >= 0, R, 0))
    sum_of_D_GT = np.sum(np.where(I >= 0, D_GT, 0))
    if metric == faiss.METRIC_INNER_PRODUCT:
        return (sum_of_R / sum_of_D_GT).item()
    elif metric == faiss.METRIC_L2:
        return (sum_of_D_GT / sum_of_R).item()
    else:
        raise RuntimeError(f"unknown metric {metric}")


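# Usage sketch (illustrative only, not executed; I_approx and D_GT stand for
# the result ids of the evaluated index and the ground-truth distances at the
# same positions, both assumed to be available from a previous evaluation):
#
#     R = refine_distances_knn(xq, xb, I_approx, faiss.METRIC_L2)
#     ratio = distance_ratio_measure(I_approx, R, D_GT, faiss.METRIC_L2)

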
@functools.cache
def get_cpu_info():
    """Return the CPU model name from /proc/cpuinfo (Linux only)."""
    with open("/proc/cpuinfo", "r") as f:
        # the slice strips the "model name\t: " prefix
        return [l for l in f if "model name" in l][0][13:].strip()


def dict_merge(target, source):
    """Recursively merge source into target, in place."""
    for k, v in source.items():
        if isinstance(v, dict) and k in target:
            dict_merge(target[k], v)
        else:
            target[k] = v


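# Usage sketch (illustrative only, not executed):
#
#     target = {"a": {"x": 1}, "b": 2}
#     dict_merge(target, {"a": {"y": 3}, "c": 4})
#     # target is now {"a": {"x": 1, "y": 3}, "b": 2, "c": 4}

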
class Cost:
    """A vector of cost components (e.g. [time, space]) with the partial order
    used for Pareto comparisons: one cost is <= another only if it is no worse
    on every component."""

    def __init__(self, values):
        self.values = values

    def __le__(self, other):
        return all(
            v1 <= v2 for v1, v2 in zip(self.values, other.values, strict=True)
        )

    def __lt__(self, other):
        return all(
            v1 < v2 for v1, v2 in zip(self.values, other.values, strict=True)
        )


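# Usage sketch (illustrative only, not executed):
#
#     Cost([1.0, 8.0]) <= Cost([2.0, 8.0])  # True: no worse on both components
#     Cost([1.0, 8.0]) < Cost([2.0, 8.0])   # False: not strictly better on both

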
class ParetoMode(Enum):
    DISABLE = 1  # no Pareto filtering
    INDEX = 2  # index-local optima
    GLOBAL = 3  # global optima


class ParetoMetric(Enum):
    TIME = 0  # time vs accuracy
    SPACE = 1  # space vs accuracy
    TIME_SPACE = 2  # (time, space) vs accuracy


def range_search_recall_at_precision(experiment, precision):
    """Highest recall on the experiment's precision-recall curve whose
    precision exceeds the given threshold, rounded to 6 decimals."""
    return round(
        max(
            r
            for r, p in zip(
                experiment["range_search_pr"]["recall"],
                experiment["range_search_pr"]["precision"],
            )
            if p > precision
        ),
        6,
    )


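# Usage sketch (illustrative only, not executed; the experiment dict is a
# made-up example of the structure this function reads):
#
#     experiment = {
#         "range_search_pr": {
#             "recall": [0.2, 0.5, 0.9],
#             "precision": [0.99, 0.95, 0.7],
#         }
#     }
#     range_search_recall_at_precision(experiment, 0.9)  # -> 0.5

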
def filter_results(
    results,
    evaluation,
    accuracy_metric,  # str or func
    time_metric=None,  # func or None -> use default
    space_metric=None,  # func or None -> use default
    min_accuracy=0,
    max_space=0,
    max_time=0,
    scaling_factor=1.0,
    name_filter=None,  # func
    pareto_mode=ParetoMode.DISABLE,
    pareto_metric=ParetoMetric.TIME,
):
    """Select experiments from benchmark results, applying accuracy, space,
    time and index-name filters, and optionally keep only Pareto-optimal
    operating points (per index or globally) under the chosen metric.
    Returns a sorted list of (accuracy, space, time, key, experiment) tuples."""
    if isinstance(accuracy_metric, str):
        accuracy_key = accuracy_metric
        accuracy_metric = lambda v: v[accuracy_key]

    if time_metric is None:
        time_metric = lambda v: v["time"] * scaling_factor + (
            v["quantizer"]["time"] if "quantizer" in v else 0
        )

    if space_metric is None:
        space_metric = lambda v: results["indices"][v["codec"]]["code_size"]

    fe = []
    ops = {}
    if pareto_mode == ParetoMode.GLOBAL:
        op = OperatingPoints()
        ops["global"] = op
    for k, v in results["experiments"].items():
        if f".{evaluation}" in k:
            accuracy = accuracy_metric(v)
            if min_accuracy > 0 and accuracy < min_accuracy:
                continue
            space = space_metric(v)
            if space is None:
                space = 0
            if max_space > 0 and space > max_space:
                continue
            time = time_metric(v)
            if max_time > 0 and time > max_time:
                continue
            idx_name = v["index"] + (
                "snap"
                if "search_params" in v and v["search_params"]["snap"] == 1
                else ""
            )
            if name_filter is not None and not name_filter(idx_name):
                continue
            experiment = (accuracy, space, time, k, v)
            if pareto_mode == ParetoMode.DISABLE:
                fe.append(experiment)
                continue
            if pareto_mode == ParetoMode.INDEX:
                if idx_name not in ops:
                    ops[idx_name] = OperatingPoints()
                op = ops[idx_name]
            if pareto_metric == ParetoMetric.TIME:
                op.add_operating_point(experiment, accuracy, time)
            elif pareto_metric == ParetoMetric.SPACE:
                op.add_operating_point(experiment, accuracy, space)
            else:
                op.add_operating_point(
                    experiment, accuracy, Cost([time, space])
                )

    if ops:
        for op in ops.values():
            for v, _, _ in op.operating_points:
                fe.append(v)

    fe.sort()
    return fe

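# Usage sketch (illustrative only, not executed; the evaluation suffix "knn"
# and the accuracy key "knn_intersection" are assumptions about the benchmark's
# result dictionary, which must contain the "experiments" and "indices" entries
# accessed above):
#
#     selected = filter_results(
#         results,
#         evaluation="knn",
#         accuracy_metric="knn_intersection",
#         min_accuracy=0.9,
#         pareto_mode=ParetoMode.GLOBAL,
#         pareto_metric=ParetoMetric.TIME_SPACE,
#     )
#     for accuracy, space, time, key, experiment in selected:
#         print(key, accuracy, space, time)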