faiss/benchs/bench_pq_transposed_centroid_table.py

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import faiss
import time
import random

import faiss.contrib.datasets


# copied from benchs/bench_all_ivf/bench_all_ivf.py
def unwind_index_ivf(index):
    if isinstance(index, faiss.IndexPreTransform):
        assert index.chain.size() == 1
        vt = index.chain.at(0)
        index_ivf, vt2 = unwind_index_ivf(faiss.downcast_index(index.index))
        assert vt2 is None
        return index_ivf, vt
    if hasattr(faiss, "IndexRefine") and isinstance(index, faiss.IndexRefine):
        return unwind_index_ivf(faiss.downcast_index(index.base_index))
    if isinstance(index, faiss.IndexIVF):
        return index, None
    else:
        return None, None


def test_bigann10m(index_file, index_parameters):
    ds = faiss.contrib.datasets.DatasetBigANN(nb_M=10)

    xq = ds.get_queries()
    xb = ds.get_database()
    gt = ds.get_groundtruth()

    nb, d = xb.shape
    nq, d = xq.shape

    print("Reading index {}".format(index_file))
    index = faiss.read_index(index_file)

    ps = faiss.ParameterSpace()
    ps.initialize(index)

    index_ivf, vec_transform = unwind_index_ivf(index)

    print('params                                                                      regular    transp_centroids   regular   R@1    R@10   R@100')
    for index_parameter in index_parameters:
        ps.set_index_parameters(index, index_parameter)

        print(index_parameter.ljust(70), end=' ')

        k = 100

        # warmup
        D, I = index.search(xq, k)

        # warmup
        D, I = index.search(xq, k)

        # eval
        t2_0 = time.time()
        D, I = index.search(xq, k)
        t2_1 = time.time()

        # eval
        index_ivf.pq.sync_transposed_centroids()
        t3_0 = time.time()
        D, I = index.search(xq, k)
        t3_1 = time.time()

        # eval
        index_ivf.pq.clear_transposed_centroids()
        t4_0 = time.time()
        D, I = index.search(xq, k)
        t4_1 = time.time()

        print("   %9.5f  " % (t2_1 - t2_0), end=' ')
        print("   %9.5f  " % (t3_1 - t3_0), end=' ')
        print("   %9.5f  " % (t4_1 - t4_0), end=' ')

        for rank in 1, 10, 100:
            n_ok = (I[:, :rank] == gt[:, :1]).sum()
            print("%.4f" % (n_ok / float(nq)), end=' ')
        print()


if __name__ == "__main__":
    faiss.contrib.datasets.dataset_basedir = '/home/aguzhva/ANN_SIFT1B/'

    # represents OPQ32_128,IVF65536_HNSW32,PQ32 index
    index_file_1 = "/home/aguzhva/ANN_SIFT1B/run_tests/bench_ivf/indexes/hnsw32/.faissindex"

    nprobe_values = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
    quantizer_efsearch_values = [4, 8, 16, 32, 64, 128, 256, 512]
    ht_values = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 256]

    # represents OPQ32_128,IVF65536(IVF256,PQHDx4fs,RFlat),PQ32 index
    index_file_2 = "/home/aguzhva/ANN_SIFT1B/run_tests/bench_ivf/indexes/pq4/.faissindex"

    quantizer_k_factor_rf_values = [1, 2, 4, 8, 16, 32, 64]
    quantizer_nprobe_values = [1, 2, 4, 8, 16, 32, 64, 128]

    # test the first index
    index_parameters_1 = []
    for _ in range(0, 20):
        nprobe = random.choice(nprobe_values)
        quantizer_efsearch = random.choice(quantizer_efsearch_values)
        ht = random.choice(ht_values)
        index_parameters_1.append(
            "nprobe={},quantizer_efSearch={},ht={}".format(
                nprobe,
                quantizer_efsearch,
                ht)
        )

    test_bigann10m(index_file_1, index_parameters_1)

    # test the second index
    index_parameters_2 = []
    for _ in range(0, 20):
        nprobe = random.choice(nprobe_values)
        quantizer_k_factor_rf = random.choice(quantizer_k_factor_rf_values)
        quantizer_nprobe = random.choice(quantizer_nprobe_values)
        ht = random.choice(ht_values)
        index_parameters_2.append(
            "nprobe={},quantizer_k_factor_rf={},quantizer_nprobe={},ht={}".format(
                nprobe,
                quantizer_k_factor_rf,
                quantizer_nprobe,
                ht)
        )

    test_bigann10m(index_file_2, index_parameters_2)