188 lines
5.8 KiB
Python
188 lines
5.8 KiB
Python
# @lint-ignore-every LICENSELINT
|
|
# Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
#
|
|
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
import numpy as np
|
|
import faiss
|
|
import time
|
|
import argparse
|
|
import rmm
|
|
import ctypes
|
|
|
|
try:
|
|
from faiss.contrib.datasets_fb import \
|
|
DatasetSIFT1M, DatasetDeep1B, DatasetBigANN
|
|
except ImportError:
|
|
from faiss.contrib.datasets import \
|
|
DatasetSIFT1M, DatasetDeep1B, DatasetBigANN
|
|
|
|
|
|
# ds = DatasetDeep1B(10**6)
|
|
# ds = DatasetBigANN(nb_M=1)
|
|
ds = DatasetSIFT1M()
|
|
|
|
xq = ds.get_queries()
|
|
xb = ds.get_database()
|
|
gt = ds.get_groundtruth()
|
|
|
|
xt = ds.get_train()
|
|
|
|
nb, d = xb.shape
|
|
nq, d = xq.shape
|
|
nt, d = xt.shape
|
|
|
|
M = d / 2
|
|
|
|
######################################################
|
|
# Command-line parsing
|
|
######################################################
|
|
|
|
parser = argparse.ArgumentParser()
|
|
|
|
|
|
def aa(*args, **kwargs):
|
|
group.add_argument(*args, **kwargs)
|
|
|
|
|
|
group = parser.add_argument_group('benchmarking options')
|
|
|
|
aa('--bm_train', default=True,
|
|
help='whether to benchmark train operation on GPU index')
|
|
aa('--bm_add', default=True,
|
|
help='whether to benchmark add operation on GPU index')
|
|
aa('--bm_search', default=True,
|
|
help='whether to benchmark search operation on GPU index')
|
|
|
|
|
|
group = parser.add_argument_group('IVF options')
|
|
aa('--nlist', default=1024, type=np.int64,
|
|
help="number of IVF centroids")
|
|
aa('--bits_per_code', default=8, type=np.int64, help='bits per code. Note that < 8 is only supported when cuVS is enabled')
|
|
|
|
|
|
group = parser.add_argument_group('searching')
|
|
|
|
aa('--k', default=10, type=int, help='nb of nearest neighbors')
|
|
aa('--nprobe', default=10, help='nb of IVF lists to probe')
|
|
|
|
args = parser.parse_args()
|
|
|
|
print("args:", args)
|
|
|
|
gt = gt[:, :args.k]
|
|
nlist = args.nlist
|
|
bits_per_code = args.bits_per_code
|
|
|
|
rs = np.random.RandomState(123)
|
|
|
|
res = faiss.StandardGpuResources()
|
|
|
|
# Use an RMM pool memory resource for device allocations
|
|
mr = rmm.mr.PoolMemoryResource(rmm.mr.CudaMemoryResource())
|
|
rmm.mr.set_current_device_resource(mr)
|
|
|
|
|
|
def eval_recall(neighbors, t):
|
|
speed = t * 1000 / nq
|
|
qps = 1000 / speed
|
|
|
|
corrects = (gt == neighbors).sum()
|
|
recall = corrects / (nq * args.k)
|
|
|
|
return recall, qps
|
|
|
|
|
|
def bench_train_milliseconds(trainVecs, use_cuvs):
|
|
config = faiss.GpuIndexIVFPQConfig()
|
|
config.use_cuvs = use_cuvs
|
|
index = faiss.GpuIndexIVFPQ(res, d, 1024, 32, 8, faiss.METRIC_L2, config)
|
|
t0 = time.time()
|
|
index.train(trainVecs)
|
|
return 1000*(time.time() - t0)
|
|
|
|
|
|
#warmup
|
|
xw = rs.rand(nt, d)
|
|
bench_train_milliseconds(xw, True)
|
|
|
|
|
|
if args.bm_train:
|
|
print("=" * 40)
|
|
print("GPU Train Benchmarks")
|
|
print("=" * 40)
|
|
|
|
cuvs_gpu_train_time = bench_train_milliseconds(xt, True)
|
|
classical_gpu_train_time = bench_train_milliseconds(xt, False)
|
|
print("TRAIN, dim: %d, nlist %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % (
|
|
d, nlist, nt, classical_gpu_train_time, cuvs_gpu_train_time))
|
|
|
|
|
|
def bench_add_milliseconds(addVecs, index_cpu, use_cuvs):
|
|
# construct a GPU index using the same trained coarse quantizer
|
|
config = faiss.GpuClonerOptions()
|
|
config.use_cuvs = use_cuvs
|
|
index_gpu = faiss.index_cpu_to_gpu(res, 0, index_cpu, config)
|
|
assert(index_gpu.is_trained)
|
|
t0 = time.time()
|
|
index_gpu.add(addVecs)
|
|
return 1000*(time.time() - t0)
|
|
|
|
|
|
if args.bm_add:
|
|
print("=" * 40)
|
|
print("GPU Add Benchmarks")
|
|
print("=" * 40)
|
|
quantizer = faiss.IndexFlatL2(d)
|
|
index_cpu = faiss.IndexIVFPQ(quantizer, d, 1024, 32, 8, faiss.METRIC_L2)
|
|
index_cpu.train(xt)
|
|
cuvs_gpu_add_time = bench_add_milliseconds(xb, index_cpu, True)
|
|
classical_gpu_add_time = bench_add_milliseconds(xb, index_cpu, False)
|
|
print("ADD, dim: %d, nlist %d, numAdd: %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % (
|
|
d, nlist, nb, classical_gpu_add_time, cuvs_gpu_add_time))
|
|
|
|
|
|
def bench_search_milliseconds(index, queryVecs, nprobe, k, use_cuvs):
|
|
co = faiss.GpuClonerOptions()
|
|
co.use_cuvs = use_cuvs
|
|
index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co)
|
|
index_gpu.nprobe = nprobe
|
|
t0 = time.time()
|
|
_, I = index_gpu.search(queryVecs, k)
|
|
return I, 1000*(time.time() - t0)
|
|
|
|
|
|
# Search benchmarks: both indexes have identical IVF centroids and lists.
|
|
if args.bm_search:
|
|
print("=" * 40)
|
|
print("GPU Search Benchmarks")
|
|
print("=" * 40)
|
|
index_cpu = faiss.IndexIVFPQ(quantizer, d, 1024, 32, 8, faiss.METRIC_L2)
|
|
index_cpu.train(xt)
|
|
index_cpu.add(xb)
|
|
|
|
cuvs_indices, cuvs_gpu_search_time = bench_search_milliseconds(
|
|
index_cpu, xq, args.nprobe, args.k, True)
|
|
classical_gpu_indices, classical_gpu_search_time = bench_search_milliseconds(
|
|
index_cpu, xq, args.nprobe, args.k, False)
|
|
cuvs_recall, cuvs_qps = eval_recall(cuvs_indices, cuvs_gpu_search_time)
|
|
classical_recall, classical_qps = eval_recall(classical_gpu_indices, classical_gpu_search_time)
|
|
print("SEARCH, dim: %d, nlist: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU qps: %.3f, cuVS enabled GPU qps: %.3f" % (
|
|
d, nlist, nb, nq, args.nprobe, args.k, classical_qps, cuvs_qps))
|