faiss/benchs/bench_quantizer.py
Chengqi Deng eba1cb1a90 Support LSQ on GPU (#1978)
Summary:
## Description

This PR added support for LSQ on GPU. Only the encoding part is running on GPU and the others are still running on CPU.

Multi-GPU is also supported.

## Usage

``` python
lsq = faiss.LocalSearchQuantizer(d, M, nbits)
ngpus = faiss.get_num_gpus()
lsq.icm_encoder_factory = faiss.GpuIcmEncoderFactory(ngpus)  # we use all gpus

lsq.train(xt)
codes = lsq.compute_codes(xb)
decoded = lsq.decode(codes)
```

## Performance on SIFT1M

On 1 GPU:
```
===== lsq-gpu:
        mean square error = 17337.878528
        training time: 40.9857234954834 s
        encoding time: 27.12640070915222 s
```

On 2 GPUs:
```
===== lsq-gpu:
        mean square error = 17364.658176
        training time: 25.832106113433838 s
        encoding time: 14.879548072814941 s
```

On CPU:
```
===== lsq:
        mean square error = 17305.880576
        training time: 152.57522344589233 s
        encoding time: 110.01779270172119 s
```

Pull Request resolved: https://github.com/facebookresearch/faiss/pull/1978

Test Plan: buck test mode/dev-nosan //faiss/gpu/test/:test_gpu_index_py -- TestLSQIcmEncoder

Reviewed By: wickedfoo

Differential Revision: D29609763

Pulled By: mdouze

fbshipit-source-id: b6ffa2a3c02bf696a4e52348132affa0dd838870
2021-09-09 09:13:15 -07:00

137 lines
3.8 KiB
Python

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys
import faiss
import time
import numpy as np
try:
from faiss.contrib.datasets_fb import \
DatasetSIFT1M, DatasetDeep1B, DatasetBigANN
except ImportError:
from faiss.contrib.datasets import \
DatasetSIFT1M, DatasetDeep1B, DatasetBigANN
def eval_codec(q, xq, xb, gt):
t0 = time.time()
codes = q.compute_codes(xb)
t1 = time.time()
xb_decoded = q.decode(codes)
recons_err = ((xb - xb_decoded) ** 2).sum() / xb.shape[0]
# for compatibility with the codec benchmarks
err_compat = np.linalg.norm(xb - xb_decoded, axis=1).mean()
xq_decoded = q.decode(q.compute_codes(xq))
D, I = faiss.knn(xq_decoded, xb_decoded, 1)
recall = (I[:, 0] == gt[:, 0]).sum() / nq
print(
f"\tencode time: {t1 - t0:.3f} reconstruction error: {recons_err:.3f} "
f"1-recall@1: {recall:.4f} recons_err_compat {err_compat:.3f}")
def eval_quantizer(q, xq, xb, gt, xt, variants=None):
if variants is None:
variants = [(None, None)]
t0 = time.time()
q.train(xt)
t1 = time.time()
train_t = t1 - t0
print(f'\ttraining time: {train_t:.3f} s')
for name, val in variants:
if name is not None:
print(f"{name}={val}")
getattr(q, name) # make sure field exists
setattr(q, name, val)
eval_codec(q, xq, xb, gt)
todo = sys.argv[1:]
if len(todo) > 0 and "deep1M" in todo[0]:
ds = DatasetDeep1B(10**6)
del todo[0]
elif len(todo) > 0 and "bigann1M" in todo[0]:
ds = DatasetBigANN(nb_M=1)
del todo[0]
else:
ds = DatasetSIFT1M()
if len(todo) > 0:
if "x" in todo[0]:
M, nbits = todo[0].split("x")
M = int(M)
nbits = int(nbits)
del todo[0]
maxtrain = max(100 << nbits, 10**5)
print(f"eval on {M}x{nbits} maxtrain={maxtrain}")
xq = ds.get_queries()
xb = ds.get_database()
gt = ds.get_groundtruth()
xt = ds.get_train(maxtrain=maxtrain)
nb, d = xb.shape
nq, d = xq.shape
nt, d = xt.shape
# fastest to slowest
if 'lsq-gpu' in todo:
lsq = faiss.LocalSearchQuantizer(d, M, nbits)
ngpus = faiss.get_num_gpus()
lsq.icm_encoder_factory = faiss.GpuIcmEncoderFactory(ngpus)
lsq.verbose = True
eval_quantizer(lsq, xb, xt, 'lsq-gpu')
if 'pq' in todo:
pq = faiss.ProductQuantizer(d, M, nbits)
print("===== PQ")
eval_quantizer(pq, xq, xb, gt, xt)
if 'opq' in todo:
d2 = ((d + M - 1) // M) * M
print("OPQ d2=", d2)
opq = faiss.OPQMatrix(d, M, d2)
opq.train(xt)
xq2 = opq.apply(xq)
xb2 = opq.apply(xb)
xt2 = opq.apply(xt)
pq = faiss.ProductQuantizer(d2, M, nbits)
print("===== PQ")
eval_quantizer(pq, xq2, xb2, gt, xt2)
if 'rq' in todo:
print("===== RQ")
rq = faiss.ResidualQuantizer(d, M, nbits, )
rq.max_beam_size
rq.max_beam_size = 30 # for compatibility with older runs
# rq.train_type = faiss.ResidualQuantizer.Train_default
# rq.verbose = True
variants = [("max_beam_size", i) for i in (1, 2, 4, 8, 16, 32)]
eval_quantizer(rq, xq, xb, gt, xt, variants=variants)
if 'rq_lut' in todo:
print("===== RQ")
rq = faiss.ResidualQuantizer(d, M, nbits, )
rq.max_beam_size
rq.max_beam_size = 30 # for compatibility with older runs
rq.use_beam_LUT
rq.use_beam_LUT = 1
# rq.train_type = faiss.ResidualQuantizer.Train_default
# rq.verbose = True
variants = [("max_beam_size", i) for i in (1, 2, 4, 8, 16, 32, 64)]
eval_quantizer(rq, xq, xb, gt, xt, variants=variants)
if 'lsq' in todo:
print("===== LSQ")
lsq = faiss.LocalSearchQuantizer(d, M, nbits)
lsq.verbose = True
variants = [("encode_ils_iters", i) for i in (2, 3, 4, 8, 16)]
eval_quantizer(lsq, xq, xb, gt, xt, variants=variants)