faiss/contrib/ivf_tools.py
Matthijs Douze b9ea339617 support range search from GPU (#2860)
Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2860

Optimized range search function where the GPU computes by default and falls back on the CPU for queries where there are too many results.

Parallelize the CPU to GPU cloning, it seems to work.

Support range_search_preassigned in Python

Fix long-standing issue with SWIG exposed functions that did not release the GIL (in particular the MapLong2Long).

Adds a MapInt64ToInt64 that is more efficient than MapLong2Long.

Reviewed By: algoriddle

Differential Revision: D45672301

fbshipit-source-id: 2e77397c40083818584dbafa5427149359a2abfd
2023-05-16 00:27:53 -07:00

144 lines
4.5 KiB
Python

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import faiss
from faiss.contrib.inspect_tools import get_invlist_sizes
def add_preassigned(index_ivf, x, a, ids=None):
    """
    Add vectors to an IVF index when their list assignment is already known.

    x is a 2D array with one vector per row, a is a 1D array with one
    precomputed assignment per row of x, and ids is an optional 1D array
    with one id per row of x (None is forwarded as-is when no ids are
    given). For an IndexBinaryIVF the row width of x is multiplied by 8
    before it is compared with index_ivf.d.
    """
    nvec, width = x.shape
    assert a.shape == (nvec, )
    # binary indexes: the width of x is scaled by 8 to match index_ivf.d
    dim = width * 8 if isinstance(index_ivf, faiss.IndexBinaryIVF) else width
    assert dim == index_ivf.d

    ids_ptr = None
    if ids is not None:
        assert ids.shape == (nvec, )
        ids_ptr = faiss.swig_ptr(ids)

    index_ivf.add_core(nvec, faiss.swig_ptr(x), ids_ptr, faiss.swig_ptr(a))
def search_preassigned(index_ivf, xq, k, list_nos, coarse_dis=None):
    """
    Search an IVF index, visiting only the inverted lists given by the caller.

    Unlike IndexIVF.search_preassigned, this also works on indexes that
    have pretransforms.

    xq is the 2D query array, k the number of results per query, and
    list_nos an array of shape (n, nprobe) with the lists to visit for each
    query. coarse_dis, when given, must have the same shape as list_nos;
    when omitted an all-zero array is substituted. Returns whatever
    index_ivf.search_preassigned returns.
    """
    nq, width = xq.shape
    if isinstance(index_ivf, faiss.IndexBinaryIVF):
        # binary index: width is scaled by 8, distances are integers
        dim, dis_type = width * 8, "int32"
    else:
        dim, dis_type = width, "float32"
    assert dim == index_ivf.d

    probe_shape = (nq, index_ivf.nprobe)
    assert list_nos.shape == probe_shape

    # the coarse distances are only used by IVFPQ with L2 distance and
    # by_residual=True; in all other cases a dummy array is sufficient
    if coarse_dis is not None:
        assert coarse_dis.shape == probe_shape
    else:
        coarse_dis = np.zeros(probe_shape, dtype=dis_type)

    return index_ivf.search_preassigned(xq, k, list_nos, coarse_dis)
def range_search_preassigned(index_ivf, x, radius, list_nos, coarse_dis=None):
    """
    Perform a range search in the IVF index, with predefined lists to
    search into.

    Parameters:
      index_ivf   IVF index to search (float or IndexBinaryIVF)
      x           2D array of queries, one per row
      radius      search radius, forwarded unchanged to the index
      list_nos    array of shape (n, nprobe): lists to visit per query
      coarse_dis  optional array of shape (n, nprobe) with the coarse
                  distances matching list_nos; a zero-filled dummy is
                  used when omitted

    Returns (lims, dist, indices): lims has n + 1 entries and its last
    entry is the total number of results; dist and indices are flat arrays
    of that length. All three are copies that do not depend on the
    lifetime of the underlying RangeSearchResult.
    """
    n, d = x.shape
    if isinstance(index_ivf, faiss.IndexBinaryIVF):
        # binary index: width is scaled by 8, distances are integers
        d *= 8
        dis_type = "int32"
    else:
        dis_type = "float32"

    # the coarse distances are used in IVFPQ with L2 distance and
    # by_residual=True otherwise we provide dummy coarse_dis
    if coarse_dis is None:
        # zero-filled rather than np.empty: the buffer is handed to the
        # C++ code, so uninitialised memory could make results
        # non-deterministic; this also matches search_preassigned
        coarse_dis = np.zeros((n, index_ivf.nprobe), dtype=dis_type)
    else:
        assert coarse_dis.shape == (n, index_ivf.nprobe)

    assert d == index_ivf.d
    assert list_nos.shape == (n, index_ivf.nprobe)

    res = faiss.RangeSearchResult(n)
    sp = faiss.swig_ptr

    index_ivf.range_search_preassigned_c(
        n, sp(x), radius,
        sp(list_nos), sp(coarse_dis),
        res
    )

    # get pointers and copy them, so the returned arrays remain valid
    # after res is released
    lims = faiss.rev_swig_ptr(res.lims, n + 1).copy()
    num_results = int(lims[-1])
    dist = faiss.rev_swig_ptr(res.distances, num_results).copy()
    indices = faiss.rev_swig_ptr(res.labels, num_results).copy()
    return lims, dist, indices
def replace_ivf_quantizer(index_ivf, new_quantizer):
    """
    Install new_quantizer as the coarse quantizer of index_ivf and return
    the quantizer that was previously in place.

    An empty new_quantizer (ntotal == 0) is trained on, and filled with,
    the centroids reconstructed from the current quantizer. A non-empty one
    must already hold exactly index_ivf.nlist entries.
    """
    if new_quantizer.ntotal != 0:
        assert new_quantizer.ntotal == index_ivf.nlist
    else:
        centers = index_ivf.quantizer.reconstruct_n()
        new_quantizer.train(centers)
        new_quantizer.add(centers)

    # cleanly hand over the old quantizer: the index gives up ownership of
    # its fields and the returned wrapper takes over whatever ownership
    # flag the index had
    was_owner = index_ivf.own_fields
    index_ivf.own_fields = False
    previous = faiss.downcast_index(index_ivf.quantizer)
    previous.this.own(was_owner)
    index_ivf.quantizer = new_quantizer

    # keep a Python reference to the new quantizer on the index object
    if not hasattr(index_ivf, "referenced_objects"):
        index_ivf.referenced_objects = [new_quantizer]
    else:
        index_ivf.referenced_objects.append(new_quantizer)
    return previous
def permute_invlists(index_ivf, perm):
    """
    Apply some permutation to the inverted lists, and modify the quantizer
    entries accordingly.

    perm is a 1D array of size nlist, where old_index = perm[new_index].
    It is converted to a contiguous int64 array before use. The index is
    modified in place and nothing is returned.

    An AssertionError is raised when perm does not have nlist entries, when
    the quantizer does not hold nlist entries, or when perm is not a
    permutation of 0 .. nlist - 1.
    """
    nlist, = perm.shape
    assert index_ivf.nlist == nlist
    quantizer = faiss.downcast_index(index_ivf.quantizer)
    assert quantizer.ntotal == index_ivf.nlist
    perm = np.ascontiguousarray(perm, dtype='int64')

    # make sure it's a permutation. The range check comes first because
    # np.bincount rejects negative values and returns a longer array for
    # values >= nlist, which would make the count comparison ill-defined.
    assert np.all((perm >= 0) & (perm < nlist)), \
        "perm entries must be in [0, nlist)"
    counts = np.bincount(perm, minlength=nlist)
    assert np.all(counts == 1), "perm is not a permutation"

    # handle quantizer
    quantizer.permute_entries(perm)

    # handle inverted lists
    invlists = faiss.downcast_InvertedLists(index_ivf.invlists)
    invlists.permute_invlists(faiss.swig_ptr(perm))
def sort_invlists_by_size(index_ivf):
    """
    Reorder the inverted lists of index_ivf by increasing list size.

    The sizes come from get_invlist_sizes, np.argsort gives the ordering,
    and permute_invlists applies it to both the inverted lists and the
    quantizer. The index is modified in place.
    """
    permute_invlists(
        index_ivf,
        np.argsort(get_invlist_sizes(index_ivf.invlists)),
    )