mirror of
https://github.com/facebookresearch/faiss.git
synced 2025-06-03 21:54:02 +08:00
Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2860 Optimized range search function where the GPU computes by default and falls back on the CPU for queries where there are too many results. Parallelize the CPU to GPU cloning (it seems to work). Support range_search_preassigned in Python. Fix long-standing issue with SWIG-exposed functions that did not release the GIL (in particular the MapLong2Long). Adds a MapInt64ToInt64 that is more efficient than MapLong2Long. Reviewed By: algoriddle Differential Revision: D45672301 fbshipit-source-id: 2e77397c40083818584dbafa5427149359a2abfd
144 lines
4.5 KiB
Python
144 lines
4.5 KiB
Python
# Copyright (c) Facebook, Inc. and its affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
import numpy as np
|
|
import faiss
|
|
|
|
from faiss.contrib.inspect_tools import get_invlist_sizes
|
|
|
|
|
|
def add_preassigned(index_ivf, x, a, ids=None):
    """
    Add the rows of x to an IVF index using a precomputed assignment.

    x is a 2D array with one vector per row, a holds one inverted list
    number per row of x, and ids (optional) holds one id per row of x.
    When ids is None, None is forwarded to add_core in its place.
    """
    nb_vectors, nb_columns = x.shape
    assert a.shape == (nb_vectors, )

    # for binary IVF indexes, index_ivf.d is compared against 8 times
    # the number of columns of x
    is_binary = isinstance(index_ivf, faiss.IndexBinaryIVF)
    dim = nb_columns * 8 if is_binary else nb_columns
    assert dim == index_ivf.d

    if ids is None:
        ids_ptr = None
    else:
        assert ids.shape == (nb_vectors, )
        ids_ptr = faiss.swig_ptr(ids)

    x_ptr = faiss.swig_ptr(x)
    index_ivf.add_core(nb_vectors, x_ptr, ids_ptr, faiss.swig_ptr(a))
|
|
|
|
|
|
def search_preassigned(index_ivf, xq, k, list_nos, coarse_dis=None):
    """
    Search the k nearest neighbors of xq in the IVF index, visiting only
    the inverted lists given in list_nos (one row of nprobe list numbers
    per query).

    coarse_dis, when given, must have the same (n, nprobe) shape as
    list_nos; when omitted, a zero-filled array is passed instead.
    The result of index_ivf.search_preassigned is returned unchanged.

    NOTE(review): the previous docstring stated that this helper supports
    indexes with pretransforms, as opposed to IndexIVF.search_preassigned;
    the code here only forwards to index_ivf.search_preassigned - confirm.
    """
    nq, nb_columns = xq.shape

    # binary indexes: dimension is 8 x the column count, int32 distances
    if isinstance(index_ivf, faiss.IndexBinaryIVF):
        dim, dis_type = nb_columns * 8, "int32"
    else:
        dim, dis_type = nb_columns, "float32"

    assert dim == index_ivf.d
    probe_shape = (nq, index_ivf.nprobe)
    assert list_nos.shape == probe_shape

    # the coarse distances are only needed by IVFPQ with L2 distance and
    # by_residual=True; in other cases dummy values are good enough
    if coarse_dis is not None:
        assert coarse_dis.shape == probe_shape
    else:
        coarse_dis = np.zeros(probe_shape, dtype=dis_type)

    return index_ivf.search_preassigned(xq, k, list_nos, coarse_dis)
|
|
|
|
|
|
def range_search_preassigned(index_ivf, x, radius, list_nos, coarse_dis=None):
    """
    Perform a range search in the IVF index, with predefined lists to
    search into.

    Arguments:
    - index_ivf: the IVF index (float or binary) to search
    - x: 2D array of queries, one per row
    - radius: search radius, forwarded to range_search_preassigned_c
    - list_nos: array of shape (n, nprobe) with the lists to visit
    - coarse_dis: optional array of shape (n, nprobe); when None a
      zero-filled dummy array is used

    Returns a tuple (lims, dist, indices) of copied arrays: lims has
    n + 1 entries and its last entry is the total number of results;
    dist and indices each hold that many entries.
    """
    n, d = x.shape
    if isinstance(index_ivf, faiss.IndexBinaryIVF):
        d *= 8
        dis_type = "int32"
    else:
        dis_type = "float32"

    # validate the inputs before allocating anything
    assert d == index_ivf.d
    assert list_nos.shape == (n, index_ivf.nprobe)

    # the coarse distances are used in IVFPQ with L2 distance and
    # by_residual=True otherwise we provide dummy coarse_dis
    if coarse_dis is None:
        # zero-filled (not np.empty) so that no uninitialized memory is
        # handed to the search, same as in search_preassigned
        coarse_dis = np.zeros((n, index_ivf.nprobe), dtype=dis_type)
    else:
        assert coarse_dis.shape == (n, index_ivf.nprobe)

    res = faiss.RangeSearchResult(n)
    sp = faiss.swig_ptr

    index_ivf.range_search_preassigned_c(
        n, sp(x), radius,
        sp(list_nos), sp(coarse_dis),
        res
    )

    # wrap the result pointers as arrays and copy them, so that the
    # returned arrays do not alias the buffers held by res
    lims = faiss.rev_swig_ptr(res.lims, n + 1).copy()
    num_results = int(lims[-1])
    dist = faiss.rev_swig_ptr(res.distances, num_results).copy()
    indices = faiss.rev_swig_ptr(res.labels, num_results).copy()
    return lims, dist, indices
|
|
|
|
|
|
def replace_ivf_quantizer(index_ivf, new_quantizer):
    """
    Install new_quantizer as the quantizer of index_ivf and return the
    previous quantizer.

    An empty new_quantizer is trained on, then filled with, the
    centroids reconstructed from the current quantizer. A non-empty one
    must already contain exactly index_ivf.nlist entries.
    """
    if new_quantizer.ntotal != 0:
        assert new_quantizer.ntotal == index_ivf.nlist
    else:
        centroids = index_ivf.quantizer.reconstruct_n()
        new_quantizer.train(centroids)
        new_quantizer.add(centroids)

    # cleanly dealloc old quantizer: the index gives up ownership of its
    # fields and the previous ownership flag is handed over to the
    # Python wrapper of the old quantizer
    was_owner = index_ivf.own_fields
    index_ivf.own_fields = False
    previous = faiss.downcast_index(index_ivf.quantizer)
    previous.this.own(was_owner)
    index_ivf.quantizer = new_quantizer

    # keep a Python-side reference to the new quantizer on the index
    # (presumably so that it stays alive as long as the index does)
    if not hasattr(index_ivf, "referenced_objects"):
        index_ivf.referenced_objects = [new_quantizer]
    else:
        index_ivf.referenced_objects.append(new_quantizer)
    return previous
|
|
|
|
|
|
def permute_invlists(index_ivf, perm):
    """
    Reorder the inverted lists of index_ivf according to perm and apply
    the same reordering to the quantizer entries.

    perm is a 1D array of size nlist such that old_index = perm[new_index].
    """
    (nb_lists,) = perm.shape
    assert index_ivf.nlist == nb_lists
    coarse = faiss.downcast_index(index_ivf.quantizer)
    assert coarse.ntotal == index_ivf.nlist
    perm = np.ascontiguousarray(perm, dtype='int64')

    # sanity check: each list number must appear exactly once
    occurrences = np.bincount(perm, minlength=nb_lists)
    exactly_once = np.ones(nb_lists, dtype=int)
    assert np.all(occurrences == exactly_once)

    # first the quantizer entries...
    coarse.permute_entries(perm)

    # ...then the inverted lists themselves
    lists = faiss.downcast_InvertedLists(index_ivf.invlists)
    perm_ptr = faiss.swig_ptr(perm)
    lists.permute_invlists(perm_ptr)
|
|
|
|
|
|
def sort_invlists_by_size(index_ivf):
    """
    Permute the inverted lists of index_ivf (and its quantizer entries)
    so that the lists are ordered by increasing size.
    """
    sizes = get_invlist_sizes(index_ivf.invlists)
    # argsort yields old list numbers in ascending size order, which is
    # the old_index = perm[new_index] convention of permute_invlists
    permute_invlists(index_ivf, np.argsort(sizes))
|