# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
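
"""Tests of miscellaneous Faiss building blocks: clustering, PCA,
scalar quantizers, low-level distance kernels and the SWIG wrapper."""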

from __future__ import absolute_import, division, print_function

import array
import unittest

import numpy as np

import faiss

from common import get_dataset_2


class TestClustering(unittest.TestCase):

    def test_clustering(self):
        d = 64
        n = 1000
        rs = np.random.RandomState(123)
        x = rs.uniform(size=(n, d)).astype('float32')

        x *= 10

        km = faiss.Kmeans(d, 32, niter=10)
        err32 = km.train(x)

        # check that the objective is decreasing
        prev = 1e50
        for o in km.obj:
            self.assertGreater(prev, o)
            prev = o

        km = faiss.Kmeans(d, 64, niter=10)
        err64 = km.train(x)

        # check that 64 centroids give a lower quantization error than 32
        self.assertGreater(err32, err64)

        km = faiss.Kmeans(d, 32, niter=10, int_centroids=True)
        err_int = km.train(x)

        # check that integer centroids are not as good as float ones
        self.assertGreater(err_int, err32)
        self.assertTrue(np.all(km.centroids == np.floor(km.centroids)))
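
    # For reference, the Kmeans round-trip exercised above follows this
    # pattern (an illustrative sketch, not an extra test):
    #
    #   km = faiss.Kmeans(d, k, niter=10)
    #   err = km.train(x)             # final quantization error
    #   centroids = km.centroids      # (k, d) float32 array
    #   D, I = km.index.search(x, 1)  # assign each point to its centroid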

    def test_nasty_clustering(self):
        d = 2
        rs = np.random.RandomState(123)
        x = np.zeros((100, d), dtype='float32')
        for i in range(5):
            x[i * 20:i * 20 + 20] = rs.uniform(size=d)

        # we have 5 distinct points but ask for 10 centroids...
        km = faiss.Kmeans(d, 10, niter=10, verbose=True)
        km.train(x)

    def test_redo(self):
        d = 64
        n = 1000

        rs = np.random.RandomState(123)
        x = rs.uniform(size=(n, d)).astype('float32')

        # make sure that doing 10 redos yields a better objective than just 1
        clus = faiss.Clustering(d, 20)
        clus.nredo = 1
        clus.train(x, faiss.IndexFlatL2(d))
        obj1 = clus.iteration_stats.at(clus.iteration_stats.size() - 1).obj

        clus = faiss.Clustering(d, 20)
        clus.nredo = 10
        clus.train(x, faiss.IndexFlatL2(d))
        obj10 = clus.iteration_stats.at(clus.iteration_stats.size() - 1).obj

        self.assertGreater(obj1, obj10)

    def test_redo_cosine(self):
        # test nredo with cosine distance (inner product, so the
        # objective is reversed: higher is better)
        d = 64
        n = 1000

        rs = np.random.RandomState(123)
        x = rs.uniform(size=(n, d)).astype('float32')
        faiss.normalize_L2(x)

        # make sure that doing 10 redos yields a better objective than
        # just 1 (here better means a higher inner-product objective)
        clus = faiss.Clustering(d, 20)
        clus.nredo = 1
        clus.train(x, faiss.IndexFlatIP(d))
        obj1 = clus.iteration_stats.at(clus.iteration_stats.size() - 1).obj

        clus = faiss.Clustering(d, 20)
        clus.nredo = 10
        clus.train(x, faiss.IndexFlatIP(d))
        obj10 = clus.iteration_stats.at(clus.iteration_stats.size() - 1).obj

        self.assertGreater(obj10, obj1)

    def test_1ptpercluster(self):
        # regression test for k-means with as many centroids as points,
        # see https://github.com/facebookresearch/faiss/issues/842
        X = np.random.randint(0, 1, (5, 10)).astype('float32')
        k = 5
        niter = 10
        verbose = True
        kmeans = faiss.Kmeans(X.shape[1], k, niter=niter, verbose=verbose)
        kmeans.train(X)
        l2_distances, I = kmeans.index.search(X, 1)

    def test_weighted(self):
        d = 32
        sigma = 0.1

        # Data is naturally clustered in 10 clusters:
        # 5 clusters have 100 points each,
        # 5 clusters have 10 points each.
        # Run k-means with only 5 centroids.
        ccent = faiss.randn((10, d), 123)
        faiss.normalize_L2(ccent)
        x = [ccent[i] + sigma * faiss.randn((100, d), 1234 + i) for i in range(5)]
        x += [ccent[i] + sigma * faiss.randn((10, d), 1234 + i) for i in range(5, 10)]
        x = np.vstack(x)

        clus = faiss.Clustering(d, 5)
        index = faiss.IndexFlatL2(d)
        clus.train(x, index)
        cdis1, perm1 = index.search(ccent, 1)

        # squared distance of ground-truth centroids to the nearest cluster
        cdis1_first = cdis1[:5].sum()
        cdis1_last = cdis1[5:].sum()

        # now assign weight 0.1 to the points of the 5 first clusters and
        # weight 10 to those of the 5 last ones, and re-run k-means
        weights = np.ones(100 * 5 + 10 * 5, dtype='float32')
        weights[:100 * 5] = 0.1
        weights[100 * 5:] = 10

        clus = faiss.Clustering(d, 5)
        index = faiss.IndexFlatL2(d)
        clus.train(x, index, weights=weights)
        cdis2, perm2 = index.search(ccent, 1)

        # squared distance of ground-truth centroids to the nearest cluster
        cdis2_first = cdis2[:5].sum()
        cdis2_last = cdis2[5:].sum()

        print(cdis1_first, cdis1_last)
        print(cdis2_first, cdis2_last)

        # unweighted, the populous first clusters are fitted much (2x)
        # more closely; with the re-weighting the relation is reversed
        self.assertGreater(cdis1_last, cdis1_first * 2)
        self.assertGreater(cdis2_first, cdis2_last * 2)
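
    # Sketch of the call pattern for weighted training (per-point float32
    # weights, one per training vector), mirroring the code above:
    #
    #   weights = np.ones(n, dtype='float32')
    #   clus = faiss.Clustering(d, k)
    #   clus.train(x, faiss.IndexFlatL2(d), weights=weights)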

    def test_encoded(self):
        d = 32
        k = 5
        xt, xb, xq = get_dataset_2(d, 1000, 0, 0)

        # make sure that training on a compressed then decompressed
        # dataset gives the same result as decompressing on-the-fly

        codec = faiss.IndexScalarQuantizer(d, faiss.ScalarQuantizer.QT_4bit)
        codec.train(xt)
        codes = codec.sa_encode(xt)

        xt2 = codec.sa_decode(codes)

        clus = faiss.Clustering(d, k)
        # clus.verbose = True
        clus.niter = 0
        index = faiss.IndexFlatL2(d)
        clus.train(xt2, index)
        ref_centroids = faiss.vector_to_array(clus.centroids).reshape(-1, d)

        # search returns (distances, labels): keep the distances
        ref_errs, _ = index.search(xt2, 1)

        clus = faiss.Clustering(d, k)
        # clus.verbose = True
        clus.niter = 0
        clus.decode_block_size = 120
        index = faiss.IndexFlatL2(d)
        clus.train_encoded(codes, codec, index)
        new_centroids = faiss.vector_to_array(clus.centroids).reshape(-1, d)

        new_errs, _ = index.search(xt2, 1)

        # it is the same operation, so the result should be bit-exact
        self.assertTrue(np.all(ref_centroids == new_centroids))
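
    # For reference, the on-the-fly path tested above: train_encoded takes
    # the codes plus the codec that produced them, decoding blocks of
    # decode_block_size vectors at a time during training, so
    #
    #   clus.train_encoded(codec.sa_encode(xt), codec, index)
    #
    # is equivalent to clus.train(codec.sa_decode(codec.sa_encode(xt)), index).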

    def test_init(self):
        d = 32
        k = 5
        xt, xb, xq = get_dataset_2(d, 1000, 0, 0)
        km = faiss.Kmeans(d, k, niter=4)
        km.train(xt)

        km2 = faiss.Kmeans(d, k, niter=4)
        km2.train(xt, init_centroids=km.centroids)

        # check that the initial objective is better for km2 than for km
        self.assertGreater(km.obj[0], km2.obj[0] * 1.01)

    def test_stats(self):
        d = 32
        k = 5
        xt, xb, xq = get_dataset_2(d, 1000, 0, 0)
        km = faiss.Kmeans(d, k, niter=4)
        km.train(xt)
        assert list(km.obj) == [st['obj'] for st in km.iteration_stats]


class TestPCA(unittest.TestCase):

    def test_pca(self):
        d = 64
        n = 1000
        np.random.seed(123)
        x = np.random.random(size=(n, d)).astype('float32')

        pca = faiss.PCAMatrix(d, 10)
        pca.train(x)
        y = pca.apply_py(x)

        # check that the energy per component is decreasing
        column_norm2 = (y**2).sum(0)

        prev = 1e50
        for o in column_norm2:
            self.assertGreater(prev, o)
            prev = o
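
    # PCAMatrix orders the output components by decreasing eigenvalue,
    # which is what the decreasing column energies above verify. For
    # reference, the transform can also be (lossily) inverted:
    #
    #   x_approx = pca.reverse_transform(y)   # back-project 10 dims to 64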


class TestRevSwigPtr(unittest.TestCase):

    def test_rev_swig_ptr(self):

        index = faiss.IndexFlatL2(4)
        xb0 = np.vstack([
            i * 10 + np.array([1, 2, 3, 4], dtype='float32')
            for i in range(5)])
        index.add(xb0)
        xb = faiss.rev_swig_ptr(index.xb.data(), 4 * 5).reshape(5, 4)
        self.assertEqual(np.abs(xb0 - xb).sum(), 0)
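
    # rev_swig_ptr wraps a raw C pointer as a numpy array without copying,
    # so xb above is a view into the index's own storage and is only valid
    # while the index stays alive.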


class TestException(unittest.TestCase):

    def test_exception(self):

        index = faiss.IndexFlatL2(10)

        a = np.zeros((5, 10), dtype='float32')
        b = np.zeros(5, dtype='int64')

        # add_with_ids is an unsupported operation for IndexFlat
        self.assertRaises(
            RuntimeError,
            index.add_with_ids, a, b
        )
        # assert 'add_with_ids not implemented' in str(e)

    def test_exception_2(self):
        # an ill-formed factory string should be rejected
        self.assertRaises(
            RuntimeError,
            faiss.index_factory, 12, 'IVF256,Flat,PQ8'
        )
        # assert 'could not parse' in str(e)


class TestMapLong2Long(unittest.TestCase):

    def test_maplong2long(self):
        keys = np.array([13, 45, 67], dtype=np.int64)
        vals = np.array([3, 8, 2], dtype=np.int64)

        m = faiss.MapLong2Long()
        m.add(keys, vals)

        assert np.all(m.search_multiple(keys) == vals)

        # a missing key maps to -1
        assert m.search(12343) == -1


class TestOrthogonalReconstruct(unittest.TestCase):

    def test_recons_orthonormal(self):
        lt = faiss.LinearTransform(20, 10, True)
        rs = np.random.RandomState(10)
        A, _ = np.linalg.qr(rs.randn(20, 20))
        A = A[:10].astype('float32')
        faiss.copy_array_to_vector(A.ravel(), lt.A)
        faiss.copy_array_to_vector(rs.randn(10).astype('float32'), lt.b)

        lt.set_is_orthonormal()
        lt.is_trained = True
        assert lt.is_orthonormal

        x = rs.rand(30, 20).astype('float32')
        xt = lt.apply_py(x)
        xtt = lt.reverse_transform(xt)
        xttt = lt.apply_py(xtt)

        err = ((xt - xttt)**2).sum()

        self.assertGreater(1e-5, err)
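
    # Why this works: A is 10x20 with orthonormal rows, so reverse_transform
    # applies A^T and apply(reverse(xt)) = A A^T xt = xt, even though x
    # itself cannot be recovered once 10 dimensions have been dropped.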

    def test_recons_orthogonal_impossible(self):
        lt = faiss.LinearTransform(20, 10, True)
        rs = np.random.RandomState(10)
        A = rs.randn(10 * 20).astype('float32')
        faiss.copy_array_to_vector(A.ravel(), lt.A)
        faiss.copy_array_to_vector(rs.randn(10).astype('float32'), lt.b)
        lt.is_trained = True

        lt.set_is_orthonormal()
        assert not lt.is_orthonormal

        x = rs.rand(30, 20).astype('float32')
        xt = lt.apply_py(x)
        try:
            lt.reverse_transform(xt)
        except Exception:
            pass
        else:
            self.fail('should have raised an exception')


class TestMAdd(unittest.TestCase):

    def test_1(self):
        # try dimensions that are multiples of 16 and some that are not
        rs = np.random.RandomState(123)
        swig_ptr = faiss.swig_ptr
        for dim in 16, 32, 20, 25:
            for _repeat in 1, 2, 3, 4, 5:
                a = rs.rand(dim).astype('float32')
                b = rs.rand(dim).astype('float32')
                c = np.zeros(dim, dtype='float32')
                bf = rs.uniform(5.0) - 2.5
                idx = faiss.fvec_madd_and_argmin(
                    dim, swig_ptr(a), bf, swig_ptr(b),
                    swig_ptr(c))
                ref_c = a + b * bf
                assert np.abs(c - ref_c).max() < 1e-5
                assert idx == ref_c.argmin()
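
    # fvec_madd_and_argmin fuses c = a + bf * b with an argmin over c, as
    # the reference computation above shows; the dims that are not
    # multiples of 16 presumably exercise the non-SIMD remainder path.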


class TestNyFuncs(unittest.TestCase):

    def test_l2(self):
        rs = np.random.RandomState(123)
        swig_ptr = faiss.swig_ptr
        for d in 1, 2, 4, 8, 12, 16:
            x = rs.rand(d).astype('float32')
            for ny in 128, 129, 130:
                print("d=%d ny=%d" % (d, ny))
                y = rs.rand(ny, d).astype('float32')
                ref = ((x - y) ** 2).sum(1)
                new = np.zeros(ny, dtype='float32')
                faiss.fvec_L2sqr_ny(swig_ptr(new), swig_ptr(x),
                                    swig_ptr(y), d, ny)
                assert np.abs(ref - new).max() < 1e-4

    def test_IP(self):
        # this kernel is not SIMD-optimized, but test it just in case
        rs = np.random.RandomState(123)
        swig_ptr = faiss.swig_ptr
        for d in 1, 2, 4, 8, 12, 16:
            x = rs.rand(d).astype('float32')
            for ny in 128, 129, 130:
                print("d=%d ny=%d" % (d, ny))
                y = rs.rand(ny, d).astype('float32')
                ref = (x * y).sum(1)
                new = np.zeros(ny, dtype='float32')
                faiss.fvec_inner_products_ny(
                    swig_ptr(new), swig_ptr(x), swig_ptr(y), d, ny)
                assert np.abs(ref - new).max() < 1e-4


class TestMatrixStats(unittest.TestCase):

    def test_0s(self):
        rs = np.random.RandomState(123)
        m = rs.rand(40, 20).astype('float32')
        m[5:10] = 0
        comments = faiss.MatrixStats(m).comments
        print(comments)
        assert 'has 5 copies' in comments
        assert '5 null vectors' in comments

    def test_copies(self):
        rs = np.random.RandomState(123)
        m = rs.rand(40, 20).astype('float32')
        m[::2] = m[1::2]
        comments = faiss.MatrixStats(m).comments
        print(comments)
        assert '20 vectors are distinct' in comments

    def test_dead_dims(self):
        rs = np.random.RandomState(123)
        m = rs.rand(40, 20).astype('float32')
        m[:, 5:10] = 0
        comments = faiss.MatrixStats(m).comments
        print(comments)
        assert '5 dimensions are constant' in comments

    def test_rogue_means(self):
        rs = np.random.RandomState(123)
        m = rs.rand(40, 20).astype('float32')
        m[:, 5:10] += 12345
        comments = faiss.MatrixStats(m).comments
        print(comments)
        assert '5 dimensions are too large wrt. their variance' in comments

    def test_normalized(self):
        rs = np.random.RandomState(123)
        m = rs.rand(40, 20).astype('float32')
        faiss.normalize_L2(m)
        comments = faiss.MatrixStats(m).comments
        print(comments)
        assert 'vectors are normalized' in comments


class TestScalarQuantizer(unittest.TestCase):

    def test_8bit_equiv(self):
        rs = np.random.RandomState(123)
        for _it in range(20):
            for d in 13, 16, 24:
                x = np.floor(rs.rand(5, d) * 256).astype('float32')
                x[0] = 0
                x[1] = 255

                # make sure to test extreme cases
                x[2, 0] = 0
                x[3, 0] = 255
                x[2, 1] = 255
                x[3, 1] = 0

                ref_index = faiss.IndexScalarQuantizer(
                    d, faiss.ScalarQuantizer.QT_8bit)
                ref_index.train(x[:2])
                ref_index.add(x[2:3])

                index = faiss.IndexScalarQuantizer(
                    d, faiss.ScalarQuantizer.QT_8bit_direct)
                assert index.is_trained
                index.add(x[2:3])

                assert np.all(
                    faiss.vector_to_array(ref_index.codes) ==
                    faiss.vector_to_array(index.codes))

                # Note that distances are not the same because ref_index
                # reconstructs x as x + 0.5
                D, I = index.search(x[3:], 1)

                # assert D[0, 0] == Dref[0, 0]
                # print(D[0, 0], ((x[3] - x[2]) ** 2).sum())
                assert D[0, 0] == ((x[3] - x[2]) ** 2).sum()
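
    # The 0.5 offset above: the trained QT_8bit quantizer reconstructs each
    # code at the center of its quantization bin, which for this integer
    # data spanning [0, 255] works out to code + 0.5, whereas QT_8bit_direct
    # reconstructs the raw byte value itself. Hence the codes match bit for
    # bit while the reconstructions are shifted.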

    def test_6bit_equiv(self):
        rs = np.random.RandomState(123)
        for d in 3, 6, 8, 16, 36:
            trainset = np.zeros((2, d), dtype='float32')
            trainset[0, :] = 0
            trainset[1, :] = 63

            index = faiss.IndexScalarQuantizer(
                d, faiss.ScalarQuantizer.QT_6bit)
            index.train(trainset)

            print('cs=', index.code_size)

            x = rs.randint(64, size=(100, d)).astype('float32')

            # verify encoder / decoder
            index.add(x)
            x2 = index.reconstruct_n(0, x.shape[0])
            assert np.all(x == x2 - 0.5)

            # verify AVX decoder (used only for search)
            y = 63 * rs.rand(20, d).astype('float32')

            D, I = index.search(y, 10)
            for i in range(20):
                for j in range(10):
                    dis = ((y[i] - x2[I[i, j]]) ** 2).sum()
                    # print(dis, D[i, j])
                    assert abs(D[i, j] - dis) / dis < 1e-5
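
    # The printed code_size reflects that 6-bit codes are bit-packed: a
    # d-dimensional vector takes ceil(6 * d / 8) bytes, e.g. 27 bytes for
    # d=36 instead of 36 with one byte per component.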


class TestRandom(unittest.TestCase):

    def test_rand(self):
        x = faiss.rand(2000)
        assert np.all(x >= 0) and np.all(x < 1)
        h, _ = np.histogram(x, np.arange(0, 1, 0.1))
        assert h.min() > 160 and h.max() < 240

    def test_randint(self):
        x = faiss.randint(20000, vmax=100)
        assert np.all(x >= 0) and np.all(x < 100)
        c = np.bincount(x, minlength=100)
        print(c)
        assert c.max() - c.min() < 50 * 2


class TestPairwiseDis(unittest.TestCase):

    def test_L2(self):
        swig_ptr = faiss.swig_ptr
        x = faiss.rand((100, 10), seed=1)
        y = faiss.rand((200, 10), seed=2)
        ix = faiss.randint(50, vmax=100)
        iy = faiss.randint(50, vmax=200)
        dis = np.empty(50, dtype='float32')
        faiss.pairwise_indexed_L2sqr(
            10, 50,
            swig_ptr(x), swig_ptr(ix),
            swig_ptr(y), swig_ptr(iy),
            swig_ptr(dis))

        for i in range(50):
            assert np.allclose(
                dis[i], ((x[ix[i]] - y[iy[i]]) ** 2).sum())
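
    # numpy equivalent of the checked kernel, for reference:
    #   dis_ref = ((x[ix] - y[iy]) ** 2).sum(axis=1)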

    def test_IP(self):
        swig_ptr = faiss.swig_ptr
        x = faiss.rand((100, 10), seed=1)
        y = faiss.rand((200, 10), seed=2)
        ix = faiss.randint(50, vmax=100)
        iy = faiss.randint(50, vmax=200)
        dis = np.empty(50, dtype='float32')
        faiss.pairwise_indexed_inner_product(
            10, 50,
            swig_ptr(x), swig_ptr(ix),
            swig_ptr(y), swig_ptr(iy),
            swig_ptr(dis))

        for i in range(50):
            assert np.allclose(
                dis[i], np.dot(x[ix[i]], y[iy[i]]))
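
    # numpy equivalent, for reference:
    #   dis_ref = (x[ix] * y[iy]).sum(axis=1)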


class TestSWIGWrap(unittest.TestCase):
    """ various regressions with the SWIG wrapper """

    def test_size_t_ptr(self):
        # issue 1064
        index = faiss.IndexHNSWFlat(10, 32)

        hnsw = index.hnsw
        index.add(np.random.rand(100, 10).astype('float32'))
        be = np.empty(2, 'uint64')
        # neighbor_range takes size_t* output arguments; check that
        # pointers derived from numpy uint64 arrays are accepted
        hnsw.neighbor_range(23, 0, faiss.swig_ptr(be), faiss.swig_ptr(be[1:]))

    def test_id_map_at(self):
        # issue 1020
        n_features = 100
        feature_dims = 10

        features = np.random.random((n_features, feature_dims)).astype(np.float32)
        idx = np.arange(n_features).astype(np.int64)

        index = faiss.IndexFlatL2(feature_dims)
        index = faiss.IndexIDMap2(index)
        index.add_with_ids(features, idx)

        # id_map.at must accept Python ints without raising
        [index.id_map.at(int(i)) for i in range(index.ntotal)]

    def test_downcast_Refine(self):

        index = faiss.IndexRefineFlat(
            faiss.IndexScalarQuantizer(10, faiss.ScalarQuantizer.QT_8bit)
        )

        # serialize and deserialize
        index2 = faiss.deserialize_index(
            faiss.serialize_index(index)
        )

        assert isinstance(index2, faiss.IndexRefineFlat)
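
    # deserialize_index downcasts to the most specific wrapper class,
    # which is what the isinstance check above verifies.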

    def do_test_array_type(self, dtype):
        """ tests swig_ptr and rev_swig_ptr for this type of array """
        a = np.arange(12).astype(dtype)
        ptr = faiss.swig_ptr(a)
        print(ptr)
        a2 = faiss.rev_swig_ptr(ptr, 12)
        np.testing.assert_array_equal(a, a2)

    def test_all_array_types(self):
        self.do_test_array_type('float32')
        self.do_test_array_type('float64')
        self.do_test_array_type('int8')
        self.do_test_array_type('uint8')
        self.do_test_array_type('int16')
        self.do_test_array_type('uint16')
        self.do_test_array_type('int32')
        self.do_test_array_type('uint32')
        self.do_test_array_type('int64')
        self.do_test_array_type('uint64')

    def test_int64(self):
        # see https://github.com/facebookresearch/faiss/issues/1529
        # C 'long' is 4 bytes on some platforms, so pick the wrapped
        # vector type that actually holds 64-bit ints
        sizeof_long = array.array("l").itemsize
        if sizeof_long == 4:
            v = faiss.LongLongVector()
        elif sizeof_long == 8:
            v = faiss.LongVector()
        else:
            raise AssertionError("weird long size")

        for i in range(10):
            v.push_back(i)
        a = faiss.vector_to_array(v)
        assert a.dtype == 'int64'
        np.testing.assert_array_equal(a, np.arange(10, dtype='int64'))

        # check if it works in an IDMap
        idx = faiss.IndexIDMap(faiss.IndexFlatL2(32))
        idx.add_with_ids(
            np.random.rand(10, 32).astype('float32'),
            np.random.randint(1000, size=10, dtype='int64')
        )
        faiss.vector_to_array(idx.id_map)


if __name__ == '__main__':
    unittest.main()