faiss/tests/test_contrib_with_scipy.py
Michael Norris eff0898a13 Enable linting: lint config changes plus arc lint command (#3966)
Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3966

This actually enables the linting.

Manual changes:
- tools/arcanist/lint/fbsource-licenselint-config.toml
- tools/arcanist/lint/fbsource-lint-engine.toml

Automated changes:
`arc lint --apply-patches --take LICENSELINT --paths-cmd 'hg files faiss'`

Reviewed By: asadoughi

Differential Revision: D64484165

fbshipit-source-id: 4f2f6e953c94ef6ebfea8a5ae035ccfbea65ed04
2024-10-22 09:46:48 -07:00

68 lines
2.0 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import faiss
import unittest
import numpy as np
from faiss.contrib import datasets
from faiss.contrib import clustering
import scipy.sparse
# this test is not in test_contrib because it depends on scipy
class TestClustering(unittest.TestCase):
    """Tests for the sparse-input routines of ``faiss.contrib.clustering``."""

    @staticmethod
    def _sparsified_train(ds):
        """Return a normalized copy of the train set with small entries zeroed.

        The train vectors of ``ds`` are copied, L2-normalized in place, and
        every component whose absolute value is not above 0.045 is set to 0.
        """
        xt = ds.get_train().copy()
        faiss.normalize_L2(xt)
        keep = np.abs(xt) > 0.045
        xt[~keep] = 0
        return xt

    def test_sparse_routines(self):
        """ the sparse assignment routine """
        ds = datasets.SyntheticDataset(1000, 2000, 0, 200)
        xt = self._sparsified_train(ds)

        centroids = ds.get_queries()
        # unittest assertion: reports both values on failure and is not
        # stripped when Python runs with -O, unlike a bare assert
        self.assertEqual(len(centroids), 200)

        xsparse = scipy.sparse.csr_matrix(xt)

        # reference result: brute-force 1-NN on the densified matrix.
        # toarray() gives a plain ndarray, todense() a legacy np.matrix
        Dref, Iref = faiss.knn(xsparse.toarray(), centroids, 1)
        Dref = Dref.ravel()
        Iref = Iref.ravel()

        D, I = clustering.sparse_assign_to_dense(xsparse, centroids)
        np.testing.assert_array_equal(Iref, I)
        np.testing.assert_array_almost_equal(Dref, D, decimal=3)

        # the blocked variant must give the same assignment
        D, I = clustering.sparse_assign_to_dense_blocks(
            xsparse, centroids, qbs=123, bbs=33, nt=4)
        np.testing.assert_array_equal(Iref, I)
        np.testing.assert_array_almost_equal(Dref, D, decimal=3)

    def test_sparse_kmeans(self):
        """ demo on how to cluster sparse data into dense clusters """
        ds = datasets.SyntheticDataset(1000, 1500, 0, 0)
        xt = self._sparsified_train(ds)

        # reference: regular k-means on the dense representation
        km = faiss.Kmeans(ds.d, 50)
        km.train(xt)
        ref_err = km.iteration_stats[-1]["obj"]

        # same data, clustered through the sparse assignment object
        xsparse = scipy.sparse.csr_matrix(xt)
        centroids, iteration_stats = clustering.kmeans(
            50, clustering.DatasetAssignSparse(xsparse), return_stats=True)
        new_err = iteration_stats[-1]["obj"]

        # the final objective may be at most 10% above the dense reference
        self.assertLess(new_err, ref_err * 1.1)