# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.

#! /usr/bin/env python2

# translation of test_knn.lua

import numpy as np
import unittest
import faiss

from common import Randu10k, get_dataset_2, Randu10kUnbalanced

ev = Randu10k()

d = ev.d

# Parameters inverted indexes
ncentroids = int(4 * np.sqrt(ev.nb))
kprobe = int(np.sqrt(ncentroids))

# Parameters for LSH
nbits = d

# Parameters for indexes involving PQ
M = int(d / 8)           # for PQ: #subquantizers
nbits_per_index = 8      # for PQ


class IndexAccuracy(unittest.TestCase):

    def test_IndexFlatIP(self):
        q = faiss.IndexFlatIP(d)  # Ask inner product
        res = ev.launch('FLAT / IP', q)
        e = ev.evalres(res)
        assert e[1] == 1.0

    def test_IndexFlatL2(self):
        q = faiss.IndexFlatL2(d)
        res = ev.launch('FLAT / L2', q)
        e = ev.evalres(res)
        assert e[1] == 1.0

    def test_ivf_kmeans(self):
        ivfk = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, ncentroids)
        ivfk.nprobe = kprobe
        res = ev.launch('IVF K-means', ivfk)
        e = ev.evalres(res)
        # should give 0.260  0.260  0.260
        assert e[1] > 0.2

    def test_indexLSH(self):
        q = faiss.IndexLSH(d, nbits)
        res = ev.launch('FLAT / LSH Cosine', q)
        e = ev.evalres(res)
        # should give 0.070  0.250  0.580
        assert e[10] > 0.2

    def test_IndexLSH_32_48(self):
        # CHECK: the difference between 32 and 48 does not make much sense
        for nbits2 in 32, 48:
            q = faiss.IndexLSH(d, nbits2)
            res = ev.launch('LSH half size', q)
            e = ev.evalres(res)
            # should give 0.003  0.019  0.108
            assert e[10] > 0.018

    def test_IndexPQ(self):
        q = faiss.IndexPQ(d, M, nbits_per_index)
        res = ev.launch('FLAT / PQ L2', q)
        e = ev.evalres(res)
        # should give 0.070  0.230  0.260
        assert e[10] > 0.2

    # Approximate search module: PQ with inner product distance
    def test_IndexPQ_ip(self):
        q = faiss.IndexPQ(d, M, nbits_per_index, faiss.METRIC_INNER_PRODUCT)
        res = ev.launch('FLAT / PQ IP', q)
        e = ev.evalres(res)
        # should give 0.070  0.230  0.260
        #(same result as regular PQ on normalized distances)
        assert e[10] > 0.2

    def test_IndexIVFPQ(self):
        ivfpq = faiss.IndexIVFPQ(faiss.IndexFlatL2(d), d, ncentroids, M, 8)
        ivfpq.nprobe = kprobe
        res = ev.launch('IVF PQ', ivfpq)
        e = ev.evalres(res)
        # should give 0.070  0.230  0.260
        assert e[10] > 0.2

    # TODO: translate evaluation of nested

    # Approximate search: PQ with full vector refinement
    def test_IndexPQ_refined(self):
        q = faiss.IndexPQ(d, M, nbits_per_index)
        res = ev.launch('PQ non-refined', q)
        e = ev.evalres(res)
        q.reset()

        rq = faiss.IndexRefineFlat(q)
        res = ev.launch('PQ refined', rq)
        e2 = ev.evalres(res)
        assert e2[10] >= e[10]
        rq.k_factor = 4

        res = ev.launch('PQ refined*4', rq)
        e3 = ev.evalres(res)
        assert e3[10] >= e2[10]

    def test_polysemous(self):
        index = faiss.IndexPQ(d, M, nbits_per_index)
        index.do_polysemous_training = True
        # reduce nb iterations to speed up training for the test
        index.polysemous_training.n_iter = 50000
        index.polysemous_training.n_redo = 1
        res = ev.launch('normal PQ', index)
        e_baseline = ev.evalres(res)
        index.search_type = faiss.IndexPQ.ST_polysemous

        index.polysemous_ht = int(M / 16. * 58)

        stats = faiss.cvar.indexPQ_stats
        stats.reset()

        res = ev.launch('Polysemous ht=%d' % index.polysemous_ht,
                        index)
        e_polysemous = ev.evalres(res)
        print(e_baseline, e_polysemous,  index.polysemous_ht)
        print(stats.n_hamming_pass, stats.ncode)
        # The randu dataset is difficult, so we are not too picky on
        # the results. Here we assert that we have < 10 % loss when
        # computing full PQ on fewer than 20% of the data.
        assert stats.n_hamming_pass < stats.ncode / 5
        # Test disabled because difference is 0.17 on aarch64
        # TODO check why???
        # assert e_polysemous[10] > e_baseline[10] - 0.1

    def test_ScalarQuantizer(self):
        quantizer = faiss.IndexFlatL2(d)
        ivfpq = faiss.IndexIVFScalarQuantizer(
            quantizer, d, ncentroids,
            faiss.ScalarQuantizer.QT_8bit)
        ivfpq.nprobe = kprobe
        res = ev.launch('IVF SQ', ivfpq)
        e = ev.evalres(res)
        # should give 0.234  0.236  0.236
        assert e[10] > 0.235


class TestSQFlavors(unittest.TestCase):
    """ tests IP in addition to L2, non multiple of 8 dimensions
    """

    def add2columns(self, x):
        return np.hstack((
            x, np.zeros((x.shape[0], 2), dtype='float32')
        ))

    def subtest_add2col(self, xb, xq, index, qname):
        """Test with 2 additional dimensions to take also the non-SIMD
        codepath. We don't retrain anything but add 2 dims to the
        queries, the centroids and the trained ScalarQuantizer.
        """
        nb, d = xb.shape

        d2 = d + 2
        xb2 = self.add2columns(xb)
        xq2 = self.add2columns(xq)

        nlist = index.nlist
        quantizer = faiss.downcast_index(index.quantizer)
        quantizer2 = faiss.IndexFlat(d2, index.metric_type)
        centroids = faiss.vector_to_array(quantizer.xb).reshape(nlist, d)
        centroids2 = self.add2columns(centroids)
        quantizer2.add(centroids2)
        index2 = faiss.IndexIVFScalarQuantizer(
            quantizer2, d2, index.nlist, index.sq.qtype,
            index.metric_type)
        index2.nprobe = 4
        if qname in ('8bit', '4bit'):
            trained = faiss.vector_to_array(index.sq.trained).reshape(2, -1)
            nt = trained.shape[1]
            # 2 lines: vmins and vdiffs
            new_nt = int(nt * d2 / d)
            trained2 = np.hstack((
                trained,
                np.zeros((2, new_nt - nt), dtype='float32')
            ))
            trained2[1, nt:] = 1.0   # set vdiff to 1 to avoid div by 0
            faiss.copy_array_to_vector(trained2.ravel(), index2.sq.trained)
        else:
            index2.sq.trained = index.sq.trained

        index2.is_trained = True
        index2.add(xb2)
        return index2.search(xq2, 10)

    # run on Sept 6, 2018 with nprobe=1
    ref_results_xx = {
        (1, '8bit'): 387,
        (1, '4bit'): 216,
        (1, '8bit_uniform'): 387,
        (1, '4bit_uniform'): 216,
        (1, 'fp16'): 387,
        (0, '8bit'): 364,
        (0, '4bit'): 187,
        (0, '8bit_uniform'): 364,
        (0, '4bit_uniform'): 186,
        (0, 'fp16'): 364,
    }

    # run on Sept 18, 2018 with nprobe=4 + 4 bit bugfix
    ref_results = {
        (0, '8bit'): 984,
        (0, '4bit'): 978,
        (0, '8bit_uniform'): 985,
        (0, '4bit_uniform'): 979,
        (0, 'fp16'): 985,
        (1, '8bit'): 979,
        (1, '4bit'): 973,
        (1, '8bit_uniform'): 979,
        (1, '4bit_uniform'): 972,
        (1, 'fp16'): 979,
    }


    def subtest(self, mt):
        d = 32
        xt, xb, xq = get_dataset_2(d, 1000, 2000, 200)
        nlist = 64

        gt_index = faiss.IndexFlat(d, mt)
        gt_index.add(xb)
        gt_D, gt_I = gt_index.search(xq, 10)
        quantizer = faiss.IndexFlat(d, mt)
        for qname in '8bit 4bit 8bit_uniform 4bit_uniform fp16'.split():
            qtype = getattr(faiss.ScalarQuantizer, 'QT_' + qname)
            index = faiss.IndexIVFScalarQuantizer(
                quantizer, d, nlist, qtype, mt)
            index.train(xt)
            index.add(xb)
            index.nprobe = 4   # hopefully more robust than 1
            D, I = index.search(xq, 10)
            ninter = faiss.eval_intersection(I, gt_I)
            print('(%d, %s): %d, ' % (mt, repr(qname), ninter))
            assert abs(ninter - self.ref_results[(mt, qname)]) <= 9

            D2, I2 = self.subtest_add2col(xb, xq, index, qname)

            assert np.all(I2 == I)


    def test_SQ_IP(self):
        self.subtest(faiss.METRIC_INNER_PRODUCT)

    def test_SQ_L2(self):
        self.subtest(faiss.METRIC_L2)


class TestPQFlavors(unittest.TestCase):

    # run on Dec 14, 2018
    ref_results = {
        (1, True): 800,
        (1, True, 20): 794,
        (1, False): 769,
        (0, True): 831,
        (0, True, 20): 828,
        (0, False): 829,
    }

    def test_IVFPQ_IP(self):
        self.subtest(faiss.METRIC_INNER_PRODUCT)

    def test_IVFPQ_L2(self):
        self.subtest(faiss.METRIC_L2)

    def subtest(self, mt):
        d = 32
        xt, xb, xq = get_dataset_2(d, 1000, 2000, 200)
        nlist = 64

        gt_index = faiss.IndexFlat(d, mt)
        gt_index.add(xb)
        gt_D, gt_I = gt_index.search(xq, 10)
        quantizer = faiss.IndexFlat(d, mt)
        for by_residual in True, False:

            index = faiss.IndexIVFPQ(
                quantizer, d, nlist, 4, 8)
            index.metric_type = mt
            index.by_residual = by_residual
            if by_residual:
                # perform cheap polysemous training
                index.do_polysemous_training = True
                pt = faiss.PolysemousTraining()
                pt.n_iter = 50000
                pt.n_redo = 1
                index.polysemous_training = pt

            index.train(xt)
            index.add(xb)
            index.nprobe = 4
            D, I = index.search(xq, 10)

            ninter = faiss.eval_intersection(I, gt_I)
            print('(%d, %s): %d, ' % (mt, by_residual, ninter))

            assert abs(ninter - self.ref_results[mt, by_residual]) <= 3

            index.use_precomputed_table = 0
            D2, I2 = index.search(xq, 10)
            assert np.all(I == I2)

            if by_residual:

                index.use_precomputed_table = 1
                index.polysemous_ht = 20
                D, I = index.search(xq, 10)
                ninter = faiss.eval_intersection(I, gt_I)
                print('(%d, %s, %d): %d, ' % (
                    mt, by_residual, index.polysemous_ht, ninter))

                # polysemous behaves bizarrely on ARM
                assert (ninter >= self.ref_results[
                    mt, by_residual, index.polysemous_ht] - 4)


class TestFlat1D(unittest.TestCase):

    def test_flat_1d(self):
        rs = np.random.RandomState(123545)
        k = 10
        xb = rs.uniform(size=(100, 1)).astype('float32')
        # make sure to test below and above
        xq = rs.uniform(size=(1000, 1)).astype('float32') * 1.1 - 0.05

        ref = faiss.IndexFlatL2(1)
        ref.add(xb)
        ref_D, ref_I = ref.search(xq, k)

        new = faiss.IndexFlat1D()
        new.add(xb)

        new_D, new_I = new.search(xq, 10)

        ndiff = (np.abs(ref_I - new_I) != 0).sum()

        assert(ndiff < 100)
        new_D = new_D ** 2
        max_diff_D = np.abs(ref_D - new_D).max()
        assert(max_diff_D < 1e-5)


class OPQRelativeAccuracy(unittest.TestCase):
    # translated from test_opq.lua

    def test_OPQ(self):

        M = 4

        ev = Randu10kUnbalanced()
        d = ev.d
        index = faiss.IndexPQ(d, M, 8)

        res = ev.launch('PQ', index)
        e_pq = ev.evalres(res)

        index_pq = faiss.IndexPQ(d, M, 8)
        opq_matrix = faiss.OPQMatrix(d, M)
        # opq_matrix.verbose = true
        opq_matrix.niter = 10
        opq_matrix.niter_pq = 4
        index = faiss.IndexPreTransform(opq_matrix, index_pq)

        res = ev.launch('OPQ', index)
        e_opq = ev.evalres(res)

        print('e_pq=%s' % e_pq)
        print('e_opq=%s' % e_opq)

        # verify that OPQ better than PQ
        assert(e_opq[10] > e_pq[10])

    def test_OIVFPQ(self):
        # Parameters inverted indexes
        ncentroids = 50
        M = 4

        ev = Randu10kUnbalanced()
        d = ev.d
        quantizer = faiss.IndexFlatL2(d)
        index = faiss.IndexIVFPQ(quantizer, d, ncentroids, M, 8)
        index.nprobe = 5

        res = ev.launch('IVFPQ', index)
        e_ivfpq = ev.evalres(res)

        index_ivfpq = faiss.IndexIVFPQ(quantizer, d, ncentroids, M, 8)
        index_ivfpq.nprobe = 5
        opq_matrix = faiss.OPQMatrix(d, M)
        opq_matrix.niter = 10
        index = faiss.IndexPreTransform(opq_matrix, index_ivfpq)

        res = ev.launch('O+IVFPQ', index)
        e_oivfpq = ev.evalres(res)

        # TODO(beauby): Fix and re-enable.
        # verify same on OIVFPQ
        # assert(e_oivfpq[1] > e_ivfpq[1])


if __name__ == '__main__':
    unittest.main()