Add manifold check for size 0 (#1867)

Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/1867

The merging code for the 1T photodna index seems to fail at

https://www.internalfb.com/phabricator/paste/view/P412975011?lines=174

with
```
terminate called after throwing an instance of 'facebook::manifold::blobstore::StorageException'
  what():  [400] Begin offset and/or length were invalid -- Begin offset must be positive and length must be non-negative. Received: offset = 2642410612, length = 0
Aborted (core dumped)
```
This traces back to

https://www.internalfb.com/intern/diffusion/FBS/browsefile/master/fbcode/manifold/blobstore/BlobstoreThriftHandler.cpp?lines=671%2C700%2C732

There is a single case where we don't check whether the read or write size is 0, so let's try this fix.
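
To illustrate the shape of the fix (a minimal sketch only; the real change lives in the internal Manifold I/O wrapper tracked by D28231710, and `store.get_range` / `store.put_range` are hypothetical stand-ins for the blobstore client):

```python
def read_range(store, path, offset, size):
    """Hypothetical ranged read against a Manifold-like blobstore."""
    if size == 0:
        # The backend 400s on length-0 ranges (see the trace above), so
        # the one case where size can be 0 must short-circuit here.
        return b""
    return store.get_range(path, offset, size)


def write_range(store, path, offset, data):
    """Hypothetical ranged write; the same guard on the write side."""
    if len(data) == 0:
        return  # nothing to send; avoid a zero-length request
    store.put_range(path, offset, data)
```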

In the process I realized that the Manifold tests were non-functional due to a name collision on common.py. This is also fixed in all the dependent files.
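
For context, the collision is ordinary Python module shadowing: `unittest discover` puts the test directory on `sys.path`, so a helper named `common.py` can silently lose to any other importable module called `common`. A self-contained sketch of the failure mode (directory and attribute names are made up):

```python
import os
import sys
import tempfile

# Two unrelated modules that are both named common.py.
root = tempfile.mkdtemp()
for pkg, body in [("other", "WHO = 'other'"), ("tests", "WHO = 'faiss tests'")]:
    os.makedirs(os.path.join(root, pkg))
    with open(os.path.join(root, pkg, "common.py"), "w") as f:
        f.write(body + "\n")

# Whichever directory comes first on sys.path wins the import:
sys.path.insert(0, os.path.join(root, "tests"))
sys.path.insert(0, os.path.join(root, "other"))

import common

print(common.WHO)  # -> 'other': the test helper was silently shadowed
```

Renaming the helper to common_faiss_tests.py sidesteps this, since no other module plausibly uses that name.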

Differential Revision: D28231710

fbshipit-source-id: 700ffa6ca0c82c49e7d1eae9e76549ec5ff16332
Matthijs Douze 2021-05-09 22:29:28 -07:00 committed by Facebook GitHub Bot
parent 441ccebbff
commit 2d380e992b
17 changed files with 17 additions and 144 deletions


```diff
@@ -174,8 +174,9 @@ jobs:
           command: |
             docker build -t faiss -f .circleci/Dockerfile.faiss_gpu .
             docker run --gpus all faiss make -C build test
-            docker run --gpus all faiss sh -c '(cd build/faiss/python; python3 setup.py install) && python3 -m unittest discover -s faiss/gpu/test -p "test_*"'
-            docker run --gpus all faiss sh -c '(cd build/faiss/python; python3 setup.py install) && python3 -m unittest discover -s faiss/gpu/test -p "torch_*.py"'
+            docker run --gpus all faiss sh -c '(pwd; find)'
+            docker run --gpus all faiss sh -c '(cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "test_*"'
+            docker run --gpus all faiss sh -c '(cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "torch_*.py"'
           no_output_timeout: 60m
   deploy_linux:
```


```diff
@@ -12,7 +12,7 @@ from faiss.contrib.exhaustive_search import knn_ground_truth, range_ground_truth
 from faiss.contrib import evaluation
 
-from common import get_dataset_2
+from common_faiss_tests import get_dataset_2
 
 class TestComputeGT(unittest.TestCase):
```


```diff
@@ -8,7 +8,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals
 import unittest
 import numpy as np
 import faiss
 
-from common import get_dataset_2
+from common_faiss_tests import get_dataset_2
 
 class ReferencedObject(unittest.TestCase):
```


```diff
@@ -1,128 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-
-# a few common functions for the tests
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import numpy as np
-import faiss
-
-# reduce number of threads to avoid excessive nb of threads in opt
-# mode (recuces runtime from 100s to 4s!)
-faiss.omp_set_num_threads(4)
-
-
-def random_unitary(n, d, seed):
-    x = faiss.randn(n * d, seed).reshape(n, d)
-    faiss.normalize_L2(x)
-    return x
-
-
-class Randu10k:
-
-    def __init__(self):
-        self.nb = 10000
-        self.nq = 1000
-        self.nt = 10000
-        self.d = 128
-
-        self.xb = random_unitary(self.nb, self.d, 1)
-        self.xt = random_unitary(self.nt, self.d, 2)
-        self.xq = random_unitary(self.nq, self.d, 3)
-
-        dotprods = np.dot(self.xq, self.xb.T)
-        self.gt = dotprods.argmax(1)
-        self.k = 100
-
-    def launch(self, name, index):
-        if not index.is_trained:
-            index.train(self.xt)
-        index.add(self.xb)
-        return index.search(self.xq, self.k)
-
-    def evalres(self, DI):
-        D, I = DI
-        e = {}
-        for rank in 1, 10, 100:
-            e[rank] = ((I[:, :rank] == self.gt.reshape(-1, 1)).sum() /
-                       float(self.nq))
-        print("1-recalls: %s" % e)
-        return e
-
-
-class Randu10kUnbalanced(Randu10k):
-
-    def __init__(self):
-        Randu10k.__init__(self)
-
-        weights = 0.95 ** np.arange(self.d)
-        rs = np.random.RandomState(123)
-        weights = weights[rs.permutation(self.d)]
-        self.xb *= weights
-        self.xb /= np.linalg.norm(self.xb, axis=1)[:, np.newaxis]
-        self.xq *= weights
-        self.xq /= np.linalg.norm(self.xq, axis=1)[:, np.newaxis]
-        self.xt *= weights
-        self.xt /= np.linalg.norm(self.xt, axis=1)[:, np.newaxis]
-
-        dotprods = np.dot(self.xq, self.xb.T)
-        self.gt = dotprods.argmax(1)
-        self.k = 100
-
-
-def get_dataset(d, nb, nt, nq):
-    rs = np.random.RandomState(123)
-    xb = rs.rand(nb, d).astype('float32')
-    xt = rs.rand(nt, d).astype('float32')
-    xq = rs.rand(nq, d).astype('float32')
-
-    return (xt, xb, xq)
-
-
-def get_dataset_2(d, nt, nb, nq):
-    """A dataset that is not completely random but still challenging to
-    index
-    """
-    d1 = 10  # intrinsic dimension (more or less)
-    n = nb + nt + nq
-    rs = np.random.RandomState(1338)
-    x = rs.normal(size=(n, d1))
-    x = np.dot(x, rs.rand(d1, d))
-    # now we have a d1-dim ellipsoid in d-dimensional space
-    # higher factor (>4) -> higher frequency -> less linear
-    x = x * (rs.rand(d) * 4 + 0.1)
-    x = np.sin(x)
-    x = x.astype('float32')
-    return x[:nt], x[nt:nt + nb], x[nt + nb:]
-
-
-def make_binary_dataset(d, nt, nb, nq):
-    assert d % 8 == 0
-    rs = np.random.RandomState(123)
-    x = rs.randint(256, size=(nb + nq + nt, int(d / 8))).astype('uint8')
-    return x[:nt], x[nt:-nq], x[-nq:]
-
-
-def compare_binary_result_lists(D1, I1, D2, I2):
-    """comparing result lists is difficult because there are many
-    ties. Here we sort by (distance, index) pairs and ignore the largest
-    distance of each result. Compatible result lists should pass this."""
-    assert D1.shape == I1.shape == D2.shape == I2.shape
-    n, k = D1.shape
-    ndiff = (D1 != D2).sum()
-    assert ndiff == 0, '%d differences in distance matrix %s' % (
-        ndiff, D1.shape)
-
-    def normalize_DI(D, I):
-        norm = I.max() + 1.0
-        Dr = D.astype('float64') + I / norm
-
-        # ignore -1s and elements on last column
-        Dr[I1 == -1] = 1e20
-        Dr[D == D[:, -1:]] = 1e20
-        Dr.sort(axis=1)
-        return Dr
-    ndiff = (normalize_DI(D1, I1) != normalize_DI(D2, I2)).sum()
-    assert ndiff == 0, '%d differences in normalized D matrix' % ndiff
```


```diff
@@ -7,7 +7,7 @@ import unittest
 import numpy as np
 import faiss
 
-from common import make_binary_dataset
+from common_faiss_tests import make_binary_dataset
 
 def bitvec_shuffle(a, order):
```


```diff
@@ -11,7 +11,7 @@ import faiss
 import unittest
 import array
 
-from common import get_dataset_2
+from common_faiss_tests import get_dataset_2
```


```diff
@@ -11,7 +11,7 @@ import faiss
 import unittest
 import array
 
-from common import get_dataset_2
+from common_faiss_tests import get_dataset_2
 
 class TestClustering(unittest.TestCase):
```


```diff
@@ -13,7 +13,7 @@ from faiss.contrib import inspect_tools
 from faiss.contrib import evaluation
 from faiss.contrib import ivf_tools
 
-from common import get_dataset_2
+from common_faiss_tests import get_dataset_2
 
 try:
     from faiss.contrib.exhaustive_search import knn_ground_truth, knn, range_ground_truth
     from faiss.contrib.exhaustive_search import range_search_max_results
```


```diff
@@ -10,7 +10,7 @@ import numpy as np
 import faiss
 import unittest
 
-from common import get_dataset_2
+from common_faiss_tests import get_dataset_2
 
 import scipy.spatial.distance
```


```diff
@@ -15,7 +15,7 @@ import os
 import re
 import warnings
 
-from common import get_dataset, get_dataset_2
+from common_faiss_tests import get_dataset, get_dataset_2
 
 class TestModuleInterface(unittest.TestCase):
```


```diff
@@ -11,7 +11,7 @@ import numpy as np
 import unittest
 import faiss
 
-from common import Randu10k, get_dataset_2, Randu10kUnbalanced
+from common_faiss_tests import Randu10k, get_dataset_2, Randu10kUnbalanced
 
 ev = Randu10k()
```


```diff
@@ -11,7 +11,7 @@ import numpy as np
 import unittest
 import faiss
 
-from common import compare_binary_result_lists, make_binary_dataset
+from common_faiss_tests import compare_binary_result_lists, make_binary_dataset
```


```diff
@@ -14,7 +14,7 @@ import shutil
 import tempfile
 import platform
 
-from common import get_dataset_2
+from common_faiss_tests import get_dataset_2
 
 class TestRemove(unittest.TestCase):
```


```diff
@@ -13,7 +13,7 @@ import sys
 import pickle
 from multiprocessing.dummy import Pool as ThreadPool
 
-from common import get_dataset, get_dataset_2
+from common_faiss_tests import get_dataset, get_dataset_2
 
 class TestIOVariants(unittest.TestCase):
```


```diff
@@ -12,7 +12,7 @@ import numpy as np
 import faiss
 import unittest
 
-from common import Randu10k
+from common_faiss_tests import Randu10k
 
 ru = Randu10k()
```


```diff
@@ -12,7 +12,7 @@ import faiss
 import tempfile
 import os
 
-from common import get_dataset_2
+from common_faiss_tests import get_dataset_2
 
 class TestEncodeDecode(unittest.TestCase):
```