Automatic type conversions for Python API (#2274)

Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2274 All input matrices needed to be of the correct type and to be C-contiguous. This diff passes the main entry points of the api through `np.ascontiguousarray` so that the function parameters are transparently converted to the suitable format if needed. We did not have this before because users need to be made aware of the performance impact, but it seems that maybe usability is more useful. This diff is an alternative to D35007365 https://github.com/facebookresearch/faiss/pull/2250 Reviewed By: beauby Differential Revision: D35009612 fbshipit-source-id: fa0d5cfdfbff6b0916d47bd33c620e3ca9d5dd40
2022-03-30 05:42:08 -07:00 · 2022-03-30 05:42:08 -07:00 · 1806c6af27
parent b32abc95c2
commit 1806c6af27
2 changed files with 72 additions and 8 deletions
--- a/faiss/python/init.py
+++ b/faiss/python/init.py
@ -28,6 +28,16 @@ __version__ = "%d.%d.%d" % (FAISS_VERSION_MAJOR,
 # The C++ version of the classnames will be suffixed with _c
 ##################################################################

+# For most arrays we force the convesion to the target type with
+# np.ascontiguousarray, but for uint8 codes, we raise a type error
+# because it is unclear how the conversion should occur: with a view
+# (= cast) or conversion?
+def _check_dtype_uint8(codes):
+    if codes.dtype != 'uint8':
+        raise TypeError("Input argument %s must be ndarray of dtype "
+                " uint8, but found %s" % ("x", x.dtype))
+    return np.ascontiguousarray(codes)
+

 def replace_method(the_class, name, replacement, ignore_missing=False):
    """ Replaces a method in a class with another version. The old method
@ -60,8 +70,10 @@ def handle_Clustering():
            average to obtain the centroid (default is 1 for all training vectors).
        """
        n, d = x.shape
+        x = np.ascontiguousarray(x, dtype='float32')
        assert d == self.d
        if weights is not None:
+            weights = np.ascontiguousarray(weights, dtype='float32')
            assert weights.shape == (n, )
            self.train_c(n, swig_ptr(x), index, swig_ptr(weights))
        else:
@ -84,9 +96,11 @@ def handle_Clustering():
            average to obtain the centroid (default is 1 for all training vectors).
        """
        n, d = x.shape
+        x = _check_dtype_uint8(x)
        assert d == codec.sa_code_size()
        assert codec.d == index.d
        if weights is not None:
+            weights = np.ascontiguousarray(weights, dtype='float32')
            assert weights.shape == (n, )
            self.train_encoded_c(n, swig_ptr(x), codec, index, swig_ptr(weights))
        else:
@ -110,6 +124,7 @@ def handle_Clustering1D():
            Training vectors, shape (n, 1). `dtype` must be float32.
        """
        n, d = x.shape
+        x = np.ascontiguousarray(x, dtype='float32')
        assert d == self.d
        self.train_exact_c(n, swig_ptr(x))

@ -130,6 +145,7 @@ def handle_Quantizer(the_class):
            Training vectors, shape (n, self.d). `dtype` must be float32.
        """
        n, d = x.shape
+        x = np.ascontiguousarray(x, dtype='float32')
        assert d == self.d
        self.train_c(n, swig_ptr(x))

@ -148,6 +164,7 @@ def handle_Quantizer(the_class):
            and `dtype` uint8.
        """
        n, d = x.shape
+        x = np.ascontiguousarray(x, dtype='float32')
        assert d == self.d
        codes = np.empty((n, self.code_size), dtype='uint8')
        self.compute_codes_c(swig_ptr(x), swig_ptr(codes), n)
@ -166,6 +183,7 @@ def handle_Quantizer(the_class):
            Reconstructed vectors for each code, shape `(n, d)` and `dtype` float32.
        """
        n, cs = codes.shape
+        codes = _check_dtype_uint8(codes)
        assert cs == self.code_size
        x = np.empty((n, self.d), dtype='float32')
        self.decode_c(swig_ptr(codes), swig_ptr(x), n)
@ -190,6 +208,8 @@ def handle_NSG(the_class):
        assert graph.ndim == 2
        assert graph.shape[0] == n
        K = graph.shape[1]
+        x = np.ascontiguousarray(x, dtype='float32')
+        graph = np.ascontiguousarray(graph, dtype='int64')
        self.build_c(n, swig_ptr(x), swig_ptr(graph), K)

    replace_method(the_class, 'build', replacement_build)
@ -212,6 +232,7 @@ def handle_Index(the_class):

        n, d = x.shape
        assert d == self.d
+        x = np.ascontiguousarray(x, dtype='float32')
        self.add_c(n, swig_ptr(x))

    def replacement_add_with_ids(self, x, ids):
@ -230,7 +251,8 @@ def handle_Index(the_class):
        """
        n, d = x.shape
        assert d == self.d
-
+        x = np.ascontiguousarray(x, dtype='float32')
+        ids = np.ascontiguousarray(ids, dtype='int64')
        assert ids.shape == (n, ), 'not same nb of vectors as ids'
        self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids))

@ -256,6 +278,7 @@ def handle_Index(the_class):
        """
        n, d = x.shape
        assert d == self.d
+        x = np.ascontiguousarray(x, dtype='float32')

        if labels is None:
            labels = np.empty((n, k), dtype=np.int64)
@ -277,6 +300,7 @@ def handle_Index(the_class):
        """
        n, d = x.shape
        assert d == self.d
+        x = np.ascontiguousarray(x, dtype='float32')
        self.train_c(n, swig_ptr(x))

    def replacement_search(self, x, k, D=None, I=None):
@ -305,6 +329,7 @@ def handle_Index(the_class):
        """

        n, d = x.shape
+        x = np.ascontiguousarray(x, dtype='float32')
        assert d == self.d

        assert k > 0
@ -353,6 +378,7 @@ def handle_Index(the_class):
        """
        n, d = x.shape
        assert d == self.d
+        x = np.ascontiguousarray(x, dtype='float32')

        assert k > 0

@ -398,6 +424,7 @@ def handle_Index(the_class):
        else:
            assert x.ndim == 1
            index_ivf = try_extract_index_ivf (self)
+            x = np.ascontiguousarray(x, dtype='int64')
            if index_ivf and index_ivf.direct_map.type == DirectMap.Hashtable:
                sel = IDSelectorArray(x.size, swig_ptr(x))
            else:
@ -457,7 +484,8 @@ def handle_Index(the_class):
        n = keys.size
        assert keys.shape == (n, )
        assert x.shape == (n, self.d)
-
+        x = np.ascontiguousarray(x, dtype='float32')
+        keys = np.ascontiguousarray(keys, dtype='int64')
        self.update_vectors_c(n, swig_ptr(keys), swig_ptr(x))

    # The CPU does not support passed-in output buffers
@ -488,6 +516,7 @@ def handle_Index(the_class):
        """
        n, d = x.shape
        assert d == self.d
+        x = np.ascontiguousarray(x, dtype='float32')

        res = RangeSearchResult(n)
        self.range_search_c(n, swig_ptr(x), thresh, res)
@ -501,6 +530,7 @@ def handle_Index(the_class):
    def replacement_sa_encode(self, x, codes=None):
        n, d = x.shape
        assert d == self.d
+        x = np.ascontiguousarray(x, dtype='float32')

        if codes is None:
            codes = np.empty((n, self.sa_code_size()), dtype=np.uint8)
@ -513,6 +543,7 @@ def handle_Index(the_class):
    def replacement_sa_decode(self, codes, x=None):
        n, cs = codes.shape
        assert cs == self.sa_code_size()
+        codes = _check_dtype_uint8(codes)

        if x is None:
            x = np.empty((n, self.d), dtype=np.float32)
@ -525,6 +556,8 @@ def handle_Index(the_class):
    def replacement_add_sa_codes(self, codes, ids=None):
        n, cs = codes.shape
        assert cs == self.sa_code_size()
+        codes = _check_dtype_uint8(codes)
+
        if ids is not None:
            assert ids.shape == (n,)
            ids = swig_ptr(ids)
@ -568,17 +601,21 @@ def handle_IndexBinary(the_class):

    def replacement_add(self, x):
        n, d = x.shape
+        x = _check_dtype_uint8(x)
        assert d * 8 == self.d
        self.add_c(n, swig_ptr(x))

    def replacement_add_with_ids(self, x, ids):
        n, d = x.shape
+        x = _check_dtype_uint8(x)
+        ids = np.ascontiguousarray(ids, dtype='int64')
        assert d * 8 == self.d
        assert ids.shape == (n, ), 'not same nb of vectors as ids'
        self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids))

    def replacement_train(self, x):
        n, d = x.shape
+        x = _check_dtype_uint8(x)
        assert d * 8 == self.d
        self.train_c(n, swig_ptr(x))

@ -588,6 +625,7 @@ def handle_IndexBinary(the_class):
        return x

    def replacement_search(self, x, k):
+        x = _check_dtype_uint8(x)
        n, d = x.shape
        assert d * 8 == self.d
        assert k > 0
@ -600,6 +638,7 @@ def handle_IndexBinary(the_class):

    def replacement_range_search(self, x, thresh):
        n, d = x.shape
+        x = _check_dtype_uint8(x)
        assert d * 8 == self.d
        res = RangeSearchResult(n)
        self.range_search_c(n, swig_ptr(x), thresh, res)
@ -615,6 +654,7 @@ def handle_IndexBinary(the_class):
            sel = x
        else:
            assert x.ndim == 1
+            x = np.ascontiguousarray(x, dtype='int64')
            sel = IDSelectorBatch(x.size, swig_ptr(x))
        return self.remove_ids_c(sel)

@ -631,6 +671,7 @@ def handle_VectorTransform(the_class):

    def apply_method(self, x):
        n, d = x.shape
+        x = np.ascontiguousarray(x, dtype='float32')
        assert d == self.d_in
        y = np.empty((n, self.d_out), dtype=np.float32)
        self.apply_noalloc(n, swig_ptr(x), swig_ptr(y))
@ -638,6 +679,7 @@ def handle_VectorTransform(the_class):

    def replacement_reverse_transform(self, x):
        n, d = x.shape
+        x = np.ascontiguousarray(x, dtype='float32')
        assert d == self.d_out
        y = np.empty((n, self.d_in), dtype=np.float32)
        self.reverse_transform_c(n, swig_ptr(x), swig_ptr(y))
@ -645,6 +687,7 @@ def handle_VectorTransform(the_class):

    def replacement_vt_train(self, x):
        n, d = x.shape
+        x = np.ascontiguousarray(x, dtype='float32')
        assert d == self.d_in
        self.train_c(n, swig_ptr(x))

@ -676,6 +719,7 @@ def handle_AutoTuneCriterion(the_class):
 def handle_ParameterSpace(the_class):
    def replacement_explore(self, index, xq, crit):
        assert xq.shape == (crit.nq, index.d)
+        xq = np.ascontiguousarray(xq, dtype='float32')
        ops = OperatingPoints()
        self.explore_c(index, crit.nq, swig_ptr(xq),
                       crit, ops)
@ -688,6 +732,7 @@ def handle_MatrixStats(the_class):

    def replacement_init(self, m):
        assert len(m.shape) == 2
+        m = np.ascontiguousarray(m, dtype='float32')
        original_init(self, m.shape[0], m.shape[1], swig_ptr(m))

    the_class.__init__ = replacement_init
@ -937,7 +982,8 @@ def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2):
        xq = xq.T
        xq_row_major = False
    else:
-        raise TypeError('xq matrix should be row (C) or column-major (Fortran)')
+        xq = np.ascontiguousarray(xq, dtype='float32')
+        xq_row_major = True

    xq_ptr = swig_ptr(xq)

@ -956,7 +1002,8 @@ def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2):
        xb = xb.T
        xb_row_major = False
    else:
-        raise TypeError('xb matrix should be row (C) or column-major (Fortran)')
+        xb = np.ascontiguousarray(xb, dtype='float32')
+        xb_row_major = True

    xb_ptr = swig_ptr(xb)

@ -1053,7 +1100,8 @@ def pairwise_distance_gpu(res, xq, xb, D=None, metric=METRIC_L2):
    elif xq.dtype == np.float16:
        xq_type = DistanceDataType_F16
    else:
-        raise TypeError('xq must be float32 or float16')
+        xq = np.ascontiguousarray(xb, dtype='float32')
+        xq_row_major = True

    nb, d2 = xb.shape
    assert d2 == d
@ -1063,7 +1111,8 @@ def pairwise_distance_gpu(res, xq, xb, D=None, metric=METRIC_L2):
        xb = xb.T
        xb_row_major = False
    else:
-        raise TypeError('xb matrix should be row (C) or column-major (Fortran)')
+        xb = np.ascontiguousarray(xb, dtype='float32')
+        xb_row_major = True

    xb_ptr = swig_ptr(xb)

@ -1212,6 +1261,7 @@ def AlignedTable_to_array(v):
 def kmin(array, k):
    """return k smallest values (and their indices) of the lines of a
    float32 array"""
+    array = np.ascontiguousarray(array, dtype='float32')
    m, n = array.shape
    I = np.zeros((m, k), dtype='int64')
    D = np.zeros((m, k), dtype='float32')
@ -1229,6 +1279,7 @@ def kmin(array, k):
 def kmax(array, k):
    """return k largest values (and their indices) of the lines of a
    float32 array"""
+    array = np.ascontiguousarray(array, dtype='float32')
    m, n = array.shape
    I = np.zeros((m, k), dtype='int64')
    D = np.zeros((m, k), dtype='float32')
@ -1246,6 +1297,8 @@ def kmax(array, k):
 def pairwise_distances(xq, xb, mt=METRIC_L2, metric_arg=0):
    """compute the whole pairwise distance matrix between two sets of
    vectors"""
+    xq = np.ascontiguousarray(xq, dtype='float32')
+    xb = np.ascontiguousarray(xb, dtype='float32')
    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2
@ -1296,6 +1349,8 @@ def rand_smooth_vectors(n, d, seed=1234):

 def eval_intersection(I1, I2):
    """ size of intersection between each line of two result tables"""
+    I1 = np.ascontiguousarray(I1, dtype='int64')
+    I2 = np.ascontiguousarray(I2, dtype='int64')
    n = I1.shape[0]
    assert I2.shape[0] == n
    k1, k2 = I1.shape[1], I2.shape[1]
@ -1334,6 +1389,7 @@ replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple)
 search_with_parameters_c = search_with_parameters

 def search_with_parameters(index, x, k, params=None, output_stats=False):
+    x = np.ascontiguousarray(x, dtype='float32')
    n, d = x.shape
    assert d == index.d
    if not params:
@ -1366,6 +1422,7 @@ def search_with_parameters(index, x, k, params=None, output_stats=False):
 range_search_with_parameters_c = range_search_with_parameters

 def range_search_with_parameters(index, x, radius, params=None, output_stats=False):
+    x = np.ascontiguousarray(x, dtype='float32')
    n, d = x.shape
    assert d == index.d
    if not params:
@ -1427,6 +1484,8 @@ def knn(xq, xb, k, metric=METRIC_L2):
    I : array_like
        Labels of the nearest neighbors, shape (nq, k)
    """
+    xq = np.ascontiguousarray(xq, dtype='float32')
+    xb = np.ascontiguousarray(xb, dtype='float32')
    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2
@ -1537,6 +1596,7 @@ class Kmeans:
            final optimization objective

        """
+        x = np.ascontiguousarray(x, dtype='float32')
        n, d = x.shape
        assert d == self.d

@ -1581,6 +1641,7 @@ class Kmeans:
        return self.obj[-1] if self.obj.size > 0 else 0.0

    def assign(self, x):
+        x = np.ascontiguousarray(x, dtype='float32')
        assert self.centroids is not None, "should train before assigning"
        self.index.reset()
        self.index.add(self.centroids)
@ -1647,6 +1708,8 @@ class ResultHeap:
    def add_result(self, D, I):
        """D, I do not need to be in a particular order (heap or sorted)"""
        nq, kd = D.shape
+        D = np.ascontiguousarray(D, dtype='float32')
+        I = np.ascontiguousarray(I, dtype='int64')
        assert I.shape == (nq, kd)
        assert nq == self.nq
        self.heaps.addn_with_ids(
--- a/tests/torch_test_contrib.py
+++ b/tests/torch_test_contrib.py
@ -340,5 +340,6 @@ class TestTorchUtilsCPU(unittest.TestCase):
        with self.assertRaises(AssertionError):
            index.add(xb)

-        with self.assertRaises(ValueError):
-            index.add(xb.numpy())
+        # disabled since we now accept non-contiguous arrays
+        # with self.assertRaises(ValueError):
+        #    index.add(xb.numpy())