From c5b49b79df57cab7b7890c28f0ee5cb7329cbddd Mon Sep 17 00:00:00 2001 From: Matthijs Douze Date: Mon, 3 Oct 2022 11:45:41 -0700 Subject: [PATCH] split __init__.py into subsections (#2508) Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2508 the Faiss python module was in a monolythic __init__.py This diff splits it in several sub-modules. The tricky thing is to make inter-dependencies work. Reviewed By: alexanderguzhva Differential Revision: D39969794 fbshipit-source-id: 6e7f896a4b35a7c1a0a1f3a986daa32a00bfae6b --- faiss/VectorTransform.h | 13 +- faiss/python/CMakeLists.txt | 4 + faiss/python/__init__.py | 1613 +---------------------------- faiss/python/array_conversions.py | 162 +++ faiss/python/class_wrappers.py | 834 +++++++++++++++ faiss/python/extra_wrappers.py | 362 +++++++ faiss/python/gpu_wrappers.py | 263 +++++ faiss/python/setup.py | 4 + 8 files changed, 1678 insertions(+), 1577 deletions(-) create mode 100644 faiss/python/array_conversions.py create mode 100644 faiss/python/class_wrappers.py create mode 100644 faiss/python/extra_wrappers.py create mode 100644 faiss/python/gpu_wrappers.py diff --git a/faiss/VectorTransform.h b/faiss/VectorTransform.h index de61d329a..03f7332c3 100644 --- a/faiss/VectorTransform.h +++ b/faiss/VectorTransform.h @@ -43,13 +43,18 @@ struct VectorTransform { */ virtual void train(idx_t n, const float* x); - /** apply the random rotation, return new allocated matrix - * @param x size n * d_in - * @return size n * d_out + /** apply the transformation and return the result in an allocated pointer + * @param n number of vectors to transform + * @param x input vectors, size n * d_in + * @return output vectors, size n * d_out */ float* apply(idx_t n, const float* x) const; - /// same as apply, but result is pre-allocated + /** apply the transformation and return the result in a provided matrix + * @param n number of vectors to transform + * @param x input vectors, size n * d_in + * @param xt output 
vectors, size n * d_out + */ virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0; /// reverse transformation. May not be implemented or may return diff --git a/faiss/python/CMakeLists.txt b/faiss/python/CMakeLists.txt index bec4482a0..0e1f4c2f0 100644 --- a/faiss/python/CMakeLists.txt +++ b/faiss/python/CMakeLists.txt @@ -141,6 +141,10 @@ target_link_libraries(swigfaiss_avx2 PRIVATE faiss_python_callbacks) configure_file(setup.py setup.py COPYONLY) configure_file(__init__.py __init__.py COPYONLY) configure_file(loader.py loader.py COPYONLY) +configure_file(class_wrappers.py class_wrappers.py COPYONLY) +configure_file(gpu_wrappers.py gpu_wrappers.py COPYONLY) +configure_file(extra_wrappers.py extra_wrappers.py COPYONLY) +configure_file(array_conversions.py array_conversions.py COPYONLY) file(GLOB files "${PROJECT_SOURCE_DIR}/../../contrib/*.py") file(COPY ${files} DESTINATION contrib/) diff --git a/faiss/python/__init__.py b/faiss/python/__init__.py index d5c9cee9e..e67df6f4c 100644 --- a/faiss/python/__init__.py +++ b/faiss/python/__init__.py @@ -3,7 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -#@nolint +# @nolint # not linting this file because it imports * from swigfaiss, which # causes a ton of useless warnings. @@ -11,905 +11,66 @@ import numpy as np import sys import inspect -import array -import warnings # We import * so that the symbol foo can be accessed as faiss.foo. 
from .loader import * +# additional wrappers +from faiss import class_wrappers +from faiss.gpu_wrappers import * +from faiss.array_conversions import * +from faiss.extra_wrappers import kmin, kmax, pairwise_distances, rand, randint, \ + lrand, randn, rand_smooth_vectors, eval_intersection, normalize_L2, \ + ResultHeap, knn, Kmeans + __version__ = "%d.%d.%d" % (FAISS_VERSION_MAJOR, FAISS_VERSION_MINOR, FAISS_VERSION_PATCH) -################################################################## -# The functions below add or replace some methods for classes -# this is to be able to pass in numpy arrays directly -# The C++ version of the classnames will be suffixed with _c -################################################################## - -# For most arrays we force the convesion to the target type with -# np.ascontiguousarray, but for uint8 codes, we raise a type error -# because it is unclear how the conversion should occur: with a view -# (= cast) or conversion? -def _check_dtype_uint8(codes): - if codes.dtype != 'uint8': - raise TypeError("Input argument %s must be ndarray of dtype " - " uint8, but found %s" % ("x", x.dtype)) - return np.ascontiguousarray(codes) - - -def replace_method(the_class, name, replacement, ignore_missing=False): - """ Replaces a method in a class with another version. The old method - is renamed to method_name_c (because presumably it was implemented in C) """ - try: - orig_method = getattr(the_class, name) - except AttributeError: - if ignore_missing: - return - raise - if orig_method.__name__ == 'replacement_' + name: - # replacement was done in parent class - return - setattr(the_class, name + '_c', orig_method) - setattr(the_class, name, replacement) - -def handle_Clustering(): - - def replacement_train(self, x, index, weights=None): - """Perform clustering on a set of vectors. The index is used for assignment. - - Parameters - ---------- - x : array_like - Training vectors, shape (n, self.d). `dtype` must be float32. 
- index : faiss.Index - Index used for assignment. The dimension of the index should be `self.d`. - weights : array_like, optional - Per training sample weight (size n) used when computing the weighted - average to obtain the centroid (default is 1 for all training vectors). - """ - n, d = x.shape - x = np.ascontiguousarray(x, dtype='float32') - assert d == self.d - if weights is not None: - weights = np.ascontiguousarray(weights, dtype='float32') - assert weights.shape == (n, ) - self.train_c(n, swig_ptr(x), index, swig_ptr(weights)) - else: - self.train_c(n, swig_ptr(x), index) - - def replacement_train_encoded(self, x, codec, index, weights=None): - """ Perform clustering on a set of compressed vectors. The index is used for assignment. - The decompression is performed on-the-fly. - - Parameters - ---------- - x : array_like - Training vectors, shape (n, codec.code_size()). `dtype` must be `uint8`. - codec : faiss.Index - Index used to decode the vectors. Should have dimension `self.d`. - index : faiss.Index - Index used for assignment. The dimension of the index should be `self.d`. - weigths : array_like, optional - Per training sample weight (size n) used when computing the weighted - average to obtain the centroid (default is 1 for all training vectors). - """ - n, d = x.shape - x = _check_dtype_uint8(x) - assert d == codec.sa_code_size() - assert codec.d == index.d - if weights is not None: - weights = np.ascontiguousarray(weights, dtype='float32') - assert weights.shape == (n, ) - self.train_encoded_c(n, swig_ptr(x), codec, index, swig_ptr(weights)) - else: - self.train_encoded_c(n, swig_ptr(x), codec, index) - - replace_method(Clustering, 'train', replacement_train) - replace_method(Clustering, 'train_encoded', replacement_train_encoded) - - -handle_Clustering() - - -def handle_Clustering1D(): - - def replacement_train_exact(self, x): - """Perform clustering on a set of 1D vectors. 
- - Parameters - ---------- - x : array_like - Training vectors, shape (n, 1). `dtype` must be float32. - """ - n, d = x.shape - x = np.ascontiguousarray(x, dtype='float32') - assert d == self.d - self.train_exact_c(n, swig_ptr(x)) - - replace_method(Clustering1D, 'train_exact', replacement_train_exact) - - -handle_Clustering1D() - - -def handle_Quantizer(the_class): - - def replacement_train(self, x): - """ Train the quantizer on a set of training vectors. - - Parameters - ---------- - x : array_like - Training vectors, shape (n, self.d). `dtype` must be float32. - """ - n, d = x.shape - x = np.ascontiguousarray(x, dtype='float32') - assert d == self.d - self.train_c(n, swig_ptr(x)) - - def replacement_compute_codes(self, x): - """ Compute the codes corresponding to a set of vectors. - - Parameters - ---------- - x : array_like - Vectors to encode, shape (n, self.d). `dtype` must be float32. - - Returns - ------- - codes : array_like - Corresponding code for each vector, shape (n, self.code_size) - and `dtype` uint8. - """ - n, d = x.shape - x = np.ascontiguousarray(x, dtype='float32') - assert d == self.d - codes = np.empty((n, self.code_size), dtype='uint8') - self.compute_codes_c(swig_ptr(x), swig_ptr(codes), n) - return codes - - def replacement_decode(self, codes): - """Reconstruct an approximation of vectors given their codes. - - Parameters - ---------- - codes : array_like - Codes to decode, shape (n, self.code_size). `dtype` must be uint8. - - Returns - ------- - Reconstructed vectors for each code, shape `(n, d)` and `dtype` float32. 
- """ - n, cs = codes.shape - codes = _check_dtype_uint8(codes) - assert cs == self.code_size - x = np.empty((n, self.d), dtype='float32') - self.decode_c(swig_ptr(codes), swig_ptr(x), n) - return x - - replace_method(the_class, 'train', replacement_train) - replace_method(the_class, 'compute_codes', replacement_compute_codes) - replace_method(the_class, 'decode', replacement_decode) - - -def handle_NSG(the_class): - - def replacement_build(self, x, graph): - n, d = x.shape - assert d == self.d - assert graph.ndim == 2 - assert graph.shape[0] == n - K = graph.shape[1] - x = np.ascontiguousarray(x, dtype='float32') - graph = np.ascontiguousarray(graph, dtype='int64') - self.build_c(n, swig_ptr(x), swig_ptr(graph), K) - - replace_method(the_class, 'build', replacement_build) - - -def handle_Index(the_class): - - def replacement_add(self, x): - """Adds vectors to the index. - The index must be trained before vectors can be added to it. - The vectors are implicitly numbered in sequence. When `n` vectors are - added to the index, they are given ids `ntotal`, `ntotal + 1`, ..., `ntotal + n - 1`. - - Parameters - ---------- - x : array_like - Query vectors, shape (n, d) where d is appropriate for the index. - `dtype` must be float32. - """ - - n, d = x.shape - assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - self.add_c(n, swig_ptr(x)) - - def replacement_add_with_ids(self, x, ids): - """Adds vectors with arbitrary ids to the index (not all indexes support this). - The index must be trained before vectors can be added to it. - Vector `i` is stored in `x[i]` and has id `ids[i]`. - - Parameters - ---------- - x : array_like - Query vectors, shape (n, d) where d is appropriate for the index. - `dtype` must be float32. - ids : array_like - Array if ids of size n. The ids must be of type `int64`. Note that `-1` is reserved - in result lists to mean "not found" so it's better to not use it as an id. 
- """ - n, d = x.shape - assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - ids = np.ascontiguousarray(ids, dtype='int64') - assert ids.shape == (n, ), 'not same nb of vectors as ids' - self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids)) - - def replacement_assign(self, x, k, labels=None): - """Find the k nearest neighbors of the set of vectors x in the index. - This is the same as the `search` method, but discards the distances. - - Parameters - ---------- - x : array_like - Query vectors, shape (n, d) where d is appropriate for the index. - `dtype` must be float32. - k : int - Number of nearest neighbors. - labels : array_like, optional - Labels array to store the results. - - Returns - ------- - labels: array_like - Labels of the nearest neighbors, shape (n, k). - When not enough results are found, the label is set to -1 - """ - n, d = x.shape - assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - - if labels is None: - labels = np.empty((n, k), dtype=np.int64) - else: - assert labels.shape == (n, k) - - self.assign_c(n, swig_ptr(x), swig_ptr(labels), k) - return labels - - def replacement_train(self, x): - """Trains the index on a representative set of vectors. - The index must be trained before vectors can be added to it. - - Parameters - ---------- - x : array_like - Query vectors, shape (n, d) where d is appropriate for the index. - `dtype` must be float32. - """ - n, d = x.shape - assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - self.train_c(n, swig_ptr(x)) - - def replacement_search(self, x, k, *, params=None, D=None, I=None): - """Find the k nearest neighbors of the set of vectors x in the index. - - Parameters - ---------- - x : array_like - Query vectors, shape (n, d) where d is appropriate for the index. - `dtype` must be float32. - k : int - Number of nearest neighbors. - D : array_like, optional - Distance array to store the result. - I : array_like, optional - Labels array to store the results. 
- params : SearchParameters - Search parameters of the current search (overrides the class-level params) - - Returns - ------- - D : array_like - Distances of the nearest neighbors, shape (n, k). When not enough results are found - the label is set to +Inf or -Inf. - I : array_like - Labels of the nearest neighbors, shape (n, k). - When not enough results are found, the label is set to -1 - """ - - n, d = x.shape - x = np.ascontiguousarray(x, dtype='float32') - assert d == self.d - - assert k > 0 - - if D is None: - D = np.empty((n, k), dtype=np.float32) - else: - assert D.shape == (n, k) - - if I is None: - I = np.empty((n, k), dtype=np.int64) - else: - assert I.shape == (n, k) - - self.search_c(n, swig_ptr(x), k, swig_ptr(D), swig_ptr(I), params) - return D, I - - def replacement_search_and_reconstruct(self, x, k, *, params=None, D=None, I=None, R=None): - """Find the k nearest neighbors of the set of vectors x in the index, - and return an approximation of these vectors. - - Parameters - ---------- - x : array_like - Query vectors, shape (n, d) where d is appropriate for the index. - `dtype` must be float32. - k : int - Number of nearest neighbors. - D : array_like, optional - Distance array to store the result. - I : array_like, optional - Labels array to store the result. - R : array_like, optional - reconstruction array to store - params : SearchParameters - Search parameters of the current search (overrides the class-level params) - - Returns - ------- - D : array_like - Distances of the nearest neighbors, shape (n, k). When not enough results are found - the label is set to +Inf or -Inf. - I : array_like - Labels of the nearest neighbors, shape (n, k). When not enough results are found, - the label is set to -1 - R : array_like - Approximate (reconstructed) nearest neighbor vectors, shape (n, k, d). 
- """ - n, d = x.shape - assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - - assert k > 0 - - if D is None: - D = np.empty((n, k), dtype=np.float32) - else: - assert D.shape == (n, k) - - if I is None: - I = np.empty((n, k), dtype=np.int64) - else: - assert I.shape == (n, k) - - if R is None: - R = np.empty((n, k, d), dtype=np.float32) - else: - assert R.shape == (n, k, d) - - self.search_and_reconstruct_c( - n, swig_ptr(x), k, - swig_ptr(D), swig_ptr(I), swig_ptr(R), - params - ) - return D, I, R - - def replacement_remove_ids(self, x): - """Remove some ids from the index. - This is a O(ntotal) operation by default, so could be expensive. - - Parameters - ---------- - x : array_like or faiss.IDSelector - Either an IDSelector that returns True for vectors to remove, or a - list of ids to reomove (1D array of int64). When `x` is a list, - it is wrapped into an IDSelector. - - Returns - ------- - n_remove: int - number of vectors that were removed - """ - if isinstance(x, IDSelector): - sel = x - else: - assert x.ndim == 1 - index_ivf = try_extract_index_ivf (self) - x = np.ascontiguousarray(x, dtype='int64') - if index_ivf and index_ivf.direct_map.type == DirectMap.Hashtable: - sel = IDSelectorArray(x.size, swig_ptr(x)) - else: - sel = IDSelectorBatch(x.size, swig_ptr(x)) - return self.remove_ids_c(sel) - - def replacement_reconstruct(self, key, x=None): - """Approximate reconstruction of one vector from the index. - - Parameters - ---------- - key : int - Id of the vector to reconstruct - x : array_like, optional - pre-allocated array to store the results - - Returns - ------- - x : array_like reconstructed vector, size `self.d`, `dtype`=float32 - """ - if x is None: - x = np.empty(self.d, dtype=np.float32) - else: - assert x.shape == (self.d, ) - - self.reconstruct_c(key, swig_ptr(x)) - return x - - def replacement_reconstruct_batch(self, key, x=None): - """Approximate reconstruction of several vectors from the index. 
- - Parameters - ---------- - key : array of ints - Ids of the vectors to reconstruct - x : array_like, optional - pre-allocated array to store the results - - Returns - ------- - x : array_like - reconstrcuted vectors, size `len(key), self.d` - """ - key = np.ascontiguousarray(key, dtype='int64') - n, = key.shape - if x is None: - x = np.empty((n, self.d), dtype=np.float32) - else: - assert x.shape == (n, self.d) - self.reconstruct_batch_c(n, swig_ptr(key), swig_ptr(x)) - return x - - def replacement_reconstruct_n(self, n0, ni, x=None): - """Approximate reconstruction of vectors `n0` ... `n0 + ni - 1` from the index. - Missing vectors trigger an exception. - - Parameters - ---------- - n0 : int - Id of the first vector to reconstruct - ni : int - Number of vectors to reconstruct - x : array_like, optional - pre-allocated array to store the results - - Returns - ------- - x : array_like - Reconstructed vectors, size (`ni`, `self.d`), `dtype`=float32 - """ - if x is None: - x = np.empty((ni, self.d), dtype=np.float32) - else: - assert x.shape == (ni, self.d) - - self.reconstruct_n_c(n0, ni, swig_ptr(x)) - return x - - def replacement_update_vectors(self, keys, x): - n = keys.size - assert keys.shape == (n, ) - assert x.shape == (n, self.d) - x = np.ascontiguousarray(x, dtype='float32') - keys = np.ascontiguousarray(keys, dtype='int64') - self.update_vectors_c(n, swig_ptr(keys), swig_ptr(x)) - - # The CPU does not support passed-in output buffers - def replacement_range_search(self, x, thresh, *, params=None): - """Search vectors that are within a distance of the query vectors. - - Parameters - ---------- - x : array_like - Query vectors, shape (n, d) where d is appropriate for the index. - `dtype` must be float32. - thresh : float - Threshold to select neighbors. 
All elements within this radius are returned, - except for maximum inner product indexes, where the elements above the - threshold are returned - params : SearchParameters - Search parameters of the current search (overrides the class-level params) - - Returns - ------- - lims: array_like - Startring index of the results for each query vector, size n+1. - D : array_like - Distances of the nearest neighbors, shape `lims[n]`. The distances for - query i are in `D[lims[i]:lims[i+1]]`. - I : array_like - Labels of nearest neighbors, shape `lims[n]`. The labels for query i - are in `I[lims[i]:lims[i+1]]`. - - """ - n, d = x.shape - assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - - res = RangeSearchResult(n) - self.range_search_c(n, swig_ptr(x), thresh, res, params) - # get pointers and copy them - lims = rev_swig_ptr(res.lims, n + 1).copy() - nd = int(lims[-1]) - D = rev_swig_ptr(res.distances, nd).copy() - I = rev_swig_ptr(res.labels, nd).copy() - return lims, D, I - - def replacement_sa_encode(self, x, codes=None): - n, d = x.shape - assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - - if codes is None: - codes = np.empty((n, self.sa_code_size()), dtype=np.uint8) - else: - assert codes.shape == (n, self.sa_code_size()) - - self.sa_encode_c(n, swig_ptr(x), swig_ptr(codes)) - return codes - - def replacement_sa_decode(self, codes, x=None): - n, cs = codes.shape - assert cs == self.sa_code_size() - codes = _check_dtype_uint8(codes) - - if x is None: - x = np.empty((n, self.d), dtype=np.float32) - else: - assert x.shape == (n, self.d) - - self.sa_decode_c(n, swig_ptr(codes), swig_ptr(x)) - return x - - def replacement_add_sa_codes(self, codes, ids=None): - n, cs = codes.shape - assert cs == self.sa_code_size() - codes = _check_dtype_uint8(codes) - - if ids is not None: - assert ids.shape == (n,) - ids = swig_ptr(ids) - self.add_sa_codes_c(n, swig_ptr(codes), ids) - - replace_method(the_class, 'add', replacement_add) - 
replace_method(the_class, 'add_with_ids', replacement_add_with_ids) - replace_method(the_class, 'assign', replacement_assign) - replace_method(the_class, 'train', replacement_train) - replace_method(the_class, 'search', replacement_search) - replace_method(the_class, 'remove_ids', replacement_remove_ids) - replace_method(the_class, 'reconstruct', replacement_reconstruct) - replace_method(the_class, 'reconstruct_batch', replacement_reconstruct_batch) - replace_method(the_class, 'reconstruct_n', replacement_reconstruct_n) - replace_method(the_class, 'range_search', replacement_range_search) - replace_method(the_class, 'update_vectors', replacement_update_vectors, - ignore_missing=True) - replace_method(the_class, 'search_and_reconstruct', - replacement_search_and_reconstruct, ignore_missing=True) - replace_method(the_class, 'sa_encode', replacement_sa_encode) - replace_method(the_class, 'sa_decode', replacement_sa_decode) - replace_method(the_class, 'add_sa_codes', replacement_add_sa_codes, - ignore_missing=True) - - # get/set state for pickle - # the data is serialized to std::vector -> numpy array -> python bytes - # so not very efficient for now. 
- - def index_getstate(self): - return {"this": serialize_index(self).tobytes()} - - def index_setstate(self, st): - index2 = deserialize_index(np.frombuffer(st["this"], dtype="uint8")) - self.this = index2.this - - the_class.__getstate__ = index_getstate - the_class.__setstate__ = index_setstate - - - -def handle_IndexBinary(the_class): - - def replacement_add(self, x): - n, d = x.shape - x = _check_dtype_uint8(x) - assert d * 8 == self.d - self.add_c(n, swig_ptr(x)) - - def replacement_add_with_ids(self, x, ids): - n, d = x.shape - x = _check_dtype_uint8(x) - ids = np.ascontiguousarray(ids, dtype='int64') - assert d * 8 == self.d - assert ids.shape == (n, ), 'not same nb of vectors as ids' - self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids)) - - def replacement_train(self, x): - n, d = x.shape - x = _check_dtype_uint8(x) - assert d * 8 == self.d - self.train_c(n, swig_ptr(x)) - - def replacement_reconstruct(self, key): - x = np.empty(self.d // 8, dtype=np.uint8) - self.reconstruct_c(key, swig_ptr(x)) - return x - - def replacement_search(self, x, k): - x = _check_dtype_uint8(x) - n, d = x.shape - assert d * 8 == self.d - assert k > 0 - distances = np.empty((n, k), dtype=np.int32) - labels = np.empty((n, k), dtype=np.int64) - self.search_c(n, swig_ptr(x), - k, swig_ptr(distances), - swig_ptr(labels)) - return distances, labels - - def replacement_range_search(self, x, thresh): - n, d = x.shape - x = _check_dtype_uint8(x) - assert d * 8 == self.d - res = RangeSearchResult(n) - self.range_search_c(n, swig_ptr(x), thresh, res) - # get pointers and copy them - lims = rev_swig_ptr(res.lims, n + 1).copy() - nd = int(lims[-1]) - D = rev_swig_ptr(res.distances, nd).copy() - I = rev_swig_ptr(res.labels, nd).copy() - return lims, D, I - - def replacement_remove_ids(self, x): - if isinstance(x, IDSelector): - sel = x - else: - assert x.ndim == 1 - x = np.ascontiguousarray(x, dtype='int64') - sel = IDSelectorBatch(x.size, swig_ptr(x)) - return self.remove_ids_c(sel) - - 
replace_method(the_class, 'add', replacement_add) - replace_method(the_class, 'add_with_ids', replacement_add_with_ids) - replace_method(the_class, 'train', replacement_train) - replace_method(the_class, 'search', replacement_search) - replace_method(the_class, 'range_search', replacement_range_search) - replace_method(the_class, 'reconstruct', replacement_reconstruct) - replace_method(the_class, 'remove_ids', replacement_remove_ids) - - -def handle_VectorTransform(the_class): - - def apply_method(self, x): - n, d = x.shape - x = np.ascontiguousarray(x, dtype='float32') - assert d == self.d_in - y = np.empty((n, self.d_out), dtype=np.float32) - self.apply_noalloc(n, swig_ptr(x), swig_ptr(y)) - return y - - def replacement_reverse_transform(self, x): - n, d = x.shape - x = np.ascontiguousarray(x, dtype='float32') - assert d == self.d_out - y = np.empty((n, self.d_in), dtype=np.float32) - self.reverse_transform_c(n, swig_ptr(x), swig_ptr(y)) - return y - - def replacement_vt_train(self, x): - n, d = x.shape - x = np.ascontiguousarray(x, dtype='float32') - assert d == self.d_in - self.train_c(n, swig_ptr(x)) - - replace_method(the_class, 'train', replacement_vt_train) - # apply is reserved in Pyton... 
- the_class.apply_py = apply_method - the_class.apply = apply_method - replace_method(the_class, 'reverse_transform', - replacement_reverse_transform) - - -def handle_AutoTuneCriterion(the_class): - def replacement_set_groundtruth(self, D, I): - if D: - assert I.shape == D.shape - self.nq, self.gt_nnn = I.shape - self.set_groundtruth_c( - self.gt_nnn, swig_ptr(D) if D else None, swig_ptr(I)) - - def replacement_evaluate(self, D, I): - assert I.shape == D.shape - assert I.shape == (self.nq, self.nnn) - return self.evaluate_c(swig_ptr(D), swig_ptr(I)) - - replace_method(the_class, 'set_groundtruth', replacement_set_groundtruth) - replace_method(the_class, 'evaluate', replacement_evaluate) - - -def handle_ParameterSpace(the_class): - def replacement_explore(self, index, xq, crit): - assert xq.shape == (crit.nq, index.d) - xq = np.ascontiguousarray(xq, dtype='float32') - ops = OperatingPoints() - self.explore_c(index, crit.nq, swig_ptr(xq), - crit, ops) - return ops - replace_method(the_class, 'explore', replacement_explore) - - -def handle_MatrixStats(the_class): - original_init = the_class.__init__ - - def replacement_init(self, m): - assert len(m.shape) == 2 - m = np.ascontiguousarray(m, dtype='float32') - original_init(self, m.shape[0], m.shape[1], swig_ptr(m)) - - the_class.__init__ = replacement_init - -handle_MatrixStats(MatrixStats) - -def handle_IOWriter(the_class): - - def write_bytes(self, b): - return self(swig_ptr(b), 1, len(b)) - - the_class.write_bytes = write_bytes - -handle_IOWriter(IOWriter) - -def handle_IOReader(the_class): - - def read_bytes(self, totsz): - buf = bytearray(totsz) - was_read = self(swig_ptr(buf), 1, len(buf)) - return bytes(buf[:was_read]) - - the_class.read_bytes = read_bytes - -handle_IOReader(IOReader) - -def handle_IndexRowwiseMinMax(the_class): - def replacement_train_inplace(self, x): - """Trains the index on a representative set of vectors inplace. - The index must be trained before vectors can be added to it. 
- - This call WILL change the values in the input array, because - of two scaling proceduces being performed inplace. - - Parameters - ---------- - x : array_like - Query vectors, shape (n, d) where d is appropriate for the index. - `dtype` must be float32. - """ - n, d = x.shape - assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - self.train_inplace_c(n, swig_ptr(x)) - - replace_method(the_class, 'train_inplace', replacement_train_inplace) - - - -def handle_SearchParameters(the_class): - """ this wrapper is to enable initializations of the form - SearchParametersXX(a=3, b=SearchParamsYY) - This also requires the enclosing class to keep a reference on the - sub-object - """ - the_class.original_init = the_class.__init__ - - def replacement_init(self, **args): - self.original_init() - self.referenced_objects = [] - for k, v in args.items(): - assert hasattr(self, k) - setattr(self, k, v) - if inspect.isclass(v): - self.referenced_objects.append(v) - - the_class.__init__ = replacement_init - - +class_wrappers.handle_Clustering(Clustering) +class_wrappers.handle_Clustering1D(Clustering1D) +class_wrappers.handle_MatrixStats(MatrixStats) +class_wrappers.handle_IOWriter(IOWriter) +class_wrappers.handle_IOReader(IOReader) +class_wrappers.handle_AutoTuneCriterion(AutoTuneCriterion) +class_wrappers.handle_ParameterSpace(ParameterSpace) +class_wrappers.handle_NSG(IndexNSG) +class_wrappers.handle_MapLong2Long(MapLong2Long) this_module = sys.modules[__name__] +# handle sub-classes for symbol in dir(this_module): obj = getattr(this_module, symbol) # print symbol, isinstance(obj, (type, types.ClassType)) if inspect.isclass(obj): the_class = obj if issubclass(the_class, Index): - handle_Index(the_class) + class_wrappers.handle_Index(the_class) if issubclass(the_class, IndexBinary): - handle_IndexBinary(the_class) + class_wrappers.handle_IndexBinary(the_class) if issubclass(the_class, VectorTransform): - handle_VectorTransform(the_class) - - if 
issubclass(the_class, AutoTuneCriterion): - handle_AutoTuneCriterion(the_class) - - if issubclass(the_class, ParameterSpace): - handle_ParameterSpace(the_class) - - if issubclass(the_class, IndexNSG): - handle_NSG(the_class) + class_wrappers.handle_VectorTransform(the_class) if issubclass(the_class, Quantizer): - handle_Quantizer(the_class) + class_wrappers.handle_Quantizer(the_class) if issubclass(the_class, IndexRowwiseMinMax) or \ - issubclass(the_class, IndexRowwiseMinMaxFP16): - handle_IndexRowwiseMinMax(the_class) + issubclass(the_class, IndexRowwiseMinMaxFP16): + class_wrappers.handle_IndexRowwiseMinMax(the_class) if issubclass(the_class, SearchParameters): - handle_SearchParameters(the_class) - -########################################### -# Utility to add a deprecation warning to -# classes from the SWIG interface -########################################### - -def _make_deprecated_swig_class(deprecated_name, base_name): - """ - Dynamically construct deprecated classes as wrappers around renamed ones - - The deprecation warning added in their __new__-method will trigger upon - construction of an instance of the class, but only once per session. - - We do this here (in __init__.py) because the base classes are defined in - the SWIG interface, making it cumbersome to add the deprecation there. - - Parameters - ---------- - deprecated_name : string - Name of the class to be deprecated; _not_ present in SWIG interface. - base_name : string - Name of the class that is replacing deprecated_name; must already be - imported into the current namespace. - - Returns - ------- - None - However, the deprecated class gets added to the faiss namespace - """ - base_class = globals()[base_name] - def new_meth(cls, *args, **kwargs): - msg = f"The class faiss.{deprecated_name} is deprecated in favour of faiss.{base_name}!" 
- warnings.warn(msg, DeprecationWarning, stacklevel=2) - instance = super(base_class, cls).__new__(cls, *args, **kwargs) - return instance - - # three-argument version of "type" uses (name, tuple-of-bases, dict-of-attributes) - klazz = type(deprecated_name, (base_class,), {"__new__": new_meth}) - - # this ends up adding the class to the "faiss" namespace, in a way that it - # is available both through "import faiss" and "from faiss import *" - globals()[deprecated_name] = klazz + class_wrappers.handle_SearchParameters(the_class) ########################################### # Add Python references to objects # we do this at the Python class wrapper level. ########################################### + def add_ref_in_constructor(the_class, parameter_no): # adds a reference to parameter parameter_no in self # so that that parameter does not get deallocated before self @@ -933,6 +94,7 @@ def add_ref_in_constructor(the_class, parameter_no): def add_ref_in_method(the_class, method_name, parameter_no): original_method = getattr(the_class, method_name) + def replacement_method(self, *args): ref = args[parameter_no] if not hasattr(self, 'referenced_objects'): @@ -942,9 +104,11 @@ def add_ref_in_method(the_class, method_name, parameter_no): return original_method(self, *args) setattr(the_class, method_name, replacement_method) + def add_ref_in_function(function_name, parameter_no): # assumes the function returns an object original_function = getattr(this_module, function_name) + def replacement_function(*args): result = original_function(*args) ref = args[parameter_no] @@ -952,6 +116,7 @@ def add_ref_in_function(function_name, parameter_no): return result setattr(this_module, function_name, replacement_function) + add_ref_in_constructor(IndexIVFFlat, 0) add_ref_in_constructor(IndexIVFFlatDedup, 0) add_ref_in_constructor(IndexPreTransform, {2: [0, 1], 1: [0]}) @@ -973,8 +138,8 @@ add_ref_in_constructor(IndexIDMap2, 0) add_ref_in_constructor(IndexHNSW, 0) 
add_ref_in_method(IndexShards, 'add_shard', 0) add_ref_in_method(IndexBinaryShards, 'add_shard', 0) -add_ref_in_constructor(IndexRefineFlat, {2:[0], 1:[0]}) -add_ref_in_constructor(IndexRefine, {2:[0, 1]}) +add_ref_in_constructor(IndexRefineFlat, {2: [0], 1: [0]}) +add_ref_in_constructor(IndexRefine, {2: [0, 1]}) add_ref_in_constructor(IndexBinaryIVF, 0) add_ref_in_constructor(IndexBinaryFromFloat, 0) @@ -990,479 +155,6 @@ add_ref_in_constructor(BufferedIOReader, 0) # seems really marginal... # remove_ref_from_method(IndexReplicas, 'removeIndex', 0) -########################################### -# GPU functions -########################################### - - -def index_cpu_to_gpu_multiple_py(resources, index, co=None, gpus=None): - """ builds the C++ vectors for the GPU indices and the - resources. Handles the case where the resources are assigned to - the list of GPUs """ - if gpus is None: - gpus = range(len(resources)) - vres = GpuResourcesVector() - vdev = Int32Vector() - for i, res in zip(gpus, resources): - vdev.push_back(i) - vres.push_back(res) - index = index_cpu_to_gpu_multiple(vres, vdev, index, co) - return index - - -def index_cpu_to_all_gpus(index, co=None, ngpu=-1): - index_gpu = index_cpu_to_gpus_list(index, co=co, gpus=None, ngpu=ngpu) - return index_gpu - - -def index_cpu_to_gpus_list(index, co=None, gpus=None, ngpu=-1): - """ Here we can pass list of GPU ids as a parameter or ngpu to - use first n GPU's. 
gpus mut be a list or None""" - if (gpus is None) and (ngpu == -1): # All blank - gpus = range(get_num_gpus()) - elif (gpus is None) and (ngpu != -1): # Get number of GPU's only - gpus = range(ngpu) - res = [StandardGpuResources() for _ in gpus] - index_gpu = index_cpu_to_gpu_multiple_py(res, index, co, gpus) - return index_gpu - -# allows numpy ndarray usage with bfKnn -def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2): - """ - Compute the k nearest neighbors of a vector on one GPU without constructing an index - - Parameters - ---------- - res : StandardGpuResources - GPU resources to use during computation - xq : array_like - Query vectors, shape (nq, d) where d is appropriate for the index. - `dtype` must be float32. - xb : array_like - Database vectors, shape (nb, d) where d is appropriate for the index. - `dtype` must be float32. - k : int - Number of nearest neighbors. - D : array_like, optional - Output array for distances of the nearest neighbors, shape (nq, k) - I : array_like, optional - Output array for the nearest neighbors, shape (nq, k) - distance_type : MetricType, optional - distance measure to use (either METRIC_L2 or METRIC_INNER_PRODUCT) - - Returns - ------- - D : array_like - Distances of the nearest neighbors, shape (nq, k) - I : array_like - Labels of the nearest neighbors, shape (nq, k) - """ - nq, d = xq.shape - if xq.flags.c_contiguous: - xq_row_major = True - elif xq.flags.f_contiguous: - xq = xq.T - xq_row_major = False - else: - xq = np.ascontiguousarray(xq, dtype='float32') - xq_row_major = True - - xq_ptr = swig_ptr(xq) - - if xq.dtype == np.float32: - xq_type = DistanceDataType_F32 - elif xq.dtype == np.float16: - xq_type = DistanceDataType_F16 - else: - raise TypeError('xq must be f32 or f16') - - nb, d2 = xb.shape - assert d2 == d - if xb.flags.c_contiguous: - xb_row_major = True - elif xb.flags.f_contiguous: - xb = xb.T - xb_row_major = False - else: - xb = np.ascontiguousarray(xb, dtype='float32') - xb_row_major = 
True - - xb_ptr = swig_ptr(xb) - - if xb.dtype == np.float32: - xb_type = DistanceDataType_F32 - elif xb.dtype == np.float16: - xb_type = DistanceDataType_F16 - else: - raise TypeError('xb must be float32 or float16') - - if D is None: - D = np.empty((nq, k), dtype=np.float32) - else: - assert D.shape == (nq, k) - # interface takes void*, we need to check this - assert D.dtype == np.float32 - - D_ptr = swig_ptr(D) - - if I is None: - I = np.empty((nq, k), dtype=np.int64) - else: - assert I.shape == (nq, k) - - I_ptr = swig_ptr(I) - - if I.dtype == np.int64: - I_type = IndicesDataType_I64 - elif I.dtype == I.dtype == np.int32: - I_type = IndicesDataType_I32 - else: - raise TypeError('I must be i64 or i32') - - args = GpuDistanceParams() - args.metric = metric - args.k = k - args.dims = d - args.vectors = xb_ptr - args.vectorsRowMajor = xb_row_major - args.vectorType = xb_type - args.numVectors = nb - args.queries = xq_ptr - args.queriesRowMajor = xq_row_major - args.queryType = xq_type - args.numQueries = nq - args.outDistances = D_ptr - args.outIndices = I_ptr - args.outIndicesType = I_type - - # no stream synchronization needed, inputs and outputs are guaranteed to - # be on the CPU (numpy arrays) - bfKnn(res, args) - - return D, I - -# allows numpy ndarray usage with bfKnn for all pairwise distances -def pairwise_distance_gpu(res, xq, xb, D=None, metric=METRIC_L2): - """ - Compute all pairwise distances between xq and xb on one GPU without constructing an index - - Parameters - ---------- - res : StandardGpuResources - GPU resources to use during computation - xq : array_like - Query vectors, shape (nq, d) where d is appropriate for the index. - `dtype` must be float32. - xb : array_like - Database vectors, shape (nb, d) where d is appropriate for the index. - `dtype` must be float32. 
- D : array_like, optional - Output array for all pairwise distances, shape (nq, nb) - distance_type : MetricType, optional - distance measure to use (either METRIC_L2 or METRIC_INNER_PRODUCT) - - Returns - ------- - D : array_like - All pairwise distances, shape (nq, nb) - """ - nq, d = xq.shape - if xq.flags.c_contiguous: - xq_row_major = True - elif xq.flags.f_contiguous: - xq = xq.T - xq_row_major = False - else: - raise TypeError('xq matrix should be row (C) or column-major (Fortran)') - - xq_ptr = swig_ptr(xq) - - if xq.dtype == np.float32: - xq_type = DistanceDataType_F32 - elif xq.dtype == np.float16: - xq_type = DistanceDataType_F16 - else: - xq = np.ascontiguousarray(xb, dtype='float32') - xq_row_major = True - - nb, d2 = xb.shape - assert d2 == d - if xb.flags.c_contiguous: - xb_row_major = True - elif xb.flags.f_contiguous: - xb = xb.T - xb_row_major = False - else: - xb = np.ascontiguousarray(xb, dtype='float32') - xb_row_major = True - - xb_ptr = swig_ptr(xb) - - if xb.dtype == np.float32: - xb_type = DistanceDataType_F32 - elif xb.dtype == np.float16: - xb_type = DistanceDataType_F16 - else: - raise TypeError('xb must be float32 or float16') - - if D is None: - D = np.empty((nq, nb), dtype=np.float32) - else: - assert D.shape == (nq, nb) - # interface takes void*, we need to check this - assert D.dtype == np.float32 - - D_ptr = swig_ptr(D) - - args = GpuDistanceParams() - args.metric = metric - args.k = -1 # selects all pairwise distances - args.dims = d - args.vectors = xb_ptr - args.vectorsRowMajor = xb_row_major - args.vectorType = xb_type - args.numVectors = nb - args.queries = xq_ptr - args.queriesRowMajor = xq_row_major - args.queryType = xq_type - args.numQueries = nq - args.outDistances = D_ptr - - # no stream synchronization needed, inputs and outputs are guaranteed to - # be on the CPU (numpy arrays) - bfKnn(res, args) - - return D - - -########################################### -# numpy array / std::vector conversions 
-########################################### - -sizeof_long = array.array('l').itemsize -deprecated_name_map = { - # deprecated: replacement - 'Float': 'Float32', - 'Double': 'Float64', - 'Char': 'Int8', - 'Int': 'Int32', - 'Long': 'Int32' if sizeof_long == 4 else 'Int64', - 'LongLong': 'Int64', - 'Byte': 'UInt8', - # previously misspelled variant - 'Uint64': 'UInt64', -} - -for depr_prefix, base_prefix in deprecated_name_map.items(): - _make_deprecated_swig_class(depr_prefix + "Vector", base_prefix + "Vector") - - # same for the three legacy *VectorVector classes - if depr_prefix in ['Float', 'Long', 'Byte']: - _make_deprecated_swig_class(depr_prefix + "VectorVector", - base_prefix + "VectorVector") - -# mapping from vector names in swigfaiss.swig and the numpy dtype names -# TODO: once deprecated classes are removed, remove the dict and just use .lower() below -vector_name_map = { - 'Float32': 'float32', - 'Float64': 'float64', - 'Int8': 'int8', - 'Int16': 'int16', - 'Int32': 'int32', - 'Int64': 'int64', - 'UInt8': 'uint8', - 'UInt16': 'uint16', - 'UInt32': 'uint32', - 'UInt64': 'uint64', - **{k: v.lower() for k, v in deprecated_name_map.items()} -} - - -def vector_to_array(v): - """ convert a C++ vector to a numpy array """ - classname = v.__class__.__name__ - assert classname.endswith('Vector') - dtype = np.dtype(vector_name_map[classname[:-6]]) - a = np.empty(v.size(), dtype=dtype) - if v.size() > 0: - memcpy(swig_ptr(a), v.data(), a.nbytes) - return a - - -def vector_float_to_array(v): - return vector_to_array(v) - - -def copy_array_to_vector(a, v): - """ copy a numpy array to a vector """ - n, = a.shape - classname = v.__class__.__name__ - assert classname.endswith('Vector') - dtype = np.dtype(vector_name_map[classname[:-6]]) - assert dtype == a.dtype, ( - 'cannot copy a %s array to a %s (should be %s)' % ( - a.dtype, classname, dtype)) - v.resize(n) - if n > 0: - memcpy(v.data(), swig_ptr(a), a.nbytes) - -# same for AlignedTable - -def 
copy_array_to_AlignedTable(a, v): - n, = a.shape - # TODO check class name - assert v.itemsize() == a.itemsize - v.resize(n) - if n > 0: - memcpy(v.get(), swig_ptr(a), a.nbytes) - -def array_to_AlignedTable(a): - if a.dtype == 'uint16': - v = AlignedTableUint16(a.size) - elif a.dtype == 'uint8': - v = AlignedTableUint8(a.size) - else: - assert False - copy_array_to_AlignedTable(a, v) - return v - -def AlignedTable_to_array(v): - """ convert an AlignedTable to a numpy array """ - classname = v.__class__.__name__ - assert classname.startswith('AlignedTable') - dtype = classname[12:].lower() - a = np.empty(v.size(), dtype=dtype) - if a.size > 0: - memcpy(swig_ptr(a), v.data(), a.nbytes) - return a - -########################################### -# Wrapper for a few functions -########################################### - -def kmin(array, k): - """return k smallest values (and their indices) of the lines of a - float32 array""" - array = np.ascontiguousarray(array, dtype='float32') - m, n = array.shape - I = np.zeros((m, k), dtype='int64') - D = np.zeros((m, k), dtype='float32') - ha = float_maxheap_array_t() - ha.ids = swig_ptr(I) - ha.val = swig_ptr(D) - ha.nh = m - ha.k = k - ha.heapify() - ha.addn(n, swig_ptr(array)) - ha.reorder() - return D, I - - -def kmax(array, k): - """return k largest values (and their indices) of the lines of a - float32 array""" - array = np.ascontiguousarray(array, dtype='float32') - m, n = array.shape - I = np.zeros((m, k), dtype='int64') - D = np.zeros((m, k), dtype='float32') - ha = float_minheap_array_t() - ha.ids = swig_ptr(I) - ha.val = swig_ptr(D) - ha.nh = m - ha.k = k - ha.heapify() - ha.addn(n, swig_ptr(array)) - ha.reorder() - return D, I - - -def pairwise_distances(xq, xb, mt=METRIC_L2, metric_arg=0): - """compute the whole pairwise distance matrix between two sets of - vectors""" - xq = np.ascontiguousarray(xq, dtype='float32') - xb = np.ascontiguousarray(xb, dtype='float32') - nq, d = xq.shape - nb, d2 = xb.shape - assert d 
== d2 - dis = np.empty((nq, nb), dtype='float32') - if mt == METRIC_L2: - pairwise_L2sqr( - d, nq, swig_ptr(xq), - nb, swig_ptr(xb), - swig_ptr(dis)) - else: - pairwise_extra_distances( - d, nq, swig_ptr(xq), - nb, swig_ptr(xb), - mt, metric_arg, - swig_ptr(dis)) - return dis - - - - -def rand(n, seed=12345): - res = np.empty(n, dtype='float32') - float_rand(swig_ptr(res), res.size, seed) - return res - - -def randint(n, seed=12345, vmax=None): - res = np.empty(n, dtype='int64') - if vmax is None: - int64_rand(swig_ptr(res), res.size, seed) - else: - int64_rand_max(swig_ptr(res), res.size, vmax, seed) - return res - -lrand = randint - -def randn(n, seed=12345): - res = np.empty(n, dtype='float32') - float_randn(swig_ptr(res), res.size, seed) - return res - -rand_smooth_vectors_c = rand_smooth_vectors - -def rand_smooth_vectors(n, d, seed=1234): - res = np.empty((n, d), dtype='float32') - rand_smooth_vectors_c(n, d, swig_ptr(res), seed) - return res - -def eval_intersection(I1, I2): - """ size of intersection between each line of two result tables""" - I1 = np.ascontiguousarray(I1, dtype='int64') - I2 = np.ascontiguousarray(I2, dtype='int64') - n = I1.shape[0] - assert I2.shape[0] == n - k1, k2 = I1.shape[1], I2.shape[1] - ninter = 0 - for i in range(n): - ninter += ranklist_intersection_size( - k1, swig_ptr(I1[i]), k2, swig_ptr(I2[i])) - return ninter - - -def normalize_L2(x): - fvec_renorm_L2(x.shape[1], x.shape[0], swig_ptr(x)) - -###################################################### -# MapLong2Long interface -###################################################### - -def replacement_map_add(self, keys, vals): - n, = keys.shape - assert (n,) == keys.shape - self.add_c(n, swig_ptr(keys), swig_ptr(vals)) - -def replacement_map_search_multiple(self, keys): - n, = keys.shape - vals = np.empty(n, dtype='int64') - self.search_multiple_c(n, swig_ptr(keys), swig_ptr(vals)) - return vals - -replace_method(MapLong2Long, 'add', replacement_map_add) 
-replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple) ###################################################### # search_with_parameters interface @@ -1470,6 +162,7 @@ replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple) search_with_parameters_c = search_with_parameters + def search_with_parameters(index, x, k, params=None, output_stats=False): x = np.ascontiguousarray(x, dtype='float32') n, d = x.shape @@ -1501,8 +194,10 @@ def search_with_parameters(index, x, k, params=None, output_stats=False): } return distances, labels, stats + range_search_with_parameters_c = range_search_with_parameters + def range_search_with_parameters(index, x, radius, params=None, output_stats=False): x = np.ascontiguousarray(x, dtype='float32') n, d = x.shape @@ -1537,199 +232,6 @@ def range_search_with_parameters(index, x, radius, params=None, output_stats=Fal return lims, Dout, Iout, stats -###################################################### -# KNN function -###################################################### - -def knn(xq, xb, k, metric=METRIC_L2): - """ - Compute the k nearest neighbors of a vector without constructing an index - - - Parameters - ---------- - xq : array_like - Query vectors, shape (nq, d) where d is appropriate for the index. - `dtype` must be float32. - xb : array_like - Database vectors, shape (nb, d) where d is appropriate for the index. - `dtype` must be float32. - k : int - Number of nearest neighbors. 
- distance_type : MetricType, optional - distance measure to use (either METRIC_L2 or METRIC_INNER_PRODUCT) - - Returns - ------- - D : array_like - Distances of the nearest neighbors, shape (nq, k) - I : array_like - Labels of the nearest neighbors, shape (nq, k) - """ - xq = np.ascontiguousarray(xq, dtype='float32') - xb = np.ascontiguousarray(xb, dtype='float32') - nq, d = xq.shape - nb, d2 = xb.shape - assert d == d2 - - I = np.empty((nq, k), dtype='int64') - D = np.empty((nq, k), dtype='float32') - - if metric == METRIC_L2: - knn_L2sqr( - swig_ptr(xq), swig_ptr(xb), - d, nq, nb, k, swig_ptr(D), swig_ptr(I) - ) - elif metric == METRIC_INNER_PRODUCT: - knn_inner_product( - swig_ptr(xq), swig_ptr(xb), - d, nq, nb, k, swig_ptr(D), swig_ptr(I) - ) - else: - raise NotImplementedError("only L2 and INNER_PRODUCT are supported") - return D, I - - -########################################### -# Kmeans object -########################################### - - -class Kmeans: - """Object that performs k-means clustering and manages the centroids. - The `Kmeans` class is essentially a wrapper around the C++ `Clustering` object. - - Parameters - ---------- - d : int - dimension of the vectors to cluster - k : int - number of clusters - gpu: bool or int, optional - False: don't use GPU - True: use all GPUs - number: use this many GPUs - progressive_dim_steps: - use a progressive dimension clustering (with that number of steps) - - Subsequent parameters are fields of the Clustring object. The most important are: - - niter: int, optional - clustering iterations - nredo: int, optional - redo clustering this many times and keep best - verbose: bool, optional - spherical: bool, optional - do we want normalized centroids? - int_centroids: bool, optional - round centroids coordinates to integer - seed: int, optional - seed for the random number generator - - """ - - - def __init__(self, d, k, **kwargs): - """d: input dimension, k: nb of centroids. 
Additional - parameters are passed on the ClusteringParameters object, - including niter=25, verbose=False, spherical = False - """ - self.d = d - self.k = k - self.gpu = False - if "progressive_dim_steps" in kwargs: - self.cp = ProgressiveDimClusteringParameters() - else: - self.cp = ClusteringParameters() - for k, v in kwargs.items(): - if k == 'gpu': - if v == True or v == -1: - v = get_num_gpus() - self.gpu = v - else: - # if this raises an exception, it means that it is a non-existent field - getattr(self.cp, k) - setattr(self.cp, k, v) - self.centroids = None - - def train(self, x, weights=None, init_centroids=None): - """ Perform k-means clustering. - On output of the function call: - - - the centroids are in the centroids field of size (`k`, `d`). - - - the objective value at each iteration is in the array obj (size `niter`) - - - detailed optimization statistics are in the array iteration_stats. - - Parameters - ---------- - x : array_like - Training vectors, shape (n, d), `dtype` must be float32 and n should - be larger than the number of clusters `k`. 
- weights : array_like - weight associated to each vector, shape `n` - init_centroids : array_like - initial set of centroids, shape (n, d) - - Returns - ------- - final_obj: float - final optimization objective - - """ - x = np.ascontiguousarray(x, dtype='float32') - n, d = x.shape - assert d == self.d - - if self.cp.__class__ == ClusteringParameters: - # regular clustering - clus = Clustering(d, self.k, self.cp) - if init_centroids is not None: - nc, d2 = init_centroids.shape - assert d2 == d - copy_array_to_vector(init_centroids.ravel(), clus.centroids) - if self.cp.spherical: - self.index = IndexFlatIP(d) - else: - self.index = IndexFlatL2(d) - if self.gpu: - self.index = index_cpu_to_all_gpus(self.index, ngpu=self.gpu) - clus.train(x, self.index, weights) - else: - # not supported for progressive dim - assert weights is None - assert init_centroids is None - assert not self.cp.spherical - clus = ProgressiveDimClustering(d, self.k, self.cp) - if self.gpu: - fac = GpuProgressiveDimIndexFactory(ngpu=self.gpu) - else: - fac = ProgressiveDimIndexFactory() - clus.train(n, swig_ptr(x), fac) - - centroids = vector_float_to_array(clus.centroids) - - self.centroids = centroids.reshape(self.k, d) - stats = clus.iteration_stats - stats = [stats.at(i) for i in range(stats.size())] - self.obj = np.array([st.obj for st in stats]) - # copy all the iteration_stats objects to a python array - stat_fields = 'obj time time_search imbalance_factor nsplit'.split() - self.iteration_stats = [ - {field: getattr(st, field) for field in stat_fields} - for st in stats - ] - return self.obj[-1] if self.obj.size > 0 else 0.0 - - def assign(self, x): - x = np.ascontiguousarray(x, dtype='float32') - assert self.centroids is not None, "should train before assigning" - self.index.reset() - self.index.add(self.centroids) - D, I = self.index.search(x, 1) - return D.ravel(), I.ravel() - # IndexProxy was renamed to IndexReplicas, remap the old name for any old code # people may have IndexProxy = 
IndexReplicas @@ -1742,63 +244,28 @@ IVFSearchParameters = SearchParametersIVF # serialization of indexes to byte arrays ########################################### + def serialize_index(index): """ convert an index to a numpy uint8 array """ writer = VectorIOWriter() write_index(index, writer) return vector_to_array(writer.data) + def deserialize_index(data): reader = VectorIOReader() copy_array_to_vector(data, reader.data) return read_index(reader) + def serialize_index_binary(index): """ convert an index to a numpy uint8 array """ writer = VectorIOWriter() write_index_binary(index, writer) return vector_to_array(writer.data) + def deserialize_index_binary(data): reader = VectorIOReader() copy_array_to_vector(data, reader.data) return read_index_binary(reader) - - -########################################### -# ResultHeap -########################################### - -class ResultHeap: - """Accumulate query results from a sliced dataset. The final result will - be in self.D, self.I.""" - - def __init__(self, nq, k, keep_max=False): - " nq: number of query vectors, k: number of results per query " - self.I = np.zeros((nq, k), dtype='int64') - self.D = np.zeros((nq, k), dtype='float32') - self.nq, self.k = nq, k - if keep_max: - heaps = float_minheap_array_t() - else: - heaps = float_maxheap_array_t() - heaps.k = k - heaps.nh = nq - heaps.val = swig_ptr(self.D) - heaps.ids = swig_ptr(self.I) - heaps.heapify() - self.heaps = heaps - - def add_result(self, D, I): - """D, I do not need to be in a particular order (heap or sorted)""" - nq, kd = D.shape - D = np.ascontiguousarray(D, dtype='float32') - I = np.ascontiguousarray(I, dtype='int64') - assert I.shape == (nq, kd) - assert nq == self.nq - self.heaps.addn_with_ids( - kd, swig_ptr(D), - swig_ptr(I), kd) - - def finalize(self): - self.heaps.reorder() diff --git a/faiss/python/array_conversions.py b/faiss/python/array_conversions.py new file mode 100644 index 000000000..cd111e6b1 --- /dev/null +++ 
b/faiss/python/array_conversions.py
@@ -0,0 +1,162 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# @nolint
+
+# not linting this file because it imports * from swigfaiss, which
+# causes a ton of useless warnings.
+
+import numpy as np
+import array, warnings
+
+from faiss.loader import *
+
+###########################################
+# Utility to add a deprecation warning to
+# classes from the SWIG interface
+###########################################
+
+def _make_deprecated_swig_class(deprecated_name, base_name):
+    """
+    Dynamically construct deprecated classes as wrappers around renamed ones
+
+    The deprecation warning added in their __new__-method will trigger upon
+    construction of an instance of the class, but only once per session.
+
+    We do this here (in array_conversions.py) because the base classes are defined
+    in the SWIG interface, making it cumbersome to add the deprecation there.
+
+    Parameters
+    ----------
+    deprecated_name : string
+        Name of the class to be deprecated; _not_ present in SWIG interface.
+    base_name : string
+        Name of the class that is replacing deprecated_name; must already be
+        imported into the current namespace.
+
+    Returns
+    -------
+    None
+        However, the deprecated class gets added to the faiss namespace
+    """
+    base_class = globals()[base_name]
+
+    def new_meth(cls, *args, **kwargs):
+        msg = f"The class faiss.{deprecated_name} is deprecated in favour of faiss.{base_name}!"
+ warnings.warn(msg, DeprecationWarning, stacklevel=2) + instance = super(base_class, cls).__new__(cls, *args, **kwargs) + return instance + + # three-argument version of "type" uses (name, tuple-of-bases, dict-of-attributes) + klazz = type(deprecated_name, (base_class,), {"__new__": new_meth}) + + # this ends up adding the class to the "faiss" namespace, in a way that it + # is available both through "import faiss" and "from faiss import *" + globals()[deprecated_name] = klazz + + +########################################### +# numpy array / std::vector conversions +########################################### + +sizeof_long = array.array('l').itemsize +deprecated_name_map = { + # deprecated: replacement + 'Float': 'Float32', + 'Double': 'Float64', + 'Char': 'Int8', + 'Int': 'Int32', + 'Long': 'Int32' if sizeof_long == 4 else 'Int64', + 'LongLong': 'Int64', + 'Byte': 'UInt8', + # previously misspelled variant + 'Uint64': 'UInt64', +} + +for depr_prefix, base_prefix in deprecated_name_map.items(): + _make_deprecated_swig_class(depr_prefix + "Vector", base_prefix + "Vector") + + # same for the three legacy *VectorVector classes + if depr_prefix in ['Float', 'Long', 'Byte']: + _make_deprecated_swig_class(depr_prefix + "VectorVector", + base_prefix + "VectorVector") + +# mapping from vector names in swigfaiss.swig and the numpy dtype names +# TODO: once deprecated classes are removed, remove the dict and just use .lower() below +vector_name_map = { + 'Float32': 'float32', + 'Float64': 'float64', + 'Int8': 'int8', + 'Int16': 'int16', + 'Int32': 'int32', + 'Int64': 'int64', + 'UInt8': 'uint8', + 'UInt16': 'uint16', + 'UInt32': 'uint32', + 'UInt64': 'uint64', + **{k: v.lower() for k, v in deprecated_name_map.items()} +} + + +def vector_to_array(v): + """ convert a C++ vector to a numpy array """ + classname = v.__class__.__name__ + assert classname.endswith('Vector') + dtype = np.dtype(vector_name_map[classname[:-6]]) + a = np.empty(v.size(), dtype=dtype) + if v.size() > 
0: + memcpy(swig_ptr(a), v.data(), a.nbytes) + return a + + +def vector_float_to_array(v): + return vector_to_array(v) + + +def copy_array_to_vector(a, v): + """ copy a numpy array to a vector """ + n, = a.shape + classname = v.__class__.__name__ + assert classname.endswith('Vector') + dtype = np.dtype(vector_name_map[classname[:-6]]) + assert dtype == a.dtype, ( + 'cannot copy a %s array to a %s (should be %s)' % ( + a.dtype, classname, dtype)) + v.resize(n) + if n > 0: + memcpy(v.data(), swig_ptr(a), a.nbytes) + +# same for AlignedTable + + +def copy_array_to_AlignedTable(a, v): + n, = a.shape + # TODO check class name + assert v.itemsize() == a.itemsize + v.resize(n) + if n > 0: + memcpy(v.get(), swig_ptr(a), a.nbytes) + + +def array_to_AlignedTable(a): + if a.dtype == 'uint16': + v = AlignedTableUint16(a.size) + elif a.dtype == 'uint8': + v = AlignedTableUint8(a.size) + else: + assert False + copy_array_to_AlignedTable(a, v) + return v + + +def AlignedTable_to_array(v): + """ convert an AlignedTable to a numpy array """ + classname = v.__class__.__name__ + assert classname.startswith('AlignedTable') + dtype = classname[12:].lower() + a = np.empty(v.size(), dtype=dtype) + if a.size > 0: + memcpy(swig_ptr(a), v.data(), a.nbytes) + return a diff --git a/faiss/python/class_wrappers.py b/faiss/python/class_wrappers.py new file mode 100644 index 000000000..be5b6e858 --- /dev/null +++ b/faiss/python/class_wrappers.py @@ -0,0 +1,834 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+
+import numpy as np
+import inspect
+
+from faiss.loader import swig_ptr, RangeSearchResult, rev_swig_ptr, \
+    IDSelector, IDSelectorArray, IDSelectorBatch, try_extract_index_ivf, \
+    DirectMap, OperatingPoints
+
+import faiss
+
+##################################################################
+# The functions below add or replace some methods for classes
+# this is to be able to pass in numpy arrays directly
+# The C++ version of the classnames will be suffixed with _c
+##################################################################
+
+# For most arrays we force the conversion to the target type with
+# np.ascontiguousarray, but for uint8 codes, we raise a type error
+# because it is unclear how the conversion should occur: with a view
+# (= cast) or conversion?
+
+def _check_dtype_uint8(codes):
+    if codes.dtype != 'uint8':
+        raise TypeError("Input argument %s must be ndarray of dtype "
+                        " uint8, but found %s" % ("codes", codes.dtype))
+    return np.ascontiguousarray(codes)
+
+
+def replace_method(the_class, name, replacement, ignore_missing=False):
+    """ Replaces a method in a class with another version. The old method
+    is renamed to method_name_c (because presumably it was implemented in C) """
+    try:
+        orig_method = getattr(the_class, name)
+    except AttributeError:
+        if ignore_missing:
+            return
+        raise
+    if orig_method.__name__ == 'replacement_' + name:
+        # replacement was done in parent class
+        return
+    setattr(the_class, name + '_c', orig_method)
+    setattr(the_class, name, replacement)
+
+
+def handle_Clustering(the_class):
+
+    def replacement_train(self, x, index, weights=None):
+        """Perform clustering on a set of vectors. The index is used for assignment.
+
+        Parameters
+        ----------
+        x : array_like
+            Training vectors, shape (n, self.d). `dtype` must be float32.
+        index : faiss.Index
+            Index used for assignment. The dimension of the index should be `self.d`.
+        weights : array_like, optional
+            Per training sample weight (size n) used when computing the weighted
+            average to obtain the centroid (default is 1 for all training vectors).
+        """
+        n, d = x.shape
+        x = np.ascontiguousarray(x, dtype='float32')
+        assert d == self.d
+        if weights is not None:
+            weights = np.ascontiguousarray(weights, dtype='float32')
+            assert weights.shape == (n, )
+            self.train_c(n, swig_ptr(x), index, swig_ptr(weights))
+        else:
+            self.train_c(n, swig_ptr(x), index)
+
+    def replacement_train_encoded(self, x, codec, index, weights=None):
+        """ Perform clustering on a set of compressed vectors. The index is used for assignment.
+        The decompression is performed on-the-fly.
+
+        Parameters
+        ----------
+        x : array_like
+            Training vectors, shape (n, codec.sa_code_size()). `dtype` must be `uint8`.
+        codec : faiss.Index
+            Index used to decode the vectors. Should have dimension `self.d`.
+        index : faiss.Index
+            Index used for assignment. The dimension of the index should be `self.d`.
+        weights : array_like, optional
+            Per training sample weight (size n) used when computing the weighted
+            average to obtain the centroid (default is 1 for all training vectors).
+        """
+        n, d = x.shape
+        x = _check_dtype_uint8(x)
+        assert d == codec.sa_code_size()
+        assert codec.d == index.d
+        if weights is not None:
+            weights = np.ascontiguousarray(weights, dtype='float32')
+            assert weights.shape == (n, )
+            self.train_encoded_c(n, swig_ptr(x), codec,
+                                 index, swig_ptr(weights))
+        else:
+            self.train_encoded_c(n, swig_ptr(x), codec, index)
+
+    replace_method(the_class, 'train', replacement_train)
+    replace_method(the_class, 'train_encoded', replacement_train_encoded)
+
+
+def handle_Clustering1D(the_class):
+
+    def replacement_train_exact(self, x):
+        """Perform clustering on a set of 1D vectors.
+
+        Parameters
+        ----------
+        x : array_like
+            Training vectors, shape (n, 1). `dtype` must be float32.
+ """ + n, d = x.shape + x = np.ascontiguousarray(x, dtype='float32') + assert d == self.d + self.train_exact_c(n, swig_ptr(x)) + + replace_method(the_class, 'train_exact', replacement_train_exact) + + +def handle_Quantizer(the_class): + + def replacement_train(self, x): + """ Train the quantizer on a set of training vectors. + + Parameters + ---------- + x : array_like + Training vectors, shape (n, self.d). `dtype` must be float32. + """ + n, d = x.shape + x = np.ascontiguousarray(x, dtype='float32') + assert d == self.d + self.train_c(n, swig_ptr(x)) + + def replacement_compute_codes(self, x): + """ Compute the codes corresponding to a set of vectors. + + Parameters + ---------- + x : array_like + Vectors to encode, shape (n, self.d). `dtype` must be float32. + + Returns + ------- + codes : array_like + Corresponding code for each vector, shape (n, self.code_size) + and `dtype` uint8. + """ + n, d = x.shape + x = np.ascontiguousarray(x, dtype='float32') + assert d == self.d + codes = np.empty((n, self.code_size), dtype='uint8') + self.compute_codes_c(swig_ptr(x), swig_ptr(codes), n) + return codes + + def replacement_decode(self, codes): + """Reconstruct an approximation of vectors given their codes. + + Parameters + ---------- + codes : array_like + Codes to decode, shape (n, self.code_size). `dtype` must be uint8. + + Returns + ------- + Reconstructed vectors for each code, shape `(n, d)` and `dtype` float32. 
+ """ + n, cs = codes.shape + codes = _check_dtype_uint8(codes) + assert cs == self.code_size + x = np.empty((n, self.d), dtype='float32') + self.decode_c(swig_ptr(codes), swig_ptr(x), n) + return x + + replace_method(the_class, 'train', replacement_train) + replace_method(the_class, 'compute_codes', replacement_compute_codes) + replace_method(the_class, 'decode', replacement_decode) + + +def handle_NSG(the_class): + + def replacement_build(self, x, graph): + n, d = x.shape + assert d == self.d + assert graph.ndim == 2 + assert graph.shape[0] == n + K = graph.shape[1] + x = np.ascontiguousarray(x, dtype='float32') + graph = np.ascontiguousarray(graph, dtype='int64') + self.build_c(n, swig_ptr(x), swig_ptr(graph), K) + + replace_method(the_class, 'build', replacement_build) + + +def handle_Index(the_class): + + def replacement_add(self, x): + """Adds vectors to the index. + The index must be trained before vectors can be added to it. + The vectors are implicitly numbered in sequence. When `n` vectors are + added to the index, they are given ids `ntotal`, `ntotal + 1`, ..., `ntotal + n - 1`. + + Parameters + ---------- + x : array_like + Query vectors, shape (n, d) where d is appropriate for the index. + `dtype` must be float32. + """ + + n, d = x.shape + assert d == self.d + x = np.ascontiguousarray(x, dtype='float32') + self.add_c(n, swig_ptr(x)) + + def replacement_add_with_ids(self, x, ids): + """Adds vectors with arbitrary ids to the index (not all indexes support this). + The index must be trained before vectors can be added to it. + Vector `i` is stored in `x[i]` and has id `ids[i]`. + + Parameters + ---------- + x : array_like + Query vectors, shape (n, d) where d is appropriate for the index. + `dtype` must be float32. + ids : array_like + Array if ids of size n. The ids must be of type `int64`. Note that `-1` is reserved + in result lists to mean "not found" so it's better to not use it as an id. 
+ """ + n, d = x.shape + assert d == self.d + x = np.ascontiguousarray(x, dtype='float32') + ids = np.ascontiguousarray(ids, dtype='int64') + assert ids.shape == (n, ), 'not same nb of vectors as ids' + self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids)) + + def replacement_assign(self, x, k, labels=None): + """Find the k nearest neighbors of the set of vectors x in the index. + This is the same as the `search` method, but discards the distances. + + Parameters + ---------- + x : array_like + Query vectors, shape (n, d) where d is appropriate for the index. + `dtype` must be float32. + k : int + Number of nearest neighbors. + labels : array_like, optional + Labels array to store the results. + + Returns + ------- + labels: array_like + Labels of the nearest neighbors, shape (n, k). + When not enough results are found, the label is set to -1 + """ + n, d = x.shape + assert d == self.d + x = np.ascontiguousarray(x, dtype='float32') + + if labels is None: + labels = np.empty((n, k), dtype=np.int64) + else: + assert labels.shape == (n, k) + + self.assign_c(n, swig_ptr(x), swig_ptr(labels), k) + return labels + + def replacement_train(self, x): + """Trains the index on a representative set of vectors. + The index must be trained before vectors can be added to it. + + Parameters + ---------- + x : array_like + Query vectors, shape (n, d) where d is appropriate for the index. + `dtype` must be float32. + """ + n, d = x.shape + assert d == self.d + x = np.ascontiguousarray(x, dtype='float32') + self.train_c(n, swig_ptr(x)) + + def replacement_search(self, x, k, *, params=None, D=None, I=None): + """Find the k nearest neighbors of the set of vectors x in the index. + + Parameters + ---------- + x : array_like + Query vectors, shape (n, d) where d is appropriate for the index. + `dtype` must be float32. + k : int + Number of nearest neighbors. 
+ params : SearchParameters + Search parameters of the current search (overrides the class-level params) + D : array_like, optional + Distance array to store the result. + I : array_like, optional + Labels array to store the results. + + Returns + ------- + D : array_like + Distances of the nearest neighbors, shape (n, k). When not enough results are found + the label is set to +Inf or -Inf. + I : array_like + Labels of the nearest neighbors, shape (n, k). + When not enough results are found, the label is set to -1 + """ + + n, d = x.shape + x = np.ascontiguousarray(x, dtype='float32') + assert d == self.d + + assert k > 0 + + if D is None: + D = np.empty((n, k), dtype=np.float32) + else: + assert D.shape == (n, k) + + if I is None: + I = np.empty((n, k), dtype=np.int64) + else: + assert I.shape == (n, k) + + self.search_c(n, swig_ptr(x), k, swig_ptr(D), swig_ptr(I), params) + return D, I + + def replacement_search_and_reconstruct(self, x, k, *, params=None, D=None, I=None, R=None): + """Find the k nearest neighbors of the set of vectors x in the index, + and return an approximation of these vectors. + + Parameters + ---------- + x : array_like + Query vectors, shape (n, d) where d is appropriate for the index. + `dtype` must be float32. + k : int + Number of nearest neighbors. + params : SearchParameters + Search parameters of the current search (overrides the class-level params) + D : array_like, optional + Distance array to store the result. + I : array_like, optional + Labels array to store the result. + R : array_like, optional + reconstruction array to store + + Returns + ------- + D : array_like + Distances of the nearest neighbors, shape (n, k). When not enough results are found + the label is set to +Inf or -Inf. + I : array_like + Labels of the nearest neighbors, shape (n, k). When not enough results are found, + the label is set to -1 + R : array_like + Approximate (reconstructed) nearest neighbor vectors, shape (n, k, d). 
+ """ + n, d = x.shape + assert d == self.d + x = np.ascontiguousarray(x, dtype='float32') + + assert k > 0 + + if D is None: + D = np.empty((n, k), dtype=np.float32) + else: + assert D.shape == (n, k) + + if I is None: + I = np.empty((n, k), dtype=np.int64) + else: + assert I.shape == (n, k) + + if R is None: + R = np.empty((n, k, d), dtype=np.float32) + else: + assert R.shape == (n, k, d) + + self.search_and_reconstruct_c( + n, swig_ptr(x), + k, swig_ptr(D), + swig_ptr(I), swig_ptr(R), params + ) + return D, I, R + + def replacement_remove_ids(self, x): + """Remove some ids from the index. + This is a O(ntotal) operation by default, so could be expensive. + + Parameters + ---------- + x : array_like or faiss.IDSelector + Either an IDSelector that returns True for vectors to remove, or a + list of ids to reomove (1D array of int64). When `x` is a list, + it is wrapped into an IDSelector. + + Returns + ------- + n_remove: int + number of vectors that were removed + """ + if isinstance(x, IDSelector): + sel = x + else: + assert x.ndim == 1 + index_ivf = try_extract_index_ivf(self) + x = np.ascontiguousarray(x, dtype='int64') + if index_ivf and index_ivf.direct_map.type == DirectMap.Hashtable: + sel = IDSelectorArray(x.size, swig_ptr(x)) + else: + sel = IDSelectorBatch(x.size, swig_ptr(x)) + return self.remove_ids_c(sel) + + def replacement_reconstruct(self, key, x=None): + """Approximate reconstruction of one vector from the index. + + Parameters + ---------- + key : int + Id of the vector to reconstruct + x : array_like, optional + pre-allocated array to store the results + + Returns + ------- + x : array_like reconstructed vector, size `self.d`, `dtype`=float32 + """ + if x is None: + x = np.empty(self.d, dtype=np.float32) + else: + assert x.shape == (self.d, ) + + self.reconstruct_c(key, swig_ptr(x)) + return x + + def replacement_reconstruct_batch(self, key, x=None): + """Approximate reconstruction of several vectors from the index. 
+ + Parameters + ---------- + key : array of ints + Ids of the vectors to reconstruct + x : array_like, optional + pre-allocated array to store the results + + Returns + ------- + x : array_like + reconstrcuted vectors, size `len(key), self.d` + """ + key = np.ascontiguousarray(key, dtype='int64') + n, = key.shape + if x is None: + x = np.empty((n, self.d), dtype=np.float32) + else: + assert x.shape == (n, self.d) + self.reconstruct_batch_c(n, swig_ptr(key), swig_ptr(x)) + return x + + def replacement_reconstruct_n(self, n0, ni, x=None): + """Approximate reconstruction of vectors `n0` ... `n0 + ni - 1` from the index. + Missing vectors trigger an exception. + + Parameters + ---------- + n0 : int + Id of the first vector to reconstruct + ni : int + Number of vectors to reconstruct + x : array_like, optional + pre-allocated array to store the results + + Returns + ------- + x : array_like + Reconstructed vectors, size (`ni`, `self.d`), `dtype`=float32 + """ + if x is None: + x = np.empty((ni, self.d), dtype=np.float32) + else: + assert x.shape == (ni, self.d) + + self.reconstruct_n_c(n0, ni, swig_ptr(x)) + return x + + def replacement_update_vectors(self, keys, x): + n = keys.size + assert keys.shape == (n, ) + assert x.shape == (n, self.d) + x = np.ascontiguousarray(x, dtype='float32') + keys = np.ascontiguousarray(keys, dtype='int64') + self.update_vectors_c(n, swig_ptr(keys), swig_ptr(x)) + + # No support passed-in for output buffers + def replacement_range_search(self, x, thresh, *, params=None): + """Search vectors that are within a distance of the query vectors. + + Parameters + ---------- + x : array_like + Query vectors, shape (n, d) where d is appropriate for the index. + `dtype` must be float32. + thresh : float + Threshold to select neighbors. 
All elements within this radius are returned, + except for maximum inner product indexes, where the elements above the + threshold are returned + params : SearchParameters + Search parameters of the current search (overrides the class-level params) + + + Returns + ------- + lims: array_like + Startring index of the results for each query vector, size n+1. + D : array_like + Distances of the nearest neighbors, shape `lims[n]`. The distances for + query i are in `D[lims[i]:lims[i+1]]`. + I : array_like + Labels of nearest neighbors, shape `lims[n]`. The labels for query i + are in `I[lims[i]:lims[i+1]]`. + + """ + n, d = x.shape + assert d == self.d + x = np.ascontiguousarray(x, dtype='float32') + + res = RangeSearchResult(n) + self.range_search_c(n, swig_ptr(x), thresh, res, params) + # get pointers and copy them + lims = rev_swig_ptr(res.lims, n + 1).copy() + nd = int(lims[-1]) + D = rev_swig_ptr(res.distances, nd).copy() + I = rev_swig_ptr(res.labels, nd).copy() + return lims, D, I + + def replacement_sa_encode(self, x, codes=None): + n, d = x.shape + assert d == self.d + x = np.ascontiguousarray(x, dtype='float32') + + if codes is None: + codes = np.empty((n, self.sa_code_size()), dtype=np.uint8) + else: + assert codes.shape == (n, self.sa_code_size()) + + self.sa_encode_c(n, swig_ptr(x), swig_ptr(codes)) + return codes + + def replacement_sa_decode(self, codes, x=None): + n, cs = codes.shape + assert cs == self.sa_code_size() + codes = _check_dtype_uint8(codes) + + if x is None: + x = np.empty((n, self.d), dtype=np.float32) + else: + assert x.shape == (n, self.d) + + self.sa_decode_c(n, swig_ptr(codes), swig_ptr(x)) + return x + + def replacement_add_sa_codes(self, codes, ids=None): + n, cs = codes.shape + assert cs == self.sa_code_size() + codes = _check_dtype_uint8(codes) + + if ids is not None: + assert ids.shape == (n,) + ids = swig_ptr(ids) + self.add_sa_codes_c(n, swig_ptr(codes), ids) + + replace_method(the_class, 'add', replacement_add) + 
replace_method(the_class, 'add_with_ids', replacement_add_with_ids) + replace_method(the_class, 'assign', replacement_assign) + replace_method(the_class, 'train', replacement_train) + replace_method(the_class, 'search', replacement_search) + replace_method(the_class, 'remove_ids', replacement_remove_ids) + replace_method(the_class, 'reconstruct', replacement_reconstruct) + replace_method(the_class, 'reconstruct_batch', + replacement_reconstruct_batch) + replace_method(the_class, 'reconstruct_n', replacement_reconstruct_n) + replace_method(the_class, 'range_search', replacement_range_search) + replace_method(the_class, 'update_vectors', replacement_update_vectors, + ignore_missing=True) + replace_method(the_class, 'search_and_reconstruct', + replacement_search_and_reconstruct, ignore_missing=True) + replace_method(the_class, 'sa_encode', replacement_sa_encode) + replace_method(the_class, 'sa_decode', replacement_sa_decode) + replace_method(the_class, 'add_sa_codes', replacement_add_sa_codes, + ignore_missing=True) + + # get/set state for pickle + # the data is serialized to std::vector -> numpy array -> python bytes + # so not very efficient for now. 
+ + def index_getstate(self): + return {"this": faiss.serialize_index(self).tobytes()} + + def index_setstate(self, st): + index2 = faiss.deserialize_index(np.frombuffer(st["this"], dtype="uint8")) + self.this = index2.this + + the_class.__getstate__ = index_getstate + the_class.__setstate__ = index_setstate + + +def handle_IndexBinary(the_class): + + def replacement_add(self, x): + n, d = x.shape + x = _check_dtype_uint8(x) + assert d * 8 == self.d + self.add_c(n, swig_ptr(x)) + + def replacement_add_with_ids(self, x, ids): + n, d = x.shape + x = _check_dtype_uint8(x) + ids = np.ascontiguousarray(ids, dtype='int64') + assert d * 8 == self.d + assert ids.shape == (n, ), 'not same nb of vectors as ids' + self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids)) + + def replacement_train(self, x): + n, d = x.shape + x = _check_dtype_uint8(x) + assert d * 8 == self.d + self.train_c(n, swig_ptr(x)) + + def replacement_reconstruct(self, key): + x = np.empty(self.d // 8, dtype=np.uint8) + self.reconstruct_c(key, swig_ptr(x)) + return x + + def replacement_search(self, x, k): + x = _check_dtype_uint8(x) + n, d = x.shape + assert d * 8 == self.d + assert k > 0 + distances = np.empty((n, k), dtype=np.int32) + labels = np.empty((n, k), dtype=np.int64) + self.search_c(n, swig_ptr(x), + k, swig_ptr(distances), + swig_ptr(labels)) + return distances, labels + + def replacement_range_search(self, x, thresh): + n, d = x.shape + x = _check_dtype_uint8(x) + assert d * 8 == self.d + res = RangeSearchResult(n) + self.range_search_c(n, swig_ptr(x), thresh, res) + # get pointers and copy them + lims = rev_swig_ptr(res.lims, n + 1).copy() + nd = int(lims[-1]) + D = rev_swig_ptr(res.distances, nd).copy() + I = rev_swig_ptr(res.labels, nd).copy() + return lims, D, I + + def replacement_remove_ids(self, x): + if isinstance(x, IDSelector): + sel = x + else: + assert x.ndim == 1 + x = np.ascontiguousarray(x, dtype='int64') + sel = IDSelectorBatch(x.size, swig_ptr(x)) + return self.remove_ids_c(sel) 
+ + replace_method(the_class, 'add', replacement_add) + replace_method(the_class, 'add_with_ids', replacement_add_with_ids) + replace_method(the_class, 'train', replacement_train) + replace_method(the_class, 'search', replacement_search) + replace_method(the_class, 'range_search', replacement_range_search) + replace_method(the_class, 'reconstruct', replacement_reconstruct) + replace_method(the_class, 'remove_ids', replacement_remove_ids) + + +def handle_VectorTransform(the_class): + + def apply_method(self, x): + n, d = x.shape + x = np.ascontiguousarray(x, dtype='float32') + assert d == self.d_in + y = np.empty((n, self.d_out), dtype=np.float32) + self.apply_noalloc(n, swig_ptr(x), swig_ptr(y)) + return y + + def replacement_reverse_transform(self, x): + n, d = x.shape + x = np.ascontiguousarray(x, dtype='float32') + assert d == self.d_out + y = np.empty((n, self.d_in), dtype=np.float32) + self.reverse_transform_c(n, swig_ptr(x), swig_ptr(y)) + return y + + def replacement_vt_train(self, x): + n, d = x.shape + x = np.ascontiguousarray(x, dtype='float32') + assert d == self.d_in + self.train_c(n, swig_ptr(x)) + + replace_method(the_class, 'train', replacement_vt_train) + # apply is reserved in Pyton... 
+    the_class.apply_py = apply_method
+    the_class.apply = apply_method
+    replace_method(the_class, 'reverse_transform',
+                   replacement_reverse_transform)
+
+
+def handle_AutoTuneCriterion(the_class):
+    def replacement_set_groundtruth(self, D, I):
+        if D is not None:
+            assert I.shape == D.shape
+        self.nq, self.gt_nnn = I.shape
+        self.set_groundtruth_c(
+            self.gt_nnn, swig_ptr(D) if D is not None else None, swig_ptr(I))
+
+    def replacement_evaluate(self, D, I):
+        assert I.shape == D.shape
+        assert I.shape == (self.nq, self.nnn)
+        return self.evaluate_c(swig_ptr(D), swig_ptr(I))
+
+    replace_method(the_class, 'set_groundtruth', replacement_set_groundtruth)
+    replace_method(the_class, 'evaluate', replacement_evaluate)
+
+
+def handle_ParameterSpace(the_class):
+    def replacement_explore(self, index, xq, crit):
+        assert xq.shape == (crit.nq, index.d)
+        xq = np.ascontiguousarray(xq, dtype='float32')
+        ops = OperatingPoints()
+        self.explore_c(index, crit.nq, swig_ptr(xq),
+                       crit, ops)
+        return ops
+    replace_method(the_class, 'explore', replacement_explore)
+
+
+def handle_MatrixStats(the_class):
+    original_init = the_class.__init__
+
+    def replacement_init(self, m):
+        assert len(m.shape) == 2
+        m = np.ascontiguousarray(m, dtype='float32')
+        original_init(self, m.shape[0], m.shape[1], swig_ptr(m))
+
+    the_class.__init__ = replacement_init
+
+
+def handle_IOWriter(the_class):
+
+    def write_bytes(self, b):
+        return self(swig_ptr(b), 1, len(b))
+
+    the_class.write_bytes = write_bytes
+
+
+def handle_IOReader(the_class):
+
+    def read_bytes(self, totsz):
+        buf = bytearray(totsz)
+        was_read = self(swig_ptr(buf), 1, len(buf))
+        return bytes(buf[:was_read])
+
+    the_class.read_bytes = read_bytes
+
+
+def handle_IndexRowwiseMinMax(the_class):
+    def replacement_train_inplace(self, x):
+        """Trains the index on a representative set of vectors inplace.
+        The index must be trained before vectors can be added to it.
+ + This call WILL change the values in the input array, because + of two scaling proceduces being performed inplace. + + Parameters + ---------- + x : array_like + Query vectors, shape (n, d) where d is appropriate for the index. + `dtype` must be float32. + """ + n, d = x.shape + assert d == self.d + x = np.ascontiguousarray(x, dtype='float32') + self.train_inplace_c(n, swig_ptr(x)) + + replace_method(the_class, 'train_inplace', replacement_train_inplace) + + +###################################################### +# MapLong2Long interface +###################################################### + + +def handle_MapLong2Long(the_class): + + def replacement_map_add(self, keys, vals): + n, = keys.shape + assert (n,) == keys.shape + self.add_c(n, swig_ptr(keys), swig_ptr(vals)) + + def replacement_map_search_multiple(self, keys): + n, = keys.shape + vals = np.empty(n, dtype='int64') + self.search_multiple_c(n, swig_ptr(keys), swig_ptr(vals)) + return vals + + replace_method(the_class, 'add', replacement_map_add) + replace_method(the_class, 'search_multiple', + replacement_map_search_multiple) + + +def handle_SearchParameters(the_class): + """ this wrapper is to enable initializations of the form + SearchParametersXX(a=3, b=SearchParamsYY) + This also requires the enclosing class to keep a reference on the + sub-object + """ + the_class.original_init = the_class.__init__ + + def replacement_init(self, **args): + self.original_init() + self.referenced_objects = [] + for k, v in args.items(): + assert hasattr(self, k) + setattr(self, k, v) + if inspect.isclass(v): + self.referenced_objects.append(v) + + the_class.__init__ = replacement_init diff --git a/faiss/python/extra_wrappers.py b/faiss/python/extra_wrappers.py new file mode 100644 index 000000000..2af39b23b --- /dev/null +++ b/faiss/python/extra_wrappers.py @@ -0,0 +1,362 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# @nolint + +# not linting this file because it imports * from swigfaiss, which +# causes a ton of useless warnings. + +import numpy as np + +from faiss.loader import * + +import faiss + +########################################### +# Wrapper for a few functions +########################################### + + +def kmin(array, k): + """return k smallest values (and their indices) of the lines of a + float32 array""" + array = np.ascontiguousarray(array, dtype='float32') + m, n = array.shape + I = np.zeros((m, k), dtype='int64') + D = np.zeros((m, k), dtype='float32') + ha = faiss.float_maxheap_array_t() + ha.ids = swig_ptr(I) + ha.val = swig_ptr(D) + ha.nh = m + ha.k = k + ha.heapify() + ha.addn(n, swig_ptr(array)) + ha.reorder() + return D, I + + +def kmax(array, k): + """return k largest values (and their indices) of the lines of a + float32 array""" + array = np.ascontiguousarray(array, dtype='float32') + m, n = array.shape + I = np.zeros((m, k), dtype='int64') + D = np.zeros((m, k), dtype='float32') + ha = faiss.float_minheap_array_t() + ha.ids = swig_ptr(I) + ha.val = swig_ptr(D) + ha.nh = m + ha.k = k + ha.heapify() + ha.addn(n, swig_ptr(array)) + ha.reorder() + return D, I + + +def pairwise_distances(xq, xb, mt=METRIC_L2, metric_arg=0): + """compute the whole pairwise distance matrix between two sets of + vectors""" + xq = np.ascontiguousarray(xq, dtype='float32') + xb = np.ascontiguousarray(xb, dtype='float32') + nq, d = xq.shape + nb, d2 = xb.shape + assert d == d2 + dis = np.empty((nq, nb), dtype='float32') + if mt == METRIC_L2: + pairwise_L2sqr( + d, nq, swig_ptr(xq), + nb, swig_ptr(xb), + swig_ptr(dis)) + else: + pairwise_extra_distances( + d, nq, swig_ptr(xq), + nb, swig_ptr(xb), + mt, metric_arg, + swig_ptr(dis)) + return dis + + +def rand(n, seed=12345): + res = np.empty(n, dtype='float32') + float_rand(swig_ptr(res), 
res.size, seed) + return res + + +def randint(n, seed=12345, vmax=None): + res = np.empty(n, dtype='int64') + if vmax is None: + int64_rand(swig_ptr(res), res.size, seed) + else: + int64_rand_max(swig_ptr(res), res.size, vmax, seed) + return res + + +lrand = randint + + +def randn(n, seed=12345): + res = np.empty(n, dtype='float32') + float_randn(swig_ptr(res), res.size, seed) + return res + + +rand_smooth_vectors_c = rand_smooth_vectors + + +def rand_smooth_vectors(n, d, seed=1234): + res = np.empty((n, d), dtype='float32') + rand_smooth_vectors_c(n, d, swig_ptr(res), seed) + return res + + +def eval_intersection(I1, I2): + """ size of intersection between each line of two result tables""" + I1 = np.ascontiguousarray(I1, dtype='int64') + I2 = np.ascontiguousarray(I2, dtype='int64') + n = I1.shape[0] + assert I2.shape[0] == n + k1, k2 = I1.shape[1], I2.shape[1] + ninter = 0 + for i in range(n): + ninter += ranklist_intersection_size( + k1, swig_ptr(I1[i]), k2, swig_ptr(I2[i])) + return ninter + + +def normalize_L2(x): + fvec_renorm_L2(x.shape[1], x.shape[0], swig_ptr(x)) + + +########################################### +# ResultHeap +########################################### + +class ResultHeap: + """Accumulate query results from a sliced dataset. 
The final result will + be in self.D, self.I.""" + + def __init__(self, nq, k, keep_max=False): + " nq: number of query vectors, k: number of results per query " + self.I = np.zeros((nq, k), dtype='int64') + self.D = np.zeros((nq, k), dtype='float32') + self.nq, self.k = nq, k + if keep_max: + heaps = float_minheap_array_t() + else: + heaps = float_maxheap_array_t() + heaps.k = k + heaps.nh = nq + heaps.val = swig_ptr(self.D) + heaps.ids = swig_ptr(self.I) + heaps.heapify() + self.heaps = heaps + + def add_result(self, D, I): + """D, I do not need to be in a particular order (heap or sorted)""" + nq, kd = D.shape + D = np.ascontiguousarray(D, dtype='float32') + I = np.ascontiguousarray(I, dtype='int64') + assert I.shape == (nq, kd) + assert nq == self.nq + self.heaps.addn_with_ids( + kd, swig_ptr(D), + swig_ptr(I), kd) + + def finalize(self): + self.heaps.reorder() + + + +###################################################### +# KNN function +###################################################### + +def knn(xq, xb, k, metric=METRIC_L2): + """ + Compute the k nearest neighbors of a vector without constructing an index + + + Parameters + ---------- + xq : array_like + Query vectors, shape (nq, d) where d is appropriate for the index. + `dtype` must be float32. + xb : array_like + Database vectors, shape (nb, d) where d is appropriate for the index. + `dtype` must be float32. + k : int + Number of nearest neighbors. 
+ distance_type : MetricType, optional + distance measure to use (either METRIC_L2 or METRIC_INNER_PRODUCT) + + Returns + ------- + D : array_like + Distances of the nearest neighbors, shape (nq, k) + I : array_like + Labels of the nearest neighbors, shape (nq, k) + """ + xq = np.ascontiguousarray(xq, dtype='float32') + xb = np.ascontiguousarray(xb, dtype='float32') + nq, d = xq.shape + nb, d2 = xb.shape + assert d == d2 + + I = np.empty((nq, k), dtype='int64') + D = np.empty((nq, k), dtype='float32') + + if metric == METRIC_L2: + knn_L2sqr( + swig_ptr(xq), swig_ptr(xb), + d, nq, nb, k, swig_ptr(D), swig_ptr(I) + ) + elif metric == METRIC_INNER_PRODUCT: + knn_inner_product( + swig_ptr(xq), swig_ptr(xb), + d, nq, nb, k, swig_ptr(D), swig_ptr(I) + ) + else: + raise NotImplementedError("only L2 and INNER_PRODUCT are supported") + return D, I + + +########################################### +# Kmeans object +########################################### + + +class Kmeans: + """Object that performs k-means clustering and manages the centroids. + The `Kmeans` class is essentially a wrapper around the C++ `Clustering` object. + + Parameters + ---------- + d : int + dimension of the vectors to cluster + k : int + number of clusters + gpu: bool or int, optional + False: don't use GPU + True: use all GPUs + number: use this many GPUs + progressive_dim_steps: + use a progressive dimension clustering (with that number of steps) + + Subsequent parameters are fields of the Clustring object. The most important are: + + niter: int, optional + clustering iterations + nredo: int, optional + redo clustering this many times and keep best + verbose: bool, optional + spherical: bool, optional + do we want normalized centroids? + int_centroids: bool, optional + round centroids coordinates to integer + seed: int, optional + seed for the random number generator + + """ + + def __init__(self, d, k, **kwargs): + """d: input dimension, k: nb of centroids. 
Additional + parameters are passed on the ClusteringParameters object, + including niter=25, verbose=False, spherical = False + """ + self.d = d + self.k = k + self.gpu = False + if "progressive_dim_steps" in kwargs: + self.cp = ProgressiveDimClusteringParameters() + else: + self.cp = ClusteringParameters() + for k, v in kwargs.items(): + if k == 'gpu': + if v == True or v == -1: + v = get_num_gpus() + self.gpu = v + else: + # if this raises an exception, it means that it is a non-existent field + getattr(self.cp, k) + setattr(self.cp, k, v) + self.centroids = None + + def train(self, x, weights=None, init_centroids=None): + """ Perform k-means clustering. + On output of the function call: + + - the centroids are in the centroids field of size (`k`, `d`). + + - the objective value at each iteration is in the array obj (size `niter`) + + - detailed optimization statistics are in the array iteration_stats. + + Parameters + ---------- + x : array_like + Training vectors, shape (n, d), `dtype` must be float32 and n should + be larger than the number of clusters `k`. 
+ weights : array_like + weight associated to each vector, shape `n` + init_centroids : array_like + initial set of centroids, shape (n, d) + + Returns + ------- + final_obj: float + final optimization objective + + """ + x = np.ascontiguousarray(x, dtype='float32') + n, d = x.shape + assert d == self.d + + if self.cp.__class__ == ClusteringParameters: + # regular clustering + clus = Clustering(d, self.k, self.cp) + if init_centroids is not None: + nc, d2 = init_centroids.shape + assert d2 == d + faiss.copy_array_to_vector(init_centroids.ravel(), clus.centroids) + if self.cp.spherical: + self.index = IndexFlatIP(d) + else: + self.index = IndexFlatL2(d) + if self.gpu: + self.index = faiss.index_cpu_to_all_gpus(self.index, ngpu=self.gpu) + clus.train(x, self.index, weights) + else: + # not supported for progressive dim + assert weights is None + assert init_centroids is None + assert not self.cp.spherical + clus = ProgressiveDimClustering(d, self.k, self.cp) + if self.gpu: + fac = GpuProgressiveDimIndexFactory(ngpu=self.gpu) + else: + fac = ProgressiveDimIndexFactory() + clus.train(n, swig_ptr(x), fac) + + centroids = faiss.vector_float_to_array(clus.centroids) + + self.centroids = centroids.reshape(self.k, d) + stats = clus.iteration_stats + stats = [stats.at(i) for i in range(stats.size())] + self.obj = np.array([st.obj for st in stats]) + # copy all the iteration_stats objects to a python array + stat_fields = 'obj time time_search imbalance_factor nsplit'.split() + self.iteration_stats = [ + {field: getattr(st, field) for field in stat_fields} + for st in stats + ] + return self.obj[-1] if self.obj.size > 0 else 0.0 + + def assign(self, x): + x = np.ascontiguousarray(x, dtype='float32') + assert self.centroids is not None, "should train before assigning" + self.index.reset() + self.index.add(self.centroids) + D, I = self.index.search(x, 1) + return D.ravel(), I.ravel() diff --git a/faiss/python/gpu_wrappers.py b/faiss/python/gpu_wrappers.py new file mode 100644 
index 000000000..f9bb21295 --- /dev/null +++ b/faiss/python/gpu_wrappers.py @@ -0,0 +1,263 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# @nolint + +# not linting this file because it imports * from swigfaiss, which +# causes a ton of useless warnings. + +import numpy as np + +from faiss.loader import * + + +########################################### +# GPU functions +########################################### + + +def index_cpu_to_gpu_multiple_py(resources, index, co=None, gpus=None): + """ builds the C++ vectors for the GPU indices and the + resources. Handles the case where the resources are assigned to + the list of GPUs """ + if gpus is None: + gpus = range(len(resources)) + vres = GpuResourcesVector() + vdev = Int32Vector() + for i, res in zip(gpus, resources): + vdev.push_back(i) + vres.push_back(res) + index = index_cpu_to_gpu_multiple(vres, vdev, index, co) + return index + + +def index_cpu_to_all_gpus(index, co=None, ngpu=-1): + index_gpu = index_cpu_to_gpus_list(index, co=co, gpus=None, ngpu=ngpu) + return index_gpu + + +def index_cpu_to_gpus_list(index, co=None, gpus=None, ngpu=-1): + """ Here we can pass list of GPU ids as a parameter or ngpu to + use first n GPU's. 
def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2):
    """
    Compute the k nearest neighbors of a vector on one GPU without constructing an index

    Parameters
    ----------
    res : StandardGpuResources
        GPU resources to use during computation
    xq : array_like
        Query vectors, shape (nq, d) where d is appropriate for the index.
        `dtype` must be float32 or float16.
    xb : array_like
        Database vectors, shape (nb, d) where d is appropriate for the index.
        `dtype` must be float32 or float16.
    k : int
        Number of nearest neighbors.
    D : array_like, optional
        Output array for distances of the nearest neighbors, shape (nq, k).
        `dtype` must be float32.
    I : array_like, optional
        Output array for the nearest neighbors, shape (nq, k).
        `dtype` must be int64 or int32.
    metric : MetricType, optional
        distance measure to use (either METRIC_L2 or METRIC_INNER_PRODUCT)

    Returns
    -------
    D : array_like
        Distances of the nearest neighbors, shape (nq, k)
    I : array_like
        Labels of the nearest neighbors, shape (nq, k)
    """
    nq, d = xq.shape

    # bfKnn accepts row- or column-major layouts: pass a Fortran-ordered
    # array transposed (no copy) rather than forcing a contiguous copy.
    if xq.flags.c_contiguous:
        xq_row_major = True
    elif xq.flags.f_contiguous:
        xq = xq.T
        xq_row_major = False
    else:
        # neither layout: fall back to a contiguous float32 copy
        xq = np.ascontiguousarray(xq, dtype='float32')
        xq_row_major = True

    xq_ptr = swig_ptr(xq)

    if xq.dtype == np.float32:
        xq_type = DistanceDataType_F32
    elif xq.dtype == np.float16:
        xq_type = DistanceDataType_F16
    else:
        raise TypeError('xq must be f32 or f16')

    nb, d2 = xb.shape
    assert d2 == d
    if xb.flags.c_contiguous:
        xb_row_major = True
    elif xb.flags.f_contiguous:
        xb = xb.T
        xb_row_major = False
    else:
        xb = np.ascontiguousarray(xb, dtype='float32')
        xb_row_major = True

    xb_ptr = swig_ptr(xb)

    if xb.dtype == np.float32:
        xb_type = DistanceDataType_F32
    elif xb.dtype == np.float16:
        xb_type = DistanceDataType_F16
    else:
        raise TypeError('xb must be float32 or float16')

    if D is None:
        D = np.empty((nq, k), dtype=np.float32)
    else:
        assert D.shape == (nq, k)
        # interface takes void*, we need to check this
        assert D.dtype == np.float32

    D_ptr = swig_ptr(D)

    if I is None:
        I = np.empty((nq, k), dtype=np.int64)
    else:
        assert I.shape == (nq, k)

    I_ptr = swig_ptr(I)

    if I.dtype == np.int64:
        I_type = IndicesDataType_I64
    elif I.dtype == np.int32:
        # fixed: was `I.dtype == I.dtype == np.int32`, whose first leg
        # compared I.dtype to itself (always true) — redundant/garbled
        I_type = IndicesDataType_I32
    else:
        raise TypeError('I must be i64 or i32')

    args = GpuDistanceParams()
    args.metric = metric
    args.k = k
    args.dims = d
    args.vectors = xb_ptr
    args.vectorsRowMajor = xb_row_major
    args.vectorType = xb_type
    args.numVectors = nb
    args.queries = xq_ptr
    args.queriesRowMajor = xq_row_major
    args.queryType = xq_type
    args.numQueries = nq
    args.outDistances = D_ptr
    args.outIndices = I_ptr
    args.outIndicesType = I_type

    # no stream synchronization needed, inputs and outputs are guaranteed to
    # be on the CPU (numpy arrays)
    bfKnn(res, args)

    return D, I
def pairwise_distance_gpu(res, xq, xb, D=None, metric=METRIC_L2):
    """
    Compute all pairwise distances between xq and xb on one GPU without constructing an index

    Parameters
    ----------
    res : StandardGpuResources
        GPU resources to use during computation
    xq : array_like
        Query vectors, shape (nq, d) where d is appropriate for the index.
        `dtype` must be float32 or float16.
    xb : array_like
        Database vectors, shape (nb, d) where d is appropriate for the index.
        `dtype` must be float32 or float16.
    D : array_like, optional
        Output array for all pairwise distances, shape (nq, nb).
        `dtype` must be float32.
    metric : MetricType, optional
        distance measure to use (either METRIC_L2 or METRIC_INNER_PRODUCT)

    Returns
    -------
    D : array_like
        All pairwise distances, shape (nq, nb)
    """
    nq, d = xq.shape

    # bfKnn accepts row- or column-major layouts: pass a Fortran-ordered
    # array transposed (no copy) rather than forcing a contiguous copy.
    if xq.flags.c_contiguous:
        xq_row_major = True
    elif xq.flags.f_contiguous:
        xq = xq.T
        xq_row_major = False
    else:
        # neither layout: fall back to a contiguous float32 copy,
        # consistent with knn_gpu (previously raised TypeError here)
        xq = np.ascontiguousarray(xq, dtype='float32')
        xq_row_major = True

    xq_ptr = swig_ptr(xq)

    if xq.dtype == np.float32:
        xq_type = DistanceDataType_F32
    elif xq.dtype == np.float16:
        xq_type = DistanceDataType_F16
    else:
        # fixed: previously copied the WRONG array
        # (`xq = np.ascontiguousarray(xb, ...)`) and left xq_type unset,
        # which crashed later with UnboundLocalError. Raise like xb does.
        raise TypeError('xq must be float32 or float16')

    nb, d2 = xb.shape
    assert d2 == d
    if xb.flags.c_contiguous:
        xb_row_major = True
    elif xb.flags.f_contiguous:
        xb = xb.T
        xb_row_major = False
    else:
        xb = np.ascontiguousarray(xb, dtype='float32')
        xb_row_major = True

    xb_ptr = swig_ptr(xb)

    if xb.dtype == np.float32:
        xb_type = DistanceDataType_F32
    elif xb.dtype == np.float16:
        xb_type = DistanceDataType_F16
    else:
        raise TypeError('xb must be float32 or float16')

    if D is None:
        D = np.empty((nq, nb), dtype=np.float32)
    else:
        assert D.shape == (nq, nb)
        # interface takes void*, we need to check this
        assert D.dtype == np.float32

    D_ptr = swig_ptr(D)

    args = GpuDistanceParams()
    args.metric = metric
    args.k = -1  # selects all pairwise distances
    args.dims = d
    args.vectors = xb_ptr
    args.vectorsRowMajor = xb_row_major
    args.vectorType = xb_type
    args.numVectors = nb
    args.queries = xq_ptr
    args.queriesRowMajor = xq_row_major
    args.queryType = xq_type
    args.numQueries = nq
    args.outDistances = D_ptr

    # no stream synchronization needed, inputs and outputs are guaranteed to
    # be on the CPU (numpy arrays)
    bfKnn(res, args)

    return D
a/faiss/python/setup.py +++ b/faiss/python/setup.py @@ -15,6 +15,10 @@ os.mkdir("faiss") shutil.copytree("contrib", "faiss/contrib") shutil.copyfile("__init__.py", "faiss/__init__.py") shutil.copyfile("loader.py", "faiss/loader.py") +shutil.copyfile("class_wrappers.py", "faiss/class_wrappers.py") +shutil.copyfile("gpu_wrappers.py", "faiss/gpu_wrappers.py") +shutil.copyfile("extra_wrappers.py", "faiss/extra_wrappers.py") +shutil.copyfile("array_conversions.py", "faiss/array_conversions.py") ext = ".pyd" if platform.system() == 'Windows' else ".so" prefix = "Release/" * (platform.system() == 'Windows')