faiss-gpu-raft package (#2992)

Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2992 Reviewed By: mdouze Differential Revision: D48391366 Pulled By: algoriddle fbshipit-source-id: 94b7f62afc8a09a9feaea47bf60e5358d89fcde5
2023-08-16 09:30:41 -07:00 · 2023-08-16 09:30:41 -07:00 · 2768fb38b2
parent c09992bc8a
commit 2768fb38b2
18 changed files with 283 additions and 52 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -17,11 +17,6 @@ executors:
    machine:
      image: ubuntu-2004:current
    resource_class: arm.medium
-  macosx-x86_64-cpu:
-    environment:
-      CONDA_ARCH: MacOSX-x86_64
-    macos:
-      xcode: 11.7.0  # max supported for conda build, https://circleci.com/docs/using-macos#supported-xcode-versions
  macosx-arm64-cpu:
    environment:
      CONDA_ARCH: MacOSX-arm64
@ -66,6 +61,9 @@ jobs:
      cuda:
        type: string
        default: ""
+      raft:
+        type: string
+        default: ""
      cuda_archs:
        type: string
        default: ""
@ -93,6 +91,8 @@ jobs:
      - run:
          name: Install conda build tools
          command: |
+            conda config --set solver libmamba
+            # conda config --set verbosity 3
            conda update -y -q conda
            conda install -y -q conda-build
      - when:
@ -105,14 +105,16 @@ jobs:
                  conda config --set anaconda_upload yes
      - when:
          condition:
-              not: << parameters.label >>
+            and:
+              - not: << parameters.label >>
+              - not: << parameters.cuda >>
          steps:
            - run:
                name: Conda build (CPU)
                no_output_timeout: 30m
                command: |
                  cd conda
-                  conda build faiss --python 3.10 -c pytorch -c pkgs/main -c conda-forge
+                  conda build faiss --python 3.11 -c pytorch
      - when:
          condition:
            and:
@ -124,12 +126,28 @@ jobs:
                no_output_timeout: 30m
                command: |
                  cd conda
-                  conda build faiss --user pytorch --label <<parameters.label>> -c pytorch -c pkgs/main -c conda-forge
+                  conda build faiss --user pytorch --label <<parameters.label>> -c pytorch
+      - when:
+          condition:
+            and:
+              - not: << parameters.label >>
+              - << parameters.cuda >>
+              - not: << parameters.raft >>
+          steps:
+            - run:
+                name: Conda build (GPU)
+                no_output_timeout: 60m
+                command: |
+                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
+                  cd conda
+                  conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
+                      -c pytorch -c nvidia
      - when:
          condition:
            and:
              - << parameters.label >>
              - << parameters.cuda >>
+              - not: << parameters.raft >>
          steps:
            - run:
                name: Conda build (GPU) w/ anaconda upload
@ -138,7 +156,37 @@ jobs:
                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                  cd conda
                  conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c pkgs/main -c conda-forge
+                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia
+      - when:
+          condition:
+            and:
+              - not: << parameters.label >>
+              - << parameters.cuda >>
+              - << parameters.raft >>
+          steps:
+            - run:
+                name: Conda build (GPU w/ RAFT)
+                no_output_timeout: 60m
+                command: |
+                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
+                  cd conda
+                  conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
+                      -c pytorch -c nvidia -c rapidsai -c conda-forge
+      - when:
+          condition:
+            and:
+              - << parameters.label >>
+              - << parameters.cuda >>
+              - << parameters.raft >>
+          steps:
+            - run:
+                name: Conda build (GPU w/ RAFT) w/ anaconda upload
+                no_output_timeout: 60m
+                command: |
+                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
+                  cd conda
+                  conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
+                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c rapidsai -c conda-forge

  build_cmake:
    parameters:
@ -180,7 +228,7 @@ jobs:
          command: |
            conda config --set solver libmamba
            conda update -y -q conda
-            conda install -y -q pkgs/main::python=3.10 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64 -c pkgs/main -c conda-forge
+            conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64
      - when:
          condition:
            equal: [ "ON", << parameters.raft >> ]
@ -282,9 +330,6 @@ workflows:
      - build_conda:
          name: Linux x86_64 (conda)
          exec: linux-x86_64-cpu
-      - build_conda:
-          name: OSX x86_64 (conda)
-          exec: macosx-x86_64-cpu
      - build_conda:
          name: Windows x86_64 (conda)
          exec: windows-x86_64-cpu
@ -313,17 +358,21 @@ workflows:
            branches:
              ignore: /.*/
      - build_conda:
-          name: Windows x86_64 packages
-          exec: windows-x86_64-cpu
+          name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.4)
+          exec: linux-x86_64-gpu
          label: main
+          raft: "ON"
+          cuda: "11.4"
+          cuda_archs: "60;61;70;72;75;80;86"
+          compiler_version: "11.2"
          filters:
            tags:
              only: /^v.*/
            branches:
              ignore: /.*/
      - build_conda:
-          name: OSX x86_64 packages
-          exec: macosx-x86_64-cpu
+          name: Windows x86_64 packages
+          exec: windows-x86_64-cpu
          label: main
          filters:
            tags:
@ -373,10 +422,6 @@ workflows:
          name: Windows x86_64 nightlies
          exec: windows-x86_64-cpu
          label: nightly
-      - build_conda:
-          name: OSX x86_64 nightlies
-          exec: macosx-x86_64-cpu
-          label: nightly
      - build_conda:
          name: OSX arm64 nightlies
          exec: macosx-arm64-cpu
--- a/conda/conda_build_config.yaml
+++ b/conda/conda_build_config.yaml
@ -1,4 +1,4 @@
 python:
-  - 3.8  # [not x86_64 or not osx]
  - 3.9
  - 3.10
+  - 3.11
--- a/conda/faiss-gpu-raft/build-lib.sh
+++ b/conda/faiss-gpu-raft/build-lib.sh
@ -0,0 +1,26 @@
+#!/bin/sh
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -e
+
+
+# Build libfaiss.so/libfaiss_avx2.so.
+cmake -B _build \
+      -DBUILD_SHARED_LIBS=ON \
+      -DBUILD_TESTING=OFF \
+      -DFAISS_OPT_LEVEL=avx2 \
+      -DFAISS_ENABLE_GPU=ON \
+      -DFAISS_ENABLE_RAFT=ON \
+      -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \
+      -DFAISS_ENABLE_PYTHON=OFF \
+      -DBLA_VENDOR=Intel10_64lp \
+      -DCMAKE_INSTALL_LIBDIR=lib \
+      -DCMAKE_BUILD_TYPE=Release .
+
+make -C _build -j$(nproc) faiss faiss_avx2
+
+cmake --install _build --prefix $PREFIX
+cmake --install _build --prefix _libfaiss_stage/
--- a/conda/faiss-gpu-raft/build-pkg.sh
+++ b/conda/faiss-gpu-raft/build-pkg.sh
@ -0,0 +1,24 @@
+#!/bin/sh
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -e
+
+
+# Build swigfaiss.so/swigfaiss_avx2.so.
+cmake -B _build_python_${PY_VER} \
+      -Dfaiss_ROOT=_libfaiss_stage/ \
+      -DFAISS_OPT_LEVEL=avx2 \
+      -DFAISS_ENABLE_GPU=ON \
+      -DFAISS_ENABLE_RAFT=ON \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DPython_EXECUTABLE=$PYTHON \
+      faiss/python
+
+make -C _build_python_${PY_VER} -j$(nproc) swigfaiss swigfaiss_avx2
+
+# Build actual python module.
+cd _build_python_${PY_VER}/
+$PYTHON setup.py install --single-version-externally-managed --record=record.txt --prefix=$PREFIX
--- a/conda/faiss-gpu-raft/meta.yaml
+++ b/conda/faiss-gpu-raft/meta.yaml
@ -0,0 +1,104 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+{% set version = environ.get('GIT_DESCRIBE_TAG').lstrip('v') %}
+{% set suffix = "_nightly" if environ.get('PACKAGE_TYPE') == 'nightly' else "" %}
+{% set number = GIT_DESCRIBE_NUMBER %}
+
+package:
+  name: faiss-pkg
+  version: {{ version }}
+
+build:
+  number: {{ number }}
+
+about:
+  home: https://github.com/facebookresearch/faiss
+  license: MIT
+  license_family: MIT
+  license_file: LICENSE
+  summary: A library for efficient similarity search and clustering of dense vectors.
+
+source:
+  git_url: ../../
+
+outputs:
+  - name: libfaiss
+    script: build-lib.sh  # [x86_64 and not win and not osx]
+    script: build-lib-osx.sh  # [x86_64 and osx]
+    script: build-lib-arm64.sh  # [not x86_64]
+    script: build-lib.bat  # [win]
+    build:
+      string: "h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}_raft{{ suffix }}"
+      run_exports:
+        - {{ pin_compatible('libfaiss', exact=True) }}
+      script_env:
+        - CUDA_ARCHS
+    requirements:
+      build:
+        - {{ compiler('cxx') }}
+        - sysroot_linux-64  # [linux64]
+        - llvm-openmp  # [osx]
+        - cmake >=3.23.1
+        - make  # [not win]
+        - mkl-devel =2023  # [x86_64]
+      host:
+        - mkl =2023  # [x86_64]
+        - openblas  # [not x86_64]
+        - cudatoolkit {{ cudatoolkit }}
+        - libraft =23.08
+      run:
+        - mkl =2023  # [x86_64]
+        - openblas  # [not x86_64]
+        - {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
+        - libraft =23.08
+    test:
+      requires:
+        - conda-build
+      commands:
+        - test -f $PREFIX/lib/libfaiss$SHLIB_EXT       # [not win]
+        - test -f $PREFIX/lib/libfaiss_avx2$SHLIB_EXT  # [x86_64 and not win]
+        - conda inspect linkages -p $PREFIX $PKG_NAME  # [not win]
+        - conda inspect objects -p $PREFIX $PKG_NAME   # [osx]
+
+  - name: faiss-gpu-raft
+    script: build-pkg.sh  # [x86_64 and not win and not osx]
+    script: build-pkg-osx.sh  # [x86_64 and osx]
+    script: build-pkg-arm64.sh # [not x86_64]
+    script: build-pkg.bat  # [win]
+    build:
+      string: "py{{ PY_VER }}_h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}{{ suffix }}"
+    requirements:
+      build:
+        - {{ compiler('cxx') }}
+        - sysroot_linux-64 =2.17 # [linux64]
+        - swig
+        - cmake >=3.23.1
+        - make  # [not win]
+      host:
+        - python {{ python }}
+        - numpy >=1.19,<2
+        - {{ pin_subpackage('libfaiss', exact=True) }}
+      run:
+        - python {{ python }}
+        - numpy >=1.19,<2
+        - {{ pin_subpackage('libfaiss', exact=True) }}
+    test:
+      requires:
+        - numpy
+        - scipy
+        - pytorch
+      commands:
+        - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
+        - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
+        - cp tests/common_faiss_tests.py faiss/gpu/test
+        - python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "test_*"
+        - python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "torch_*"
+        - sh test_cpu_dispatch.sh  # [linux64]
+      files:
+        - test_cpu_dispatch.sh  # [linux64]
+      source_files:
+        - tests/
+        - faiss/gpu/test/
--- a/conda/faiss-gpu/build-lib.sh
+++ b/conda/faiss-gpu/build-lib.sh
@ -13,6 +13,7 @@ cmake -B _build \
      -DBUILD_TESTING=OFF \
      -DFAISS_OPT_LEVEL=avx2 \
      -DFAISS_ENABLE_GPU=ON \
+      -DFAISS_ENABLE_RAFT=OFF \
      -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \
      -DFAISS_ENABLE_PYTHON=OFF \
      -DBLA_VENDOR=Intel10_64lp \
--- a/conda/faiss-gpu/build-pkg.sh
+++ b/conda/faiss-gpu/build-pkg.sh
@ -12,6 +12,7 @@ cmake -B _build_python_${PY_VER} \
      -Dfaiss_ROOT=_libfaiss_stage/ \
      -DFAISS_OPT_LEVEL=avx2 \
      -DFAISS_ENABLE_GPU=ON \
+      -DFAISS_ENABLE_RAFT=OFF \
      -DCMAKE_BUILD_TYPE=Release \
      -DPython_EXECUTABLE=$PYTHON \
      faiss/python
--- a/conda/faiss-gpu/install-cmake.sh
+++ b/conda/faiss-gpu/install-cmake.sh
@ -6,5 +6,5 @@

 set -e

-wget -O - https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.tar.gz | tar xzf -
-cp -R cmake-3.17.1-Linux-x86_64/* $PREFIX
+FAISS_DISABLE_CPU_FEATURES=AVX2 LD_DEBUG=libs python -c "import faiss" 2>&1 | grep libfaiss.so
+LD_DEBUG=libs python -c "import faiss" 2>&1 | grep libfaiss_avx2.so
--- a/conda/faiss-gpu/meta.yaml
+++ b/conda/faiss-gpu/meta.yaml
@ -26,7 +26,8 @@ source:

 outputs:
  - name: libfaiss
-    script: build-lib.sh  # [x86_64 and not win]
+    script: build-lib.sh  # [x86_64 and not win and not osx]
+    script: build-lib-osx.sh  # [x86_64 and osx]
    script: build-lib-arm64.sh  # [not x86_64]
    script: build-lib.bat  # [win]
    build:
@ -38,7 +39,7 @@ outputs:
    requirements:
      build:
        - {{ compiler('cxx') }}
-        - sysroot_linux-64 =2.17  # [linux64]
+        - sysroot_linux-64  # [linux64]
        - llvm-openmp  # [osx]
        - cmake >=3.23.1
        - make  # [not win]
@ -61,7 +62,8 @@ outputs:
        - conda inspect objects -p $PREFIX $PKG_NAME   # [osx]

  - name: faiss-gpu
-    script: build-pkg.sh  # [x86_64 and not win]
+    script: build-pkg.sh  # [x86_64 and not win and not osx]
+    script: build-pkg-osx.sh  # [x86_64 and osx]
    script: build-pkg-arm64.sh # [not x86_64]
    script: build-pkg.bat  # [win]
    build:
--- a/conda/faiss/install-cmake.sh
+++ b/conda/faiss/install-cmake.sh
@ -1,10 +0,0 @@
-#!/bin/sh#
-# Copyright (c) Facebook, Inc. and its affiliates.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-
-set -e
-
-wget -O - https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.tar.gz | tar xzf -
-cp -R cmake-3.17.1-Linux-x86_64/* $PREFIX
--- a/conda/faiss/meta.yaml
+++ b/conda/faiss/meta.yaml
@ -37,7 +37,7 @@ outputs:
    requirements:
      build:
        - {{ compiler('cxx') }}
-        - sysroot_linux-64 =2.17  # [linux64]
+        - sysroot_linux-64  # [linux64]
        - llvm-openmp  # [osx]
        - cmake >=3.23.1
        - make  # [not win]
--- a/faiss/gpu/test/test_gpu_basics.py
+++ b/faiss/gpu/test/test_gpu_basics.py
@ -274,16 +274,16 @@ class TestKnn(unittest.TestCase):
        else:
            faiss.bfKnn(res, params)

-        self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
-        self.assertGreaterEqual((out_i == ref_i).sum(), ref_i.size)
+        np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
+        np.testing.assert_array_equal(out_i, ref_i)

-        out_d, out_i = faiss.knn_gpu(
-            res, qs, xs, k, device=gpu_id,
+        faiss.knn_gpu(
+            res, qs, xs, k, out_d, out_i, device=gpu_id,
            vectorsMemoryLimit=vectorsMemoryLimit,
            queriesMemoryLimit=queriesMemoryLimit)

-        self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
-        self.assertGreaterEqual((out_i == ref_i).sum(), ref_i.size)
+        np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
+        np.testing.assert_array_equal(out_i, ref_i)

        # Try int32 out indices
        out_i32 = np.empty((nq, k), dtype=np.int32)
@ -292,7 +292,8 @@ class TestKnn(unittest.TestCase):

        faiss.bfKnn(res, params)

-        self.assertEqual((out_i32 == ref_i).sum(), ref_i.size)
+        np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
+        np.testing.assert_array_equal(out_i32, ref_i)

        # Try float16 data/queries, i64 out indices
        xs_f16 = xs.astype(np.float16)
@ -320,7 +321,7 @@ class TestKnn(unittest.TestCase):
        faiss.bfKnn(res, params)

        self.assertGreaterEqual((out_i_f16 == ref_i_f16).sum(), ref_i_f16.size - 5)
-        self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol = 2e-3))
+        np.testing.assert_allclose(ref_d_f16, out_d_f16, atol = 2e-3)

 class TestAllPairwiseDistance(unittest.TestCase):
    def test_dist(self):
@ -381,7 +382,7 @@ class TestAllPairwiseDistance(unittest.TestCase):

            print('f32', np.abs(ref_d - out_d).max())

-            self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
+            np.testing.assert_allclose(ref_d, out_d, atol=1e-5)

            # Try float16 data/queries
            xs_f16 = xs.astype(np.float16)
@ -414,7 +415,7 @@ class TestAllPairwiseDistance(unittest.TestCase):

            print('f16', np.abs(ref_d_f16 - out_d_f16).max())

-            self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol = 4e-3))
+            np.testing.assert_allclose(ref_d_f16, out_d_f16, atol = 4e-3)



--- a/faiss/gpu/test/test_raft.py
+++ b/faiss/gpu/test/test_raft.py
@ -58,11 +58,11 @@ class TestBfKnn(unittest.TestCase):

        index_gpu.add(xb[2000:])
        Dnew, Inew = index_gpu.search(ds.get_queries(), 13)
-        np.testing.assert_allclose(Dref, Dnew, atol=1e-5)
+        np.testing.assert_allclose(Dref, Dnew, atol=1e-4)
        np.testing.assert_array_equal(Iref, Inew)

        # copy back to CPU
        index2 = faiss.index_gpu_to_cpu(index_gpu)
        Dnew, Inew = index2.search(ds.get_queries(), 13)
-        np.testing.assert_allclose(Dref, Dnew, atol=1e-5)
+        np.testing.assert_allclose(Dref, Dnew, atol=1e-4)
        np.testing.assert_array_equal(Iref, Inew)
--- a/faiss/python/CMakeLists.txt
+++ b/faiss/python/CMakeLists.txt
@ -94,6 +94,9 @@ endif()

 if(FAISS_ENABLE_GPU)
  find_package(CUDAToolkit REQUIRED)
+  if(FAISS_ENABLE_RAFT)
+    find_package(raft COMPONENTS compiled distributed)
+  endif()
  target_link_libraries(swigfaiss PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
  target_link_libraries(swigfaiss_avx2 PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
 endif()
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -29,6 +29,7 @@ set(FAISS_TEST_SRC
  test_heap.cpp
  test_code_distance.cpp
  test_hnsw.cpp
+  test_partitioning.cpp
 )

 add_executable(faiss_test ${FAISS_TEST_SRC})
--- a/tests/test_contrib.py
+++ b/tests/test_contrib.py
@ -217,7 +217,7 @@ class TestInspect(unittest.TestCase):
        Yref = X @ A.T + b
        lt = inspect_tools.make_LinearTransform_matrix(A, b)
        Ynew = lt.apply(X)
-        np.testing.assert_equal(Yref, Ynew)
+        np.testing.assert_allclose(Yref, Ynew, rtol=1e-06)

    def test_NSG_neighbors(self):
        # FIXME number of elements to add should be >> 100
--- a/tests/test_local_search_quantizer.py
+++ b/tests/test_local_search_quantizer.py
@ -582,7 +582,7 @@ class TestProductLocalSearchQuantizer(unittest.TestCase):
        lut_ref = lut_ref.reshape(nq, codebook_size)

        # max rtoal in OSX: 2.87e-6
-        np.testing.assert_allclose(lut, lut_ref, rtol=5e-06)
+        np.testing.assert_allclose(lut, lut_ref, rtol=1e-04)


 class TestIndexProductLocalSearchQuantizer(unittest.TestCase):
--- a/tests/test_partitioning.cpp
+++ b/tests/test_partitioning.cpp
@ -0,0 +1,33 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <gtest/gtest.h>
+
+#include <faiss/utils/AlignedTable.h>
+#include <faiss/utils/partitioning.h>
+
+using namespace faiss;
+
+typedef AlignedTable<uint16_t> AlignedTableUint16;
+
+// TODO: This test fails when Faiss is compiled with
+// GCC 13.2 from conda-forge with AVX2 enabled. This may be
+// a GCC bug that needs to be investigated further.
+// As of 16-AUG-2023 the Faiss conda packages are built
+// with GCC 11.2, so the published binaries are not affected.
+TEST(TestPartitioning, TestPartitioningBigRange) {
+    auto n = 1024;
+    AlignedTableUint16 tab(n);
+    for (auto i = 0; i < n; i++) {
+        tab[i] = i * 64;
+    }
+    int32_t hist[16]{};
+    simd_histogram_16(tab.get(), n, 0, 12, hist);
+    for (auto i = 0; i < 16; i++) {
+        ASSERT_EQ(hist[i], 64);
+    }
+}