faiss-gpu-raft package (#2992)

Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2992

Reviewed By: mdouze

Differential Revision: D48391366

Pulled By: algoriddle

fbshipit-source-id: 94b7f62afc8a09a9feaea47bf60e5358d89fcde5
pull/3007/head
Gergely Szilvasy 2023-08-16 09:30:41 -07:00 committed by Facebook GitHub Bot
parent c09992bc8a
commit 2768fb38b2
18 changed files with 283 additions and 52 deletions

View File

@ -17,11 +17,6 @@ executors:
machine:
image: ubuntu-2004:current
resource_class: arm.medium
macosx-x86_64-cpu:
environment:
CONDA_ARCH: MacOSX-x86_64
macos:
xcode: 11.7.0 # max supported for conda build, https://circleci.com/docs/using-macos#supported-xcode-versions
macosx-arm64-cpu:
environment:
CONDA_ARCH: MacOSX-arm64
@ -66,6 +61,9 @@ jobs:
cuda:
type: string
default: ""
raft:
type: string
default: ""
cuda_archs:
type: string
default: ""
@ -93,6 +91,8 @@ jobs:
- run:
name: Install conda build tools
command: |
conda config --set solver libmamba
# conda config --set verbosity 3
conda update -y -q conda
conda install -y -q conda-build
- when:
@ -105,14 +105,16 @@ jobs:
conda config --set anaconda_upload yes
- when:
condition:
not: << parameters.label >>
and:
- not: << parameters.label >>
- not: << parameters.cuda >>
steps:
- run:
name: Conda build (CPU)
no_output_timeout: 30m
command: |
cd conda
conda build faiss --python 3.10 -c pytorch -c pkgs/main -c conda-forge
conda build faiss --python 3.11 -c pytorch
- when:
condition:
and:
@ -124,12 +126,28 @@ jobs:
no_output_timeout: 30m
command: |
cd conda
conda build faiss --user pytorch --label <<parameters.label>> -c pytorch -c pkgs/main -c conda-forge
conda build faiss --user pytorch --label <<parameters.label>> -c pytorch
- when:
condition:
and:
- not: << parameters.label >>
- << parameters.cuda >>
- not: << parameters.raft >>
steps:
- run:
name: Conda build (GPU)
no_output_timeout: 60m
command: |
sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
cd conda
conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-c pytorch -c nvidia
- when:
condition:
and:
- << parameters.label >>
- << parameters.cuda >>
- not: << parameters.raft >>
steps:
- run:
name: Conda build (GPU) w/ anaconda upload
@ -138,7 +156,37 @@ jobs:
sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
cd conda
conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
--user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c pkgs/main -c conda-forge
--user pytorch --label <<parameters.label>> -c pytorch -c nvidia
- when:
condition:
and:
- not: << parameters.label >>
- << parameters.cuda >>
- << parameters.raft >>
steps:
- run:
name: Conda build (GPU w/ RAFT)
no_output_timeout: 60m
command: |
sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
cd conda
conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-c pytorch -c nvidia -c rapidsai -c conda-forge
- when:
condition:
and:
- << parameters.label >>
- << parameters.cuda >>
- << parameters.raft >>
steps:
- run:
name: Conda build (GPU w/ RAFT) w/ anaconda upload
no_output_timeout: 60m
command: |
sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
cd conda
conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
--user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c rapidsai -c conda-forge
build_cmake:
parameters:
@ -180,7 +228,7 @@ jobs:
command: |
conda config --set solver libmamba
conda update -y -q conda
conda install -y -q pkgs/main::python=3.10 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64 -c pkgs/main -c conda-forge
conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64
- when:
condition:
equal: [ "ON", << parameters.raft >> ]
@ -282,9 +330,6 @@ workflows:
- build_conda:
name: Linux x86_64 (conda)
exec: linux-x86_64-cpu
- build_conda:
name: OSX x86_64 (conda)
exec: macosx-x86_64-cpu
- build_conda:
name: Windows x86_64 (conda)
exec: windows-x86_64-cpu
@ -313,17 +358,21 @@ workflows:
branches:
ignore: /.*/
- build_conda:
name: Windows x86_64 packages
exec: windows-x86_64-cpu
name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.4)
exec: linux-x86_64-gpu
label: main
raft: "ON"
cuda: "11.4"
cuda_archs: "60;61;70;72;75;80;86"
compiler_version: "11.2"
filters:
tags:
only: /^v.*/
branches:
ignore: /.*/
- build_conda:
name: OSX x86_64 packages
exec: macosx-x86_64-cpu
name: Windows x86_64 packages
exec: windows-x86_64-cpu
label: main
filters:
tags:
@ -373,10 +422,6 @@ workflows:
name: Windows x86_64 nightlies
exec: windows-x86_64-cpu
label: nightly
- build_conda:
name: OSX x86_64 nightlies
exec: macosx-x86_64-cpu
label: nightly
- build_conda:
name: OSX arm64 nightlies
exec: macosx-arm64-cpu

View File

@ -1,4 +1,4 @@
python:
- 3.8 # [not x86_64 or not osx]
- 3.9
- 3.10
- 3.11

View File

@ -0,0 +1,26 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Abort on the first failing command.
set -e

# Build libfaiss.so/libfaiss_avx2.so.
# Configures a Release build of the shared library with GPU and RAFT support
# enabled and the Python bindings disabled. The target CUDA architectures are
# read from the CUDA_ARCHS environment variable.
cmake -B _build \
      -DBUILD_SHARED_LIBS=ON \
      -DBUILD_TESTING=OFF \
      -DFAISS_OPT_LEVEL=avx2 \
      -DFAISS_ENABLE_GPU=ON \
      -DFAISS_ENABLE_RAFT=ON \
      -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \
      -DFAISS_ENABLE_PYTHON=OFF \
      -DBLA_VENDOR=Intel10_64lp \
      -DCMAKE_INSTALL_LIBDIR=lib \
      -DCMAKE_BUILD_TYPE=Release .

# Build both library flavours using one job per available processor.
# The command substitution is quoted so its output is passed as a single word.
make -C _build -j"$(nproc)" faiss faiss_avx2

# Install into the package prefix and into a local staging directory.
# PREFIX is quoted so a path containing whitespace or glob characters is
# still passed to cmake as one argument.
cmake --install _build --prefix "$PREFIX"
cmake --install _build --prefix _libfaiss_stage/

View File

@ -0,0 +1,24 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Abort on the first failing command.
set -e

# Per-Python-version build directory. It is named once and always expanded
# inside double quotes so the value is never word-split or glob-expanded.
build_dir="_build_python_${PY_VER}"

# Build swigfaiss.so/swigfaiss_avx2.so.
# Configures the Python bindings against the libfaiss found under
# _libfaiss_stage/, with GPU and RAFT support enabled, using the interpreter
# named by the PYTHON environment variable.
cmake -B "${build_dir}" \
      -Dfaiss_ROOT=_libfaiss_stage/ \
      -DFAISS_OPT_LEVEL=avx2 \
      -DFAISS_ENABLE_GPU=ON \
      -DFAISS_ENABLE_RAFT=ON \
      -DCMAKE_BUILD_TYPE=Release \
      -DPython_EXECUTABLE="${PYTHON}" \
      faiss/python

# Build both SWIG modules using one job per available processor.
make -C "${build_dir}" -j"$(nproc)" swigfaiss swigfaiss_avx2

# Build actual python module.
# setup.py is run from inside the build directory and installs into PREFIX.
cd "${build_dir}/"
"${PYTHON}" setup.py install --single-version-externally-managed --record=record.txt --prefix="${PREFIX}"

View File

@ -0,0 +1,104 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Package version: GIT_DESCRIBE_TAG with any leading "v" characters stripped.
{% set version = environ.get('GIT_DESCRIBE_TAG').lstrip('v') %}
# Build-string suffix: "_nightly" when PACKAGE_TYPE is "nightly", otherwise empty.
{% set suffix = "_nightly" if environ.get('PACKAGE_TYPE') == 'nightly' else "" %}
# Build number is taken from GIT_DESCRIBE_NUMBER.
{% set number = GIT_DESCRIBE_NUMBER %}
# Top-level package; the installable artifacts are declared under "outputs" below.
package:
name: faiss-pkg
version: {{ version }}
build:
number: {{ number }}
about:
home: https://github.com/facebookresearch/faiss
license: MIT
license_family: MIT
license_file: LICENSE
summary: A library for efficient similarity search and clustering of dense vectors.
# Sources come from the git checkout two directories above this recipe.
source:
git_url: ../../
# Two outputs: the native library, then the Python package that depends on it.
outputs:
# Output 1: the native libfaiss library.
- name: libfaiss
# One build script per platform; the trailing selector comment on each line picks which one applies.
script: build-lib.sh # [x86_64 and not win and not osx]
script: build-lib-osx.sh # [x86_64 and osx]
script: build-lib-arm64.sh # [not x86_64]
script: build-lib.bat # [win]
build:
# Build string encodes the package hash, build number, CUDA toolkit version, a "raft" marker and the nightly suffix.
string: "h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}_raft{{ suffix }}"
run_exports:
- {{ pin_compatible('libfaiss', exact=True) }}
# CUDA_ARCHS is forwarded from the invoking environment; build-lib.sh reads it.
script_env:
- CUDA_ARCHS
requirements:
build:
- {{ compiler('cxx') }}
- sysroot_linux-64 # [linux64]
- llvm-openmp # [osx]
- cmake >=3.23.1
- make # [not win]
- mkl-devel =2023 # [x86_64]
# BLAS provider is MKL on x86_64 and OpenBLAS elsewhere; libraft is pinned to 23.08.
host:
- mkl =2023 # [x86_64]
- openblas # [not x86_64]
- cudatoolkit {{ cudatoolkit }}
- libraft =23.08
run:
- mkl =2023 # [x86_64]
- openblas # [not x86_64]
- {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
- libraft =23.08
# Checks that the shared libraries exist under PREFIX/lib, then runs conda inspect on the package.
test:
requires:
- conda-build
commands:
- test -f $PREFIX/lib/libfaiss$SHLIB_EXT # [not win]
- test -f $PREFIX/lib/libfaiss_avx2$SHLIB_EXT # [x86_64 and not win]
- conda inspect linkages -p $PREFIX $PKG_NAME # [not win]
- conda inspect objects -p $PREFIX $PKG_NAME # [osx]
# Output 2: the Python package, pinned to the exact libfaiss output declared above.
- name: faiss-gpu-raft
# One build script per platform, chosen by the selector comment on each line.
script: build-pkg.sh # [x86_64 and not win and not osx]
script: build-pkg-osx.sh # [x86_64 and osx]
script: build-pkg-arm64.sh # [not x86_64]
script: build-pkg.bat # [win]
build:
# Build string encodes the Python version, package hash, build number, CUDA toolkit version and the nightly suffix.
string: "py{{ PY_VER }}_h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}{{ suffix }}"
requirements:
build:
- {{ compiler('cxx') }}
- sysroot_linux-64 =2.17 # [linux64]
- swig
- cmake >=3.23.1
- make # [not win]
host:
- python {{ python }}
- numpy >=1.19,<2
- {{ pin_subpackage('libfaiss', exact=True) }}
run:
- python {{ python }}
- numpy >=1.19,<2
- {{ pin_subpackage('libfaiss', exact=True) }}
# Runs unittest discovery over tests/ and faiss/gpu/test/ for the "test_*" and "torch_*" patterns.
test:
requires:
- numpy
- scipy
- pytorch
commands:
- python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
- python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
# common_faiss_tests.py is copied into the GPU test directory before those tests run.
- cp tests/common_faiss_tests.py faiss/gpu/test
- python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "test_*"
- python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "torch_*"
- sh test_cpu_dispatch.sh # [linux64]
files:
- test_cpu_dispatch.sh # [linux64]
source_files:
- tests/
- faiss/gpu/test/

View File

@ -13,6 +13,7 @@ cmake -B _build \
-DBUILD_TESTING=OFF \
-DFAISS_OPT_LEVEL=avx2 \
-DFAISS_ENABLE_GPU=ON \
-DFAISS_ENABLE_RAFT=OFF \
-DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \
-DFAISS_ENABLE_PYTHON=OFF \
-DBLA_VENDOR=Intel10_64lp \

View File

@ -12,6 +12,7 @@ cmake -B _build_python_${PY_VER} \
-Dfaiss_ROOT=_libfaiss_stage/ \
-DFAISS_OPT_LEVEL=avx2 \
-DFAISS_ENABLE_GPU=ON \
-DFAISS_ENABLE_RAFT=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DPython_EXECUTABLE=$PYTHON \
faiss/python

View File

@ -6,5 +6,5 @@
set -e
wget -O - https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.tar.gz | tar xzf -
cp -R cmake-3.17.1-Linux-x86_64/* $PREFIX
FAISS_DISABLE_CPU_FEATURES=AVX2 LD_DEBUG=libs python -c "import faiss" 2>&1 | grep libfaiss.so
LD_DEBUG=libs python -c "import faiss" 2>&1 | grep libfaiss_avx2.so

View File

@ -26,7 +26,8 @@ source:
outputs:
- name: libfaiss
script: build-lib.sh # [x86_64 and not win]
script: build-lib.sh # [x86_64 and not win and not osx]
script: build-lib-osx.sh # [x86_64 and osx]
script: build-lib-arm64.sh # [not x86_64]
script: build-lib.bat # [win]
build:
@ -38,7 +39,7 @@ outputs:
requirements:
build:
- {{ compiler('cxx') }}
- sysroot_linux-64 =2.17 # [linux64]
- sysroot_linux-64 # [linux64]
- llvm-openmp # [osx]
- cmake >=3.23.1
- make # [not win]
@ -61,7 +62,8 @@ outputs:
- conda inspect objects -p $PREFIX $PKG_NAME # [osx]
- name: faiss-gpu
script: build-pkg.sh # [x86_64 and not win]
script: build-pkg.sh # [x86_64 and not win and not osx]
script: build-pkg-osx.sh # [x86_64 and osx]
script: build-pkg-arm64.sh # [not x86_64]
script: build-pkg.bat # [win]
build:

View File

@ -1,10 +0,0 @@
#!/bin/sh#
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
set -e
wget -O - https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.tar.gz | tar xzf -
cp -R cmake-3.17.1-Linux-x86_64/* $PREFIX

View File

@ -37,7 +37,7 @@ outputs:
requirements:
build:
- {{ compiler('cxx') }}
- sysroot_linux-64 =2.17 # [linux64]
- sysroot_linux-64 # [linux64]
- llvm-openmp # [osx]
- cmake >=3.23.1
- make # [not win]

View File

@ -274,16 +274,16 @@ class TestKnn(unittest.TestCase):
else:
faiss.bfKnn(res, params)
self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
self.assertGreaterEqual((out_i == ref_i).sum(), ref_i.size)
np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
np.testing.assert_array_equal(out_i, ref_i)
out_d, out_i = faiss.knn_gpu(
res, qs, xs, k, device=gpu_id,
faiss.knn_gpu(
res, qs, xs, k, out_d, out_i, device=gpu_id,
vectorsMemoryLimit=vectorsMemoryLimit,
queriesMemoryLimit=queriesMemoryLimit)
self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
self.assertGreaterEqual((out_i == ref_i).sum(), ref_i.size)
np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
np.testing.assert_array_equal(out_i, ref_i)
# Try int32 out indices
out_i32 = np.empty((nq, k), dtype=np.int32)
@ -292,7 +292,8 @@ class TestKnn(unittest.TestCase):
faiss.bfKnn(res, params)
self.assertEqual((out_i32 == ref_i).sum(), ref_i.size)
np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
np.testing.assert_array_equal(out_i32, ref_i)
# Try float16 data/queries, i64 out indices
xs_f16 = xs.astype(np.float16)
@ -320,7 +321,7 @@ class TestKnn(unittest.TestCase):
faiss.bfKnn(res, params)
self.assertGreaterEqual((out_i_f16 == ref_i_f16).sum(), ref_i_f16.size - 5)
self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol = 2e-3))
np.testing.assert_allclose(ref_d_f16, out_d_f16, atol = 2e-3)
class TestAllPairwiseDistance(unittest.TestCase):
def test_dist(self):
@ -381,7 +382,7 @@ class TestAllPairwiseDistance(unittest.TestCase):
print('f32', np.abs(ref_d - out_d).max())
self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
# Try float16 data/queries
xs_f16 = xs.astype(np.float16)
@ -414,7 +415,7 @@ class TestAllPairwiseDistance(unittest.TestCase):
print('f16', np.abs(ref_d_f16 - out_d_f16).max())
self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol = 4e-3))
np.testing.assert_allclose(ref_d_f16, out_d_f16, atol = 4e-3)

View File

@ -58,11 +58,11 @@ class TestBfKnn(unittest.TestCase):
index_gpu.add(xb[2000:])
Dnew, Inew = index_gpu.search(ds.get_queries(), 13)
np.testing.assert_allclose(Dref, Dnew, atol=1e-5)
np.testing.assert_allclose(Dref, Dnew, atol=1e-4)
np.testing.assert_array_equal(Iref, Inew)
# copy back to CPU
index2 = faiss.index_gpu_to_cpu(index_gpu)
Dnew, Inew = index2.search(ds.get_queries(), 13)
np.testing.assert_allclose(Dref, Dnew, atol=1e-5)
np.testing.assert_allclose(Dref, Dnew, atol=1e-4)
np.testing.assert_array_equal(Iref, Inew)

View File

@ -94,6 +94,9 @@ endif()
# GPU builds link both SWIG modules against the CUDA runtime. When
# FAISS_ENABLE_RAFT is true they are additionally linked against RAFT and
# CUTLASS; the generator expressions expand to nothing otherwise.
if(FAISS_ENABLE_GPU)
  find_package(CUDAToolkit REQUIRED)
  if(FAISS_ENABLE_RAFT)
    find_package(raft COMPONENTS compiled distributed)
  endif()

  # The generic and AVX2 modules take exactly the same link line.
  foreach(swig_module IN ITEMS swigfaiss swigfaiss_avx2)
    target_link_libraries(${swig_module} PRIVATE
      CUDA::cudart
      $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
      $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>
    )
  endforeach()
endif()

View File

@ -29,6 +29,7 @@ set(FAISS_TEST_SRC
test_heap.cpp
test_code_distance.cpp
test_hnsw.cpp
test_partitioning.cpp
)
add_executable(faiss_test ${FAISS_TEST_SRC})

View File

@ -217,7 +217,7 @@ class TestInspect(unittest.TestCase):
Yref = X @ A.T + b
lt = inspect_tools.make_LinearTransform_matrix(A, b)
Ynew = lt.apply(X)
np.testing.assert_equal(Yref, Ynew)
np.testing.assert_allclose(Yref, Ynew, rtol=1e-06)
def test_NSG_neighbors(self):
# FIXME number of elements to add should be >> 100

View File

@ -582,7 +582,7 @@ class TestProductLocalSearchQuantizer(unittest.TestCase):
lut_ref = lut_ref.reshape(nq, codebook_size)
# max rtol in OSX: 2.87e-6
np.testing.assert_allclose(lut, lut_ref, rtol=5e-06)
np.testing.assert_allclose(lut, lut_ref, rtol=1e-04)
class TestIndexProductLocalSearchQuantizer(unittest.TestCase):

View File

@ -0,0 +1,33 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <faiss/utils/AlignedTable.h>
#include <faiss/utils/partitioning.h>
using namespace faiss;

using AlignedTableUint16 = AlignedTable<uint16_t>;

// TODO: This test fails when Faiss is compiled with
// GCC 13.2 from conda-forge with AVX2 enabled. This may be
// a GCC bug that needs to be investigated further.
// As of 16-AUG-2023 the Faiss conda packages are built
// with GCC 11.2, so the published binaries are not affected.

// Fills a table with 1024 multiples of 64 (0, 64, ..., 65472), runs
// simd_histogram_16 over it with min 0 and shift 12, and expects every one of
// the 16 histogram bins to hold exactly 64 entries.
TEST(TestPartitioning, TestPartitioningBigRange) {
    constexpr int kNumValues = 1024;
    constexpr int kNumBins = 16;

    AlignedTableUint16 values(kNumValues);
    for (int idx = 0; idx < kNumValues; ++idx) {
        values[idx] = idx * 64;
    }

    // Zero-initialised output buffer for the histogram counts.
    int32_t histogram[kNumBins] = {};
    simd_histogram_16(values.get(), kNumValues, 0, 12, histogram);

    for (int bin = 0; bin < kNumBins; ++bin) {
        ASSERT_EQ(histogram[bin], 64);
    }
}