GPU builds without Docker (#2803)

Summary:
1. GPU builds now run directly on the CircleCI CUDA machine image instead of inside Docker
2. Switched to CUDA 11.4 (used to be 11.3)
3. Merged all build jobs into two parameterized targets: `build_cmake` and `build_conda`.
4. Cleaned up test execution and fixed a bug that prevented the Python GPU tests from running on PRs

Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2803

Reviewed By: mlomeli1

Differential Revision: D44541714

Pulled By: algoriddle

fbshipit-source-id: aa09ae638ecb6ef556d42f27a4bfaddad7355e50
This commit is contained in:
Gergely Szilvasy 2023-03-30 10:45:54 -07:00 committed by Facebook GitHub Bot
parent 2686183ee5
commit 7bf645e344
6 changed files with 167 additions and 308 deletions

View File

@ -1,28 +0,0 @@
# Image that configures and builds faiss with GPU, C API and Python support
# on top of the CUDA 10.2 development image for Ubuntu 18.04.
FROM nvidia/cuda:10.2-devel-ubuntu18.04
# Install the Python 3 headers and pip, OpenBLAS, wget and the PCRE headers
# (PCRE is presumably needed by the swig ./configure step below - confirm).
# The apt package lists are removed in the same layer so they are not baked
# into the image.
RUN apt-get update && \
    apt-get install -y python3-dev python3-pip libopenblas-dev wget libpcre3-dev && \
    rm -rf /var/lib/apt/lists/*
# Build and install swig 4.0.2 from the upstream source tarball.
# Parallelism is capped at the number of available cores: a bare `make -j`
# places no limit on concurrent jobs and can exhaust memory on CI machines.
RUN wget -nv -O - https://sourceforge.net/projects/swig/files/swig/swig-4.0.2/swig-4.0.2.tar.gz/download | tar zxf - && cd swig-4.0.2 && ./configure && make -j"$(nproc)" && make install
# Install CMake 3.17.1 by unpacking the official binary release over /usr.
RUN wget -nv -O - https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.tar.gz | tar xzf - --strip-components=1 -C /usr
# Install numpy/scipy/pytorch for python tests.
RUN pip3 install numpy scipy torch
# Copy the build context (the faiss checkout) into the image and build there.
COPY . /faiss
WORKDIR /faiss
# Configure with GPU, C API, Python bindings and tests enabled; CUDA code is
# generated only for compute capability 7.5 (sm_75).
RUN cmake -B build \
    -DFAISS_ENABLE_GPU=ON \
    -DFAISS_ENABLE_C_API=ON \
    -DFAISS_ENABLE_PYTHON=ON \
    -DBUILD_TESTING=ON \
    -DCMAKE_CUDA_FLAGS="-gencode arch=compute_75,code=sm_75" \
    .
# Compile every target with 8 parallel jobs.
RUN make -C build -j8

View File

@ -5,6 +5,12 @@ executors:
docker: docker:
- image: continuumio/miniconda3 - image: continuumio/miniconda3
resource_class: medium+ resource_class: medium+
linux-x86_64-gpu:
environment:
CONDA_ARCH: Linux-x86_64
machine:
image: linux-cuda-11:2023.02.1
resource_class: gpu.nvidia.medium
linux-arm64-cpu: linux-arm64-cpu:
environment: environment:
CONDA_ARCH: Linux-aarch64 CONDA_ARCH: Linux-aarch64
@ -25,8 +31,8 @@ executors:
windows-x86_64-cpu: windows-x86_64-cpu:
machine: machine:
image: windows-server-2019-vs2019:stable image: windows-server-2019-vs2019:stable
resource_class: windows.medium
shell: bash.exe shell: bash.exe
resource_class: windows.medium
jobs: jobs:
format: format:
@ -54,11 +60,25 @@ jobs:
build_conda: build_conda:
parameters: parameters:
label:
type: string
default: ""
cuda:
type: string
default: ""
cuda_archs:
type: string
default: ""
compiler_version:
type: string
default: ""
exec: exec:
type: executor type: executor
executor: << parameters.exec >> executor: << parameters.exec >>
environment: environment:
OMP_NUM_THREADS: 10 OMP_NUM_THREADS: 10
PACKAGE_TYPE: <<parameters.label>>
CUDA_ARCHS: <<parameters.cuda_archs>>
steps: steps:
- checkout - checkout
- run: - run:
@ -73,118 +93,52 @@ jobs:
- run: - run:
name: Install conda build tools name: Install conda build tools
command: | command: |
conda update -y conda conda update -y -q conda
conda install -y -q conda-build conda install -y -q conda-build
- run: - when:
name: Build/test condition: << parameters.label >>
no_output_timeout: 30m steps:
command: | - run:
cd conda name: Enable anaconda uploads
conda build faiss --python 3.10 -c pytorch command: |
conda install -y -q anaconda-client
deploy_conda: conda config --set anaconda_upload yes
parameters: - when:
label: condition:
type: string not: << parameters.label >>
default: main steps:
exec: - run:
type: executor name: Conda build (CPU)
executor: << parameters.exec >> no_output_timeout: 30m
steps: command: |
- checkout cd conda
- run: conda build faiss --python 3.10 -c pytorch
name: Install conda - when:
command: | condition:
if [ -n "${CONDA_ARCH}" ] and:
then - << parameters.label >>
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-${CONDA_ARCH}.sh --output miniconda.sh - not: << parameters.cuda >>
bash miniconda.sh -b -p $HOME/miniconda steps:
~/miniconda/bin/conda init - run:
fi name: Conda build (CPU) w/ anaconda upload
- run: no_output_timeout: 30m
name: Install conda build tools command: |
command: | cd conda
conda update -y conda conda build faiss --user pytorch --label <<parameters.label>> -c pytorch
conda install -y -q conda-build anaconda-client - when:
conda config --set anaconda_upload yes condition:
- run: and:
name: Build/test/upload - << parameters.label >>
no_output_timeout: 30m - << parameters.cuda >>
environment: steps:
PACKAGE_TYPE: <<parameters.label>> - run:
command: | name: Conda build (GPU) w/ anaconda upload
cd conda no_output_timeout: 60m
conda build faiss --user pytorch --label <<parameters.label>> -c pytorch command: |
sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
deploy_linux_gpu: cd conda
parameters: conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
label: --user pytorch --label <<parameters.label>> -c pytorch -c nvidia
type: string
default: main
cuda:
type: string
cuda_archs:
type: string
compiler_version:
type: string
machine:
resource_class: gpu.nvidia.medium
image: ubuntu-2004-cuda-11.4:202110-01
docker_layer_caching: true
steps:
- checkout
- run:
name: Build packages
command: |
docker build -t faiss -f conda/Dockerfile.cuda<<parameters.cuda>> .
docker run --gpus all \
-e PACKAGE_TYPE="<<parameters.label>>" \
-e CUDA_ARCHS="<<parameters.cuda_archs>>" \
-e ANACONDA_API_TOKEN=$ANACONDA_API_TOKEN \
faiss \
conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
--user pytorch --label <<parameters.label>> -c pytorch
no_output_timeout: 60m
deploy_linux_gpu_v2:
parameters:
label:
type: string
default: main
cuda:
type: string
cuda_archs:
type: string
compiler_version:
type: string
machine:
resource_class: gpu.nvidia.medium
image: linux-cuda-11:2023.02.1
steps:
- checkout
- run:
name: Install conda
command: |
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
~/miniconda/bin/conda init
- run:
name: Install conda build tools
command: |
conda update -y conda
conda install -y -q conda-build
# anaconda-client
# conda config --set anaconda_upload yes
- run:
name: Build/test/upload
no_output_timeout: 60m
environment:
PACKAGE_TYPE: <<parameters.label>>
CUDA_ARCHS: <<parameters.cuda_archs>>
command: |
cd conda
conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
--user pytorch --label <<parameters.label>> -c pytorch -c nvidia
build_cmake: build_cmake:
parameters: parameters:
@ -193,13 +147,16 @@ jobs:
opt_level: opt_level:
type: string type: string
default: generic default: generic
gpu:
type: string
default: "OFF"
executor: << parameters.exec >> executor: << parameters.exec >>
environment: environment:
OMP_NUM_THREADS: 10 OMP_NUM_THREADS: 10
MKL_THREADING_LAYER: GNU MKL_THREADING_LAYER: GNU
steps: steps:
- checkout - checkout
- run: - run:
name: Install conda name: Install conda
command: | command: |
if [ -n "${CONDA_ARCH}" ] if [ -n "${CONDA_ARCH}" ]
@ -208,109 +165,80 @@ jobs:
bash miniconda.sh -b -p $HOME/miniconda bash miniconda.sh -b -p $HOME/miniconda
~/miniconda/bin/conda init ~/miniconda/bin/conda init
fi fi
- run: - when:
condition:
equal: [ "ON", << parameters.gpu >> ]
steps:
- run:
name: Configure CUDA
command: sudo update-alternatives --set cuda /usr/local/cuda-11.4
- run:
name: Set up environment name: Set up environment
command: | command: |
conda update -y -q conda conda update -y -q conda
conda install -y -q cmake make swig mkl numpy scipy pytest gxx_linux-64 conda install -y -q cmake make swig mkl=2021 mkl-devel=2021 numpy scipy pytest gxx_linux-64
conda install -y -q pytorch -c pytorch
- run: - run:
name: Build faiss library name: Build all targets
no_output_timeout: 30m no_output_timeout: 30m
command: | command: |
source ~/.bashrc eval "$(conda shell.bash hook)"
cmake -B build -DBUILD_TESTING=ON -DFAISS_ENABLE_GPU=OFF \ conda activate
cmake -B build \
-DBUILD_TESTING=ON \
-DBUILD_SHARED_LIBS=OFF \
-DFAISS_ENABLE_GPU=<< parameters.gpu >> \
-DFAISS_OPT_LEVEL=<< parameters.opt_level >> \ -DFAISS_OPT_LEVEL=<< parameters.opt_level >> \
-DFAISS_ENABLE_C_API=ON -DPYTHON_EXECUTABLE=$(which python3)\ -DFAISS_ENABLE_C_API=ON \
-DCMAKE_BUILD_TYPE=Release -DBLA_VENDOR=Intel10_64_dyn . -DPYTHON_EXECUTABLE=$(which python) \
make -k -C build -j$(nproc) faiss -DCMAKE_BUILD_TYPE=Release \
-DBLA_VENDOR=Intel10_64_dyn \
-DCMAKE_CUDA_FLAGS="-gencode arch=compute_75,code=sm_75" \
.
make -k -C build -j$(nproc)
- run:
name: C++ tests
command: |
export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/"
make -C build test
- run:
name: Install Python extension
command: |
cd build/faiss/python
python setup.py install
- when:
condition:
equal: [ "OFF", << parameters.gpu >> ]
steps:
- run:
name: Python tests (CPU only)
command: |
conda install -y -q pytorch -c pytorch
pytest --junitxml=test-results/pytest/results.xml tests/test_*.py
pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py
- when:
condition:
equal: [ "ON", << parameters.gpu >> ]
steps:
- run:
name: Python tests (CPU + GPU)
command: |
conda install -y -q pytorch pytorch-cuda -c pytorch -c nvidia
pytest --junitxml=test-results/pytest/results.xml tests/test_*.py
pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py
cp tests/common_faiss_tests.py faiss/gpu/test
pytest --junitxml=test-results/pytest/results-gpu.xml faiss/gpu/test/test_*.py
pytest --junitxml=test-results/pytest/results-gpu-torch.xml faiss/gpu/test/torch_*.py
- when: - when:
condition: condition:
equal: [ "avx2", << parameters.opt_level >> ] equal: [ "avx2", << parameters.opt_level >> ]
steps: steps:
- run: - run:
name: Build faiss_avx2 library name: Test avx2 loading
no_output_timeout: 30m command: |
command: make -k -C build -j$(nproc) faiss_avx2 swigfaiss_avx2 FAISS_DISABLE_CPU_FEATURES=AVX2 LD_DEBUG=libs python -c "import faiss" 2>&1 | grep faiss.so
- run: LD_DEBUG=libs python -c "import faiss" 2>&1 | grep faiss_avx2.so
name: Test faiss library
command: |
make -C build -j$(nproc) faiss_test
export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/"
make -C build test
- run:
name: Build python extension
command: |
make -C build -j$(nproc) swigfaiss
cd build/faiss/python
python3 setup.py build
- run:
name: Test python extension
command: |
export PYTHONPATH="$(ls -d ./build/faiss/python/build/lib*/)"
pytest --junitxml=test-results/pytest/results.xml tests/test_*.py
pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py
- store_test_results: - store_test_results:
path: test-results path: test-results
- run:
name: Build C API
command: |
make -k -C build -j faiss_c
build_linux_gpu:
machine:
resource_class: gpu.nvidia.medium
image: ubuntu-2004-cuda-11.4:202110-01
docker_layer_caching: true
steps:
- checkout
- run:
name: Build/test
command: |
docker build -t faiss -f .circleci/Dockerfile.faiss_gpu .
docker run --gpus all faiss make -C build test
docker run --gpus all faiss sh -c '(pwd; find)'
docker run --gpus all faiss sh -c '(cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "test_*"'
docker run --gpus all faiss sh -c '(cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "torch_*.py"'
no_output_timeout: 60m
build_linux_gpu_v2:
machine:
resource_class: gpu.nvidia.medium
image: linux-cuda-11:2023.02.1
steps:
- checkout
- run:
name: Install conda
command: |
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
~/miniconda/bin/conda init
- run:
name: Set up environment
command: |
conda update -y -q conda
conda install -y -q cmake make swig mkl numpy scipy pytest gxx_linux-64
conda install -y -q pytorch -c pytorch
- run:
name: Build
no_output_timeout: 60m
command: |
source ~/.bashrc
cmake -B build \
-DFAISS_ENABLE_GPU=ON \
-DFAISS_ENABLE_C_API=ON \
-DFAISS_ENABLE_PYTHON=ON \
-DBUILD_TESTING=ON \
-DCMAKE_CUDA_FLAGS="-gencode arch=compute_75,code=sm_75" \
.
make -C build -j8
- run:
name: Test
command: |
make -C build test
(cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "test_*"'
(cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "torch_*.py"'
workflows: workflows:
version: 2 version: 2
@ -322,11 +250,13 @@ workflows:
name: Linux x86_64 (cmake) name: Linux x86_64 (cmake)
exec: linux-x86_64-cpu exec: linux-x86_64-cpu
- build_cmake: - build_cmake:
name: Linux x86_64 w/ AVX2 (cmake) name: Linux x86_64 AVX2 (cmake)
exec: linux-x86_64-cpu exec: linux-x86_64-cpu
opt_level: "avx2" opt_level: "avx2"
- build_linux_gpu: - build_cmake:
name: Linux x86_64 GPU (cmake) name: Linux x86_64 GPU (cmake)
exec: linux-x86_64-gpu
gpu: "ON"
requires: requires:
- Linux x86_64 (cmake) - Linux x86_64 (cmake)
- build_conda: - build_conda:
@ -336,7 +266,7 @@ workflows:
name: OSX x86_64 (conda) name: OSX x86_64 (conda)
exec: macosx-x86_64-cpu exec: macosx-x86_64-cpu
- build_conda: - build_conda:
name: Windows (conda) name: Windows x86_64 (conda)
exec: windows-x86_64-cpu exec: windows-x86_64-cpu
- build_conda: - build_conda:
name: OSX arm64 (conda) name: OSX arm64 (conda)
@ -344,51 +274,58 @@ workflows:
- build_conda: - build_conda:
name: Linux arm64 (conda) name: Linux arm64 (conda)
exec: linux-arm64-cpu exec: linux-arm64-cpu
- deploy_conda: - build_conda:
name: Linux x86_64 packages name: Linux x86_64 packages
exec: linux-x86_64-cpu exec: linux-x86_64-cpu
label: main
filters: filters:
tags: tags:
only: /^v.*/ only: /^v.*/
branches: branches:
ignore: /.*/ ignore: /.*/
- deploy_linux_gpu: - build_conda:
name: Linux GPU packages (CUDA 11.3) name: Linux x86_64 GPU packages (CUDA 11.4)
cuda: "11.3" exec: linux-x86_64-gpu
label: main
cuda: "11.4"
cuda_archs: "60;61;70;72;75;80;86" cuda_archs: "60;61;70;72;75;80;86"
compiler_version: "9.3" compiler_version: "11.2"
filters: filters:
tags: tags:
only: /^v.*/ only: /^v.*/
branches: branches:
ignore: /.*/ ignore: /.*/
- deploy_conda: - build_conda:
name: Windows x86_64 packages name: Windows x86_64 packages
exec: windows-x86_64-cpu exec: windows-x86_64-cpu
label: main
filters: filters:
tags: tags:
only: /^v.*/ only: /^v.*/
branches: branches:
ignore: /.*/ ignore: /.*/
- deploy_conda: - build_conda:
name: OSX x86_64 packages name: OSX x86_64 packages
exec: macosx-x86_64-cpu exec: macosx-x86_64-cpu
label: main
filters: filters:
tags: tags:
only: /^v.*/ only: /^v.*/
branches: branches:
ignore: /.*/ ignore: /.*/
- deploy_conda: - build_conda:
name: OSX arm64 packages name: OSX arm64 packages
exec: macosx-arm64-cpu exec: macosx-arm64-cpu
label: main
filters: filters:
tags: tags:
only: /^v.*/ only: /^v.*/
branches: branches:
ignore: /.*/ ignore: /.*/
- deploy_conda: - build_conda:
name: Linux arm64 packages name: Linux arm64 packages
exec: linux-arm64-cpu exec: linux-arm64-cpu
label: main
filters: filters:
tags: tags:
only: /^v.*/ only: /^v.*/
@ -404,29 +341,30 @@ workflows:
only: only:
- main - main
jobs: jobs:
- deploy_conda: - build_conda:
name: Linux x86_64 nightlies name: Linux x86_64 nightlies
exec: linux-x86_64-cpu exec: linux-x86_64-cpu
label: nightly label: nightly
- deploy_linux_gpu: - build_conda:
name: Linux x86_64 GPU nightlies (CUDA 11.3) name: Linux x86_64 GPU nightlies (CUDA 11.4)
cuda: "11.3" exec: linux-x86_64-gpu
cuda: "11.4"
cuda_archs: "60;61;70;72;75;80;86" cuda_archs: "60;61;70;72;75;80;86"
compiler_version: "9.3" compiler_version: "11.2"
label: nightly label: nightly
- deploy_conda: - build_conda:
name: Windows x86_64 nightlies name: Windows x86_64 nightlies
exec: windows-x86_64-cpu exec: windows-x86_64-cpu
label: nightly label: nightly
- deploy_conda: - build_conda:
name: OSX x86_64 nightlies name: OSX x86_64 nightlies
exec: macosx-x86_64-cpu exec: macosx-x86_64-cpu
label: nightly label: nightly
- deploy_conda: - build_conda:
name: OSX arm64 nightlies name: OSX arm64 nightlies
exec: macosx-arm64-cpu exec: macosx-arm64-cpu
label: nightly label: nightly
- deploy_conda: - build_conda:
name: Linux arm64 nightlies name: Linux arm64 nightlies
exec: linux-arm64-cpu exec: linux-arm64-cpu
label: nightly label: nightly

View File

@ -1,29 +0,0 @@
# Image that builds, tests and installs faiss without CUDA support, using
# Intel MKL, on top of the CUDA 8.0 development image for CentOS 7.
FROM nvidia/cuda:8.0-devel-centos7
# Install MKL: register Intel's yum repository, import its signing key, then
# install the pinned intel-mkl-2019.3-062 package.
RUN yum-config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo
RUN rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
RUN yum install -y intel-mkl-2019.3-062
# Put the MKL library directory first on the runtime and link-time search paths.
ENV LD_LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
ENV LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LIBRARY_PATH
# Preload libgomp and the listed MKL shared libraries; as an ENV setting this
# applies to every later RUN step and to containers started from the image.
# NOTE(review): presumably needed so the Python module can resolve MKL symbols
# at import time - confirm before changing the list.
ENV LD_PRELOAD /usr/lib64/libgomp.so.1:/opt/intel/mkl/lib/intel64/libmkl_def.so:\
/opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:\
/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_gnu_thread.so
# Install necessary build tools
RUN yum install -y gcc-c++ make swig3
# Install necessary headers/libs
RUN yum install -y python-devel numpy
# Copy the build context (the faiss checkout) into the image and build there.
COPY . /opt/faiss
WORKDIR /opt/faiss
# Configure with --without-cuda; the commented-out alternative for enabling
# CUDA is:
# --with-cuda=/usr/local/cuda-8.0
RUN ./configure --prefix=/usr --libdir=/usr/lib64 --without-cuda
# Build the library with one job per available core, then the Python bindings.
RUN make -j $(nproc)
RUN make -C python
# Run the test target and install, so the image build fails if either fails.
RUN make test
RUN make install
# Build and run the IVFPQ indexing demo.
RUN make -C demos demo_ivfpq_indexing && ./demos/demo_ivfpq_indexing

View File

@ -1,24 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Conda packaging image: CUDA 11.3.1 development image for CentOS 8 with
# Miniconda, conda-build and anaconda-client installed and uploads enabled.
FROM nvidia/cuda:11.3.1-devel-centos8
# Comment out the mirrorlist entries and point baseurl at vault.centos.org in
# every CentOS repo file (presumably because the regular CentOS 8 mirrors are
# no longer served - confirm).
RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-*
RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
# NOTE(review): --nogpgcheck disables package signature verification for these
# two steps; kept as in the original, revisit if the repos can be verified.
RUN yum update -y --nogpgcheck
# Install download/VCS tools and the cuBLAS 11.3 development package, then drop
# the yum cache so it is not stored in this layer.
RUN yum install -y --nogpgcheck wget git libcublas-devel-11-3 && \
    yum clean all
# Install Miniconda into /root/miniconda3 (the same absolute path the PATH
# entry below relies on) and delete the installer once it has run.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    bash Miniconda3-latest-Linux-x86_64.sh -b -p /root/miniconda3 && \
    rm -f Miniconda3-latest-Linux-x86_64.sh
ENV PATH="/root/miniconda3/condabin:${PATH}"
# Packaging tools; with anaconda_upload enabled, conda build uploads the
# packages it produces.
RUN conda install -y -q conda-build anaconda-client
RUN conda config --set anaconda_upload yes
# Copy the build context to /faiss. The destination is absolute so it always
# matches the WORKDIR below, whatever working directory the base image sets.
COPY ./ /faiss
WORKDIR /faiss/conda

View File

@ -79,9 +79,11 @@ outputs:
- scipy - scipy
- pytorch - pytorch
commands: commands:
- python -m unittest discover tests/ - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
- python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
- cp tests/common_faiss_tests.py faiss/gpu/test - cp tests/common_faiss_tests.py faiss/gpu/test
- python -m unittest discover faiss/gpu/test/ - python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "test_*"
- python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "torch_*"
- sh test_cpu_dispatch.sh # [linux] - sh test_cpu_dispatch.sh # [linux]
files: files:
- test_cpu_dispatch.sh # [linux] - test_cpu_dispatch.sh # [linux]

View File

@ -80,8 +80,8 @@ outputs:
- scipy - scipy
- pytorch - pytorch
commands: commands:
- python -X faulthandler -m unittest discover -v -s tests -p "test_*" - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
- python -X faulthandler -m unittest discover -v -s tests -p "torch_*" - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
- sh test_cpu_dispatch.sh # [linux64] - sh test_cpu_dispatch.sh # [linux64]
files: files:
- test_cpu_dispatch.sh # [linux64] - test_cpu_dispatch.sh # [linux64]