GPU builds without Docker (#2803)

Summary:
1. GPU builds use the CircleCI base image, no Docker.
2. Switched to CUDA 11.4 (used to be 11.3).
3. Merged all build jobs into two parameterized targets: `build_cmake` and `build_conda`.
4. Cleaned up test execution and fixed a bug where Python GPU tests were not running on PRs.

Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2803

Reviewed By: mlomeli1

Differential Revision: D44541714

Pulled By: algoriddle

fbshipit-source-id: aa09ae638ecb6ef556d42f27a4bfaddad7355e50
2025-06-03 21:54:02 +08:00 · 2023-03-30 10:45:54 -07:00 · 2023-03-30 10:45:54 -07:00 · 7bf645e344
commit 7bf645e344
parent 2686183ee5
6 changed files with 167 additions and 308 deletions
--- a/.circleci/Dockerfile.faiss_gpu
+++ b/.circleci/Dockerfile.faiss_gpu
@ -1,28 +0,0 @@
 FROM nvidia/cuda:10.2-devel-ubuntu18.04
 # Install python3, wget, and openblas.
 RUN apt-get update && \
        apt-get install -y python3-dev python3-pip libopenblas-dev wget libpcre3-dev
 # Install swig 4.0.2.
 RUN wget -nv -O - https://sourceforge.net/projects/swig/files/swig/swig-4.0.2/swig-4.0.2.tar.gz/download | tar zxf - && cd swig-4.0.2 && ./configure && make -j && make install
 # Install recent CMake.
 RUN wget -nv -O - https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.tar.gz | tar xzf - --strip-components=1 -C /usr
 # Install numpy/scipy/pytorch for python tests.
 RUN pip3 install numpy scipy torch
 COPY . /faiss
 WORKDIR /faiss
 RUN cmake -B build \
        -DFAISS_ENABLE_GPU=ON \
        -DFAISS_ENABLE_C_API=ON \
        -DFAISS_ENABLE_PYTHON=ON \
        -DBUILD_TESTING=ON \
        -DCMAKE_CUDA_FLAGS="-gencode arch=compute_75,code=sm_75" \
        .
 RUN make -C build -j8
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -5,6 +5,12 @@ executors:
    docker:
      - image: continuumio/miniconda3
    resource_class: medium+
  linux-x86_64-gpu:
    environment:
      CONDA_ARCH: Linux-x86_64
    machine:
      image: linux-cuda-11:2023.02.1
    resource_class: gpu.nvidia.medium
  linux-arm64-cpu:
    environment:
      CONDA_ARCH: Linux-aarch64
@ -25,8 +31,8 @@ executors:
  windows-x86_64-cpu:
    machine:
      image: windows-server-2019-vs2019:stable
      resource_class: windows.medium
      shell: bash.exe
    resource_class: windows.medium
 jobs:
  format:
@ -54,11 +60,25 @@ jobs:
  build_conda:
    parameters:
      label:
        type: string
        default: ""
      cuda:
        type: string
        default: ""
      cuda_archs:
        type: string
        default: ""
      compiler_version:
        type: string
        default: ""
      exec:
        type: executor
    executor: << parameters.exec >>
    environment:
      OMP_NUM_THREADS: 10
      PACKAGE_TYPE: <<parameters.label>>
      CUDA_ARCHS: <<parameters.cuda_archs>>
    steps:
      - checkout
      - run:
@ -73,118 +93,52 @@ jobs:
      - run:
          name: Install conda build tools
          command: |
-            conda update -y conda
+            conda update -y -q conda
            conda install -y -q conda-build
-      - run:
+      - when:
-          name: Build/test
+          condition: << parameters.label >>
-          no_output_timeout: 30m
+          steps:
-          command: |
+            - run:
-            cd conda
+                name: Enable anaconda uploads
-            conda build faiss --python 3.10 -c pytorch
+                command: |
-
+                  conda install -y -q anaconda-client
-  deploy_conda:
+                  conda config --set anaconda_upload yes
-    parameters:
+      - when:
-      label:
+          condition:
-        type: string
+              not: << parameters.label >>
-        default: main
+          steps:
-      exec:
+            - run:
-        type: executor
+                name: Conda build (CPU)
-    executor: << parameters.exec >>
+                no_output_timeout: 30m
-    steps:
+                command: |
-      - checkout
+                  cd conda
-      - run:
+                  conda build faiss --python 3.10 -c pytorch
-          name: Install conda
+      - when:
-          command: |
+          condition:
-            if [ -n "${CONDA_ARCH}" ]
+            and:
-            then
+              - << parameters.label >>
-              curl https://repo.anaconda.com/miniconda/Miniconda3-latest-${CONDA_ARCH}.sh --output miniconda.sh
+              - not: << parameters.cuda >>
-              bash miniconda.sh -b -p $HOME/miniconda
+          steps:
-              ~/miniconda/bin/conda init
+            - run:
-            fi
+                name: Conda build (CPU) w/ anaconda upload
-      - run:
+                no_output_timeout: 30m
-          name: Install conda build tools
+                command: |
-          command: |
+                  cd conda
-            conda update -y conda
+                  conda build faiss --user pytorch --label <<parameters.label>> -c pytorch
-            conda install -y -q conda-build anaconda-client
+      - when:
-            conda config --set anaconda_upload yes
+          condition:
-      - run:
+            and:
-          name: Build/test/upload
+              - << parameters.label >>
-          no_output_timeout: 30m
+              - << parameters.cuda >>
-          environment:
+          steps:
-            PACKAGE_TYPE: <<parameters.label>>
+            - run:
-          command: |
+                name: Conda build (GPU) w/ anaconda upload
-            cd conda
+                no_output_timeout: 60m
-            conda build faiss --user pytorch --label <<parameters.label>> -c pytorch
+                command: |
-
+                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
-  deploy_linux_gpu:
+                  cd conda
-    parameters:
+                  conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-      label:
+                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia
        type: string
        default: main
      cuda:
        type: string
      cuda_archs:
        type: string
      compiler_version:
        type: string
    machine:
      resource_class: gpu.nvidia.medium
      image: ubuntu-2004-cuda-11.4:202110-01
      docker_layer_caching: true
    steps:
      - checkout
      - run:
          name: Build packages
          command: |
            docker build -t faiss -f conda/Dockerfile.cuda<<parameters.cuda>> .
            docker run --gpus all \
              -e PACKAGE_TYPE="<<parameters.label>>" \
              -e CUDA_ARCHS="<<parameters.cuda_archs>>" \
              -e ANACONDA_API_TOKEN=$ANACONDA_API_TOKEN \
              faiss \
              conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
                --user pytorch --label <<parameters.label>> -c pytorch
          no_output_timeout: 60m
  deploy_linux_gpu_v2:
    parameters:
      label:
        type: string
        default: main
      cuda:
        type: string
      cuda_archs:
        type: string
      compiler_version:
        type: string
    machine:
      resource_class: gpu.nvidia.medium
      image: linux-cuda-11:2023.02.1
    steps:
      - checkout
      - run:
          name: Install conda
          command: |
            curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
            bash miniconda.sh -b -p $HOME/miniconda
            ~/miniconda/bin/conda init
      - run:
          name: Install conda build tools
          command: |
            conda update -y conda
            conda install -y -q conda-build 
            # anaconda-client
            # conda config --set anaconda_upload yes
      - run:
          name: Build/test/upload
          no_output_timeout: 60m
          environment:
            PACKAGE_TYPE: <<parameters.label>>
            CUDA_ARCHS: <<parameters.cuda_archs>>
          command: |
            cd conda
            conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
                --user pytorch --label <<parameters.label>> -c pytorch -c nvidia
  build_cmake:
    parameters:
@ -193,13 +147,16 @@ jobs:
      opt_level:
        type: string
        default: generic
      gpu:
        type: string
        default: "OFF"
    executor: << parameters.exec >>
    environment:
      OMP_NUM_THREADS: 10
      MKL_THREADING_LAYER: GNU
    steps:
      - checkout
-      - run: 
+      - run:
          name: Install conda
          command: |
            if [ -n "${CONDA_ARCH}" ]
@ -208,109 +165,80 @@ jobs:
              bash miniconda.sh -b -p $HOME/miniconda
              ~/miniconda/bin/conda init
            fi
-      - run: 
+      - when:
          condition:
            equal: [ "ON", << parameters.gpu >> ]
          steps:
            - run:
                name: Configure CUDA
                command: sudo update-alternatives --set cuda /usr/local/cuda-11.4
      - run:
          name: Set up environment
          command: |
            conda update -y -q conda
-            conda install -y -q cmake make swig mkl numpy scipy pytest gxx_linux-64
+            conda install -y -q cmake make swig mkl=2021 mkl-devel=2021 numpy scipy pytest gxx_linux-64
            conda install -y -q pytorch -c pytorch
      - run:
-          name: Build faiss library
+          name: Build all targets
          no_output_timeout: 30m
          command: |
-            source ~/.bashrc
+            eval "$(conda shell.bash hook)"
-            cmake -B build -DBUILD_TESTING=ON -DFAISS_ENABLE_GPU=OFF \
+            conda activate
            cmake -B build \
                  -DBUILD_TESTING=ON \
                  -DBUILD_SHARED_LIBS=OFF \
                  -DFAISS_ENABLE_GPU=<< parameters.gpu >> \
                  -DFAISS_OPT_LEVEL=<< parameters.opt_level >> \
-                  -DFAISS_ENABLE_C_API=ON -DPYTHON_EXECUTABLE=$(which python3)\
+                  -DFAISS_ENABLE_C_API=ON \
-                  -DCMAKE_BUILD_TYPE=Release -DBLA_VENDOR=Intel10_64_dyn .
+                  -DPYTHON_EXECUTABLE=$(which python) \
-            make -k -C build -j$(nproc) faiss
+                  -DCMAKE_BUILD_TYPE=Release \
                  -DBLA_VENDOR=Intel10_64_dyn \
                  -DCMAKE_CUDA_FLAGS="-gencode arch=compute_75,code=sm_75" \
                  .
            make -k -C build -j$(nproc)
      - run:
          name: C++ tests
          command: |
            export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/"
            make -C build test
      - run:
          name: Install Python extension
          command: |
            cd build/faiss/python
            python setup.py install
      - when:
          condition:
            equal: [ "OFF", << parameters.gpu >> ]
          steps:
            - run:
                name: Python tests (CPU only)
                command: |
                  conda install -y -q pytorch -c pytorch
                  pytest --junitxml=test-results/pytest/results.xml tests/test_*.py
                  pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py
      - when:
          condition:
            equal: [ "ON", << parameters.gpu >> ]
          steps:
            - run:
                name: Python tests (CPU + GPU)
                command: |
                  conda install -y -q pytorch pytorch-cuda -c pytorch -c nvidia
                  pytest --junitxml=test-results/pytest/results.xml tests/test_*.py
                  pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py
                  cp tests/common_faiss_tests.py faiss/gpu/test
                  pytest --junitxml=test-results/pytest/results-gpu.xml faiss/gpu/test/test_*.py
                  pytest --junitxml=test-results/pytest/results-gpu-torch.xml faiss/gpu/test/torch_*.py
      - when:
          condition:
            equal: [ "avx2", << parameters.opt_level >> ]
          steps:
            - run:
-                name: Build faiss_avx2 library
+                name: Test avx2 loading
-                no_output_timeout: 30m
+                command: |
-                command: make -k -C build -j$(nproc) faiss_avx2 swigfaiss_avx2
+                  FAISS_DISABLE_CPU_FEATURES=AVX2 LD_DEBUG=libs python -c "import faiss" 2>&1 | grep faiss.so
-      - run:
+                  LD_DEBUG=libs python -c "import faiss" 2>&1 | grep faiss_avx2.so
          name: Test faiss library
          command: |
            make -C build -j$(nproc) faiss_test
            export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/"
            make -C build test
      - run:
          name: Build python extension
          command: |
            make -C build -j$(nproc) swigfaiss
            cd build/faiss/python
            python3 setup.py build
      - run:
          name: Test python extension
          command: |
            export PYTHONPATH="$(ls -d ./build/faiss/python/build/lib*/)"
            pytest --junitxml=test-results/pytest/results.xml tests/test_*.py
            pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py
      - store_test_results:
          path: test-results
      - run:
          name: Build C API
          command: |
            make -k -C build -j faiss_c
  build_linux_gpu:
    machine:
      resource_class: gpu.nvidia.medium
      image: ubuntu-2004-cuda-11.4:202110-01
      docker_layer_caching: true
    steps:
      - checkout
      - run:
          name: Build/test
          command: |
            docker build -t faiss -f .circleci/Dockerfile.faiss_gpu .
            docker run --gpus all faiss make -C build test
            docker run --gpus all faiss sh -c '(pwd; find)'
            docker run --gpus all faiss sh -c '(cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "test_*"'
            docker run --gpus all faiss sh -c '(cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "torch_*.py"'
          no_output_timeout: 60m
  build_linux_gpu_v2:
    machine:
      resource_class: gpu.nvidia.medium
      image: linux-cuda-11:2023.02.1
    steps:
      - checkout
      - run:
          name: Install conda
          command: |
            curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
            bash miniconda.sh -b -p $HOME/miniconda
            ~/miniconda/bin/conda init
      - run: 
          name: Set up environment
          command: |
            conda update -y -q conda
            conda install -y -q cmake make swig mkl numpy scipy pytest gxx_linux-64
            conda install -y -q pytorch -c pytorch
      - run:
          name: Build
          no_output_timeout: 60m
          command: |
            source ~/.bashrc
            cmake -B build \
                    -DFAISS_ENABLE_GPU=ON \
                    -DFAISS_ENABLE_C_API=ON \
                    -DFAISS_ENABLE_PYTHON=ON \
                    -DBUILD_TESTING=ON \
                    -DCMAKE_CUDA_FLAGS="-gencode arch=compute_75,code=sm_75" \
                    .
            make -C build -j8
      - run:
          name: Test
          command: |
            make -C build test
            (cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "test_*"'
            (cd build/faiss/python; python3 setup.py install) && cp tests/common_faiss_tests.py faiss/gpu/test && python3 -m unittest discover -s faiss/gpu/test -p "torch_*.py"'
 workflows:
  version: 2
@ -322,11 +250,13 @@ workflows:
          name: Linux x86_64 (cmake)
          exec: linux-x86_64-cpu
      - build_cmake:
-          name: Linux x86_64 w/ AVX2 (cmake)
+          name: Linux x86_64 AVX2 (cmake)
          exec: linux-x86_64-cpu
          opt_level: "avx2"
-      - build_linux_gpu:
+      - build_cmake:
          name: Linux x86_64 GPU (cmake)
          exec: linux-x86_64-gpu
          gpu: "ON"
          requires:
            - Linux x86_64 (cmake)
      - build_conda:
@ -336,7 +266,7 @@ workflows:
          name: OSX x86_64 (conda)
          exec: macosx-x86_64-cpu
      - build_conda:
-          name: Windows (conda)
+          name: Windows x86_64 (conda)
          exec: windows-x86_64-cpu
      - build_conda:
          name: OSX arm64 (conda)
@ -344,51 +274,58 @@ workflows:
      - build_conda:
          name: Linux arm64 (conda)
          exec: linux-arm64-cpu
-      - deploy_conda:
+      - build_conda:
          name: Linux x86_64 packages
          exec: linux-x86_64-cpu
          label: main
          filters:
            tags:
              only: /^v.*/
            branches:
              ignore: /.*/
-      - deploy_linux_gpu:
+      - build_conda:
-          name: Linux GPU packages (CUDA 11.3)
+          name: Linux x86_64 GPU packages (CUDA 11.4)
-          cuda: "11.3"
+          exec: linux-x86_64-gpu
          label: main
          cuda: "11.4"
          cuda_archs: "60;61;70;72;75;80;86"
-          compiler_version: "9.3"
+          compiler_version: "11.2"
          filters:
            tags:
              only: /^v.*/
            branches:
              ignore: /.*/
-      - deploy_conda:
+      - build_conda:
          name: Windows x86_64 packages
          exec: windows-x86_64-cpu
          label: main
          filters:
            tags:
              only: /^v.*/
            branches:
              ignore: /.*/
-      - deploy_conda:
+      - build_conda:
          name: OSX x86_64 packages
          exec: macosx-x86_64-cpu
          label: main
          filters:
            tags:
              only: /^v.*/
            branches:
              ignore: /.*/
-      - deploy_conda:
+      - build_conda:
          name: OSX arm64 packages
          exec: macosx-arm64-cpu
          label: main
          filters:
            tags:
              only: /^v.*/
            branches:
              ignore: /.*/
-      - deploy_conda:
+      - build_conda:
          name: Linux arm64 packages
          exec: linux-arm64-cpu
          label: main
          filters:
            tags:
              only: /^v.*/
@ -404,29 +341,30 @@ workflows:
              only:
                - main
    jobs:
-      - deploy_conda:
+      - build_conda:
          name: Linux x86_64 nightlies
          exec: linux-x86_64-cpu
          label: nightly
-      - deploy_linux_gpu:
+      - build_conda:
-          name: Linux x86_64 GPU nightlies (CUDA 11.3)
+          name: Linux x86_64 GPU nightlies (CUDA 11.4)
-          cuda: "11.3"
+          exec: linux-x86_64-gpu
          cuda: "11.4"
          cuda_archs: "60;61;70;72;75;80;86"
-          compiler_version: "9.3"
+          compiler_version: "11.2"
          label: nightly
-      - deploy_conda:
+      - build_conda:
          name: Windows x86_64 nightlies
          exec: windows-x86_64-cpu
          label: nightly
-      - deploy_conda:
+      - build_conda:
          name: OSX x86_64 nightlies
          exec: macosx-x86_64-cpu
          label: nightly
-      - deploy_conda:
+      - build_conda:
          name: OSX arm64 nightlies
          exec: macosx-arm64-cpu
          label: nightly
-      - deploy_conda:
+      - build_conda:
          name: Linux arm64 nightlies
          exec: linux-arm64-cpu
          label: nightly
--- a/29
+++ b/29
@ -1,29 +0,0 @@
 FROM nvidia/cuda:8.0-devel-centos7
 # Install MKL
 RUN yum-config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo
 RUN rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
 RUN yum install -y intel-mkl-2019.3-062
 ENV LD_LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
 ENV LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LIBRARY_PATH
 ENV LD_PRELOAD /usr/lib64/libgomp.so.1:/opt/intel/mkl/lib/intel64/libmkl_def.so:\
 /opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:\
 /opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_gnu_thread.so
 # Install necessary build tools
 RUN yum install -y gcc-c++ make swig3
 # Install necessary headers/libs
 RUN yum install -y python-devel numpy
 COPY . /opt/faiss
 WORKDIR /opt/faiss
 # --with-cuda=/usr/local/cuda-8.0 
 RUN ./configure --prefix=/usr --libdir=/usr/lib64 --without-cuda
 RUN make -j $(nproc)
 RUN make -C python
 RUN make test
 RUN make install
 RUN make -C demos demo_ivfpq_indexing && ./demos/demo_ivfpq_indexing
--- a/conda/Dockerfile.cuda11.3
+++ b/conda/Dockerfile.cuda11.3
@ -1,24 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 FROM nvidia/cuda:11.3.1-devel-centos8
 RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-*
 RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
 RUN yum update -y --nogpgcheck
 RUN yum install -y --nogpgcheck wget git libcublas-devel-11-3
 RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
        bash Miniconda3-latest-Linux-x86_64.sh -b -p ~/miniconda3
 ENV PATH="/root/miniconda3/condabin:${PATH}"
 RUN conda install -y -q conda-build anaconda-client
 RUN conda config --set anaconda_upload yes
 COPY ./ faiss
 WORKDIR /faiss/conda
--- a/conda/faiss-gpu/meta.yaml
+++ b/conda/faiss-gpu/meta.yaml
@ -79,9 +79,11 @@ outputs:
        - scipy
        - pytorch
      commands:
-        - python -m unittest discover tests/
+        - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
        - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
        - cp tests/common_faiss_tests.py faiss/gpu/test
-        - python -m unittest discover faiss/gpu/test/
+        - python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "test_*"
        - python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "torch_*"
        - sh test_cpu_dispatch.sh  # [linux]
      files:
        - test_cpu_dispatch.sh  # [linux]
--- a/conda/faiss/meta.yaml
+++ b/conda/faiss/meta.yaml
@ -80,8 +80,8 @@ outputs:
        - scipy
        - pytorch
      commands:
-        - python -X faulthandler -m unittest discover -v -s tests -p "test_*"
+        - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
-        - python -X faulthandler -m unittest discover -v -s tests -p "torch_*"
+        - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
        - sh test_cpu_dispatch.sh  # [linux64]
      files:
        - test_cpu_dispatch.sh  # [linux64]