// faiss/gpu/GpuIndex.h

/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */


#pragma once

#include <faiss/Index.h>
#include <faiss/gpu/utils/MemorySpace.h>

namespace faiss { namespace gpu {

class GpuResources;

/// Construction-time options passed to a GpuIndex.
struct GpuIndexConfig {
  /// Leaves every field at its default: device 0 and MemorySpace::Device.
  GpuIndexConfig() {
  }

  /// GPU device on which the index is resident
  int device = 0;

  /// What memory space to use for primary storage.
  /// On Pascal and above (CC 6+) architectures, allows GPUs to use
  /// more memory than is available on the GPU.
  MemorySpace memorySpace = MemorySpace::Device;
};

class GpuIndex : public faiss::Index {
 public:
  GpuIndex(GpuResources* resources,
           int dims,
           faiss::MetricType metric,
           GpuIndexConfig config);

  inline int getDevice() const {
    return device_;
  }

  inline GpuResources* getResources() {
    return resources_;
  }

  /// Set the minimum data size for searches (in MiB) for which we use
  /// CPU -> GPU paging
  void setMinPagingSize(size_t size);

  /// Returns the current minimum data size for paged searches
  size_t getMinPagingSize() const;

  /// `x` can be resident on the CPU or any GPU; copies are performed
  /// as needed
  /// Handles paged adds if the add set is too large; calls addInternal_
  void add(faiss::Index::idx_t, const float* x) override;

  /// `x` and `ids` can be resident on the CPU or any GPU; copies are
  /// performed as needed
  /// Handles paged adds if the add set is too large; calls addInternal_
  void add_with_ids(Index::idx_t n,
                    const float* x,
                    const Index::idx_t* ids) override;

  /// `x`, `distances` and `labels` can be resident on the CPU or any
  /// GPU; copies are performed as needed
  void search(Index::idx_t n,
              const float* x,
              Index::idx_t k,
              float* distances,
              Index::idx_t* labels) const override;

  /// Overridden to force GPU indices to provide their own GPU-friendly
  /// implementation
  void compute_residual(const float* x,
                        float* residual,
                        Index::idx_t key) const override;

  /// Overridden to force GPU indices to provide their own GPU-friendly
  /// implementation
  void compute_residual_n(Index::idx_t n,
                          const float* xs,
                          float* residuals,
                          const Index::idx_t* keys) const override;

 protected:
  /// Does addImpl_ require IDs? If so, and no IDs are provided, we will
  /// generate them sequentially based on the order in which the IDs are added
  virtual bool addImplRequiresIDs_() const = 0;

  /// Overridden to actually perform the add
  /// All data is guaranteed to be resident on our device
  virtual void addImpl_(int n,
                        const float* x,
                        const Index::idx_t* ids) = 0;

  /// Overridden to actually perform the search
  /// All data is guaranteed to be resident on our device
  virtual void searchImpl_(int n,
                           const float* x,
                           int k,
                           float* distances,
                           Index::idx_t* labels) const = 0;

private:
  /// Handles paged adds if the add set is too large, passes to
  /// addImpl_ to actually perform the add for the current page
  void addPaged_(int n,
                 const float* x,
                 const Index::idx_t* ids);

  /// Calls addImpl_ for a single page of GPU-resident data
  void addPage_(int n,
                const float* x,
                const Index::idx_t* ids);

  /// Calls searchImpl_ for a single page of GPU-resident data
  void searchNonPaged_(int n,
                       const float* x,
                       int k,
                       float* outDistancesData,
                       Index::idx_t* outIndicesData) const;

  /// Calls searchImpl_ for a single page of GPU-resident data,
  /// handling paging of the data and copies from the CPU
  void searchFromCpuPaged_(int n,
                           const float* x,
                           int k,
                           float* outDistancesData,
                           Index::idx_t* outIndicesData) const;

 protected:
  /// Manages streams, cuBLAS handles and scratch memory for devices
  GpuResources* resources_;

  /// The GPU device we are resident on
  const int device_;

  /// The memory space of our primary storage on the GPU
  const MemorySpace memorySpace_;

  /// Size above which we page copies from the CPU to GPU
  size_t minPagedSize_;
};

} } // namespace
Initial commit 2017-02-23 06:26:44 +08:00			`/**`
Facebook sync (May 2019) + relicense (#838) Changelog: - changed license: BSD+Patents -> MIT - propagates exceptions raised in sub-indexes of IndexShards and IndexReplicas - support for searching several inverted lists in parallel (parallel_mode != 0) - better support for PQ codes where nbit != 8 or 16 - IVFSpectralHash implementation: spectral hash codes inside an IVF - 6-bit per component scalar quantizer (4 and 8 bit were already supported) - combinations of inverted lists: HStackInvertedLists and VStackInvertedLists - configurable number of threads for OnDiskInvertedLists prefetching (including 0=no prefetch) - more test and demo code compatible with Python 3 (print with parentheses) - refactored benchmark code: data loading is now in a single file 2019-05-28 22:17:22 +08:00			`* Copyright (c) Facebook, Inc. and its affiliates.`
Initial commit 2017-02-23 06:26:44 +08:00			`*`
Facebook sync (May 2019) + relicense (#838) Changelog: - changed license: BSD+Patents -> MIT - propagates exceptions raised in sub-indexes of IndexShards and IndexReplicas - support for searching several inverted lists in parallel (parallel_mode != 0) - better support for PQ codes where nbit != 8 or 16 - IVFSpectralHash implementation: spectral hash codes inside an IVF - 6-bit per component scalar quantizer (4 and 8 bit were already supported) - combinations of inverted lists: HStackInvertedLists and VStackInvertedLists - configurable number of threads for OnDiskInvertedLists prefetching (including 0=no prefetch) - more test and demo code compatible with Python 3 (print with parentheses) - refactored benchmark code: data loading is now in a single file 2019-05-28 22:17:22 +08:00			`* This source code is licensed under the MIT license found in the`
Initial commit 2017-02-23 06:26:44 +08:00			`* LICENSE file in the root directory of this source tree.`
			`*/`


			`#pragma once`

Facebook sync (2019-09-10) (#943) * Facebook sync (2019-09-10) * Fix depends Makefile target. * Add faiss symlink for new include directives. * Fix missing header. * Fix tests. * Fix Makefile. * Update depend. * Fix include directives spacing. 2019-09-21 00:59:10 +08:00			`#include <faiss/Index.h>`
			`#include <faiss/gpu/utils/MemorySpace.h>`
Initial commit 2017-02-23 06:26:44 +08:00
			`namespace faiss { namespace gpu {`

			`class GpuResources;`

Synchronization with FB version 2017-06-21 * moved most FAISS_ASSERT calls to C++ exceptions, and adjusted memory allocation to avoid mem leaks * added an IndexIVFScalarQuantizer type that offers an intermediate compression between IVFFlat and IVFPQ * support removal of indices in IndexIDMap / IndexFlat combination * various fixes in GPU code 2017-06-21 21:54:28 +08:00			`struct GpuIndexConfig {`
			`inline GpuIndexConfig()`
			`: device(0),`
			`memorySpace(MemorySpace::Device) {`
			`}`

			`/// GPU device on which the index is resident`
			`int device;`

sync with FB version 2017-11-22 various bugfixes from github issues kmean with some frozen centroids GPU better tiling for large flat datasets default AVX for vector ops 2017-11-22 21:11:28 +08:00			`/// What memory space to use for primary storage.`
Synchronization with FB version 2017-06-21 * moved most FAISS_ASSERT calls to C++ exceptions, and adjusted memory allocation to avoid mem leaks * added an IndexIVFScalarQuantizer type that offers an intermediate compression between IVFFlat and IVFPQ * support removal of indices in IndexIDMap / IndexFlat combination * various fixes in GPU code 2017-06-21 21:54:28 +08:00			`/// On Pascal and above (CC 6+) architectures, allows GPUs to use`
			`/// more memory than is available on the GPU.`
			`MemorySpace memorySpace;`
			`};`

Initial commit 2017-02-23 06:26:44 +08:00			`class GpuIndex : public faiss::Index {`
			`public:`
			`GpuIndex(GpuResources* resources,`
			`int dims,`
Synchronization with FB version 2017-06-21 * moved most FAISS_ASSERT calls to C++ exceptions, and adjusted memory allocation to avoid mem leaks * added an IndexIVFScalarQuantizer type that offers an intermediate compression between IVFFlat and IVFPQ * support removal of indices in IndexIDMap / IndexFlat combination * various fixes in GPU code 2017-06-21 21:54:28 +08:00			`faiss::MetricType metric,`
			`GpuIndexConfig config);`
Initial commit 2017-02-23 06:26:44 +08:00
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`inline int getDevice() const {`
Initial commit 2017-02-23 06:26:44 +08:00			`return device_;`
			`}`

Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`inline GpuResources* getResources() {`
Initial commit 2017-02-23 06:26:44 +08:00			`return resources_;`
			`}`

Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`/// Set the minimum data size for searches (in MiB) for which we use`
			`/// CPU -> GPU paging`
			`void setMinPagingSize(size_t size);`

			`/// Returns the current minimum data size for paged searches`
			`size_t getMinPagingSize() const;`

Better support for low-mem GPUs avoid reading beyond the end of an array in fvec_L2sqr and related functions 2017-04-06 19:33:41 +08:00			/// `x` can be resident on the CPU or any GPU; copies are performed
			`/// as needed`
			`/// Handles paged adds if the add set is too large; calls addInternal_`
Synchronization with FB version 2017-06-21 * moved most FAISS_ASSERT calls to C++ exceptions, and adjusted memory allocation to avoid mem leaks * added an IndexIVFScalarQuantizer type that offers an intermediate compression between IVFFlat and IVFPQ * support removal of indices in IndexIDMap / IndexFlat combination * various fixes in GPU code 2017-06-21 21:54:28 +08:00			`void add(faiss::Index::idx_t, const float* x) override;`
Better support for low-mem GPUs avoid reading beyond the end of an array in fvec_L2sqr and related functions 2017-04-06 19:33:41 +08:00
			/// `x` and `ids` can be resident on the CPU or any GPU; copies are
			`/// performed as needed`
			`/// Handles paged adds if the add set is too large; calls addInternal_`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`void add_with_ids(Index::idx_t n,`
			`const float* x,`
			`const Index::idx_t* ids) override;`
Better support for low-mem GPUs avoid reading beyond the end of an array in fvec_L2sqr and related functions 2017-04-06 19:33:41 +08:00
			/// `x`, `distances` and `labels` can be resident on the CPU or any
			`/// GPU; copies are performed as needed`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`void search(Index::idx_t n,`
			`const float* x,`
			`Index::idx_t k,`
			`float* distances,`
			`Index::idx_t* labels) const override;`
Better support for low-mem GPUs avoid reading beyond the end of an array in fvec_L2sqr and related functions 2017-04-06 19:33:41 +08:00
Facebook sync (2019-09-10) (#943) * Facebook sync (2019-09-10) * Fix depends Makefile target. * Add faiss symlink for new include directives. * Fix missing header. * Fix tests. * Fix Makefile. * Update depend. * Fix include directives spacing. 2019-09-21 00:59:10 +08:00			`/// Overridden to force GPU indices to provide their own GPU-friendly`
			`/// implementation`
			`void compute_residual(const float* x,`
			`float* residual,`
			`Index::idx_t key) const override;`

			`/// Overridden to force GPU indices to provide their own GPU-friendly`
			`/// implementation`
			`void compute_residual_n(Index::idx_t n,`
			`const float* xs,`
			`float* residuals,`
			`const Index::idx_t* keys) const override;`

Better support for low-mem GPUs avoid reading beyond the end of an array in fvec_L2sqr and related functions 2017-04-06 19:33:41 +08:00			`protected:`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`/// Does addImpl_ require IDs? If so, and no IDs are provided, we will`
			`/// generate them sequentially based on the order in which the IDs are added`
			`virtual bool addImplRequiresIDs_() const = 0;`
Better support for low-mem GPUs avoid reading beyond the end of an array in fvec_L2sqr and related functions 2017-04-06 19:33:41 +08:00
			`/// Overridden to actually perform the add`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`/// All data is guaranteed to be resident on our device`
			`virtual void addImpl_(int n,`
Better support for low-mem GPUs avoid reading beyond the end of an array in fvec_L2sqr and related functions 2017-04-06 19:33:41 +08:00			`const float* x,`
			`const Index::idx_t* ids) = 0;`

			`/// Overridden to actually perform the search`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`/// All data is guaranteed to be resident on our device`
			`virtual void searchImpl_(int n,`
Better support for low-mem GPUs avoid reading beyond the end of an array in fvec_L2sqr and related functions 2017-04-06 19:33:41 +08:00			`const float* x,`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`int k,`
Better support for low-mem GPUs avoid reading beyond the end of an array in fvec_L2sqr and related functions 2017-04-06 19:33:41 +08:00			`float* distances,`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`Index::idx_t* labels) const = 0;`

			`private:`
			`/// Handles paged adds if the add set is too large, passes to`
			`/// addImpl_ to actually perform the add for the current page`
			`void addPaged_(int n,`
			`const float* x,`
			`const Index::idx_t* ids);`

			`/// Calls addImpl_ for a single page of GPU-resident data`
			`void addPage_(int n,`
			`const float* x,`
			`const Index::idx_t* ids);`

			`/// Calls searchImpl_ for a single page of GPU-resident data`
			`void searchNonPaged_(int n,`
			`const float* x,`
			`int k,`
			`float* outDistancesData,`
			`Index::idx_t* outIndicesData) const;`

			`/// Calls searchImpl_ for a single page of GPU-resident data,`
			`/// handling paging of the data and copies from the CPU`
			`void searchFromCpuPaged_(int n,`
			`const float* x,`
			`int k,`
			`float* outDistancesData,`
			`Index::idx_t* outIndicesData) const;`
Initial commit 2017-02-23 06:26:44 +08:00
			`protected:`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00			`/// Manages streams, cuBLAS handles and scratch memory for devices`
Initial commit 2017-02-23 06:26:44 +08:00			`GpuResources* resources_;`

			`/// The GPU device we are resident on`
Synchronization with FB version 2017-06-21 * moved most FAISS_ASSERT calls to C++ exceptions, and adjusted memory allocation to avoid mem leaks * added an IndexIVFScalarQuantizer type that offers an intermediate compression between IVFFlat and IVFPQ * support removal of indices in IndexIDMap / IndexFlat combination * various fixes in GPU code 2017-06-21 21:54:28 +08:00			`const int device_;`

			`/// The memory space of our primary storage on the GPU`
			`const MemorySpace memorySpace_;`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 23:32:28 +08:00
			`/// Size above which we page copies from the CPU to GPU`
			`size_t minPagedSize_;`
Initial commit 2017-02-23 06:26:44 +08:00			`};`

			`} } // namespace`