mirror of
https://github.com/facebookresearch/faiss.git
synced 2025-06-03 21:54:02 +08:00
Summary: This is a design proposal that demonstrates an approach to enabling optional support for [RAFT](https://github.com/rapidsai/raft) versions of IVF PQ and IVF Flat (and brute force w/ fused k-selection when k <= 64). There are still a few open issues and design discussions needed for the new RAFT index types to support the full range of features of that FAISS' current gpu index types. Checklist for the integration todos: - [x] Rebase on current `main` branch - [X] The raft handle has been plugged directly into the StandardGpuResources - [X] `FlatIndex` passing Googletests - [x] Use `CodePacker` to support `copyFrom()` and `copyTo()` - [X] `IVF-flat passing Googletests - [ ] Raise appropriate exceptions for operations which are not yet supported by RAFT Additional features we've discussed: - [x] Separate IVF lists into individual memory chunks - [ ] Saving/loading To build FAISS w/ optional RAFT support: ``` mkdir build cd build cmake ../ -DFAISS_ENABLE_RAFT=ON -DFAISS_ENABLE_GPU=ON make -j ``` For development/testing, we've also supplied a bash script to make things easier: `build.sh` Below is a benchmark comparing the training of IVF Flat indices for RAFT and FAISS:  The benchmark was produced using Googlebench in [this](https://github.com/tfeher/raft/tree/raft_faiss_bench) RAFT fork. We're going to provide benchmarks for the queries as well. There are still a couple bottlenecks to be removed in the IVF-Flat training implementation and we'll update the current benchmark when ready. Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2521 Test Plan: `buck test mode/debuck test mode/dev-nosan //faiss/gpu/test:test_gpu_index_ivfflat` Reviewed By: algoriddle Differential Revision: D49118319 Pulled By: mdouze fbshipit-source-id: 5916108bc27154acf7c92021ba579a6ca85d730b
272 lines
9.6 KiB
Plaintext
272 lines
9.6 KiB
Plaintext
/**
|
|
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
*
|
|
* This source code is licensed under the MIT license found in the
|
|
* LICENSE file in the root directory of this source tree.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <faiss/Index.h>
|
|
#include <faiss/MetricType.h>
|
|
#include <faiss/gpu/GpuIndicesOptions.h>
|
|
#include <faiss/gpu/utils/DeviceTensor.cuh>
|
|
#include <faiss/gpu/utils/DeviceVector.cuh>
|
|
#include <memory>
|
|
#include <vector>
|
|
|
|
namespace faiss {
|
|
struct InvertedLists;
|
|
}
|
|
|
|
namespace faiss {
|
|
namespace gpu {
|
|
|
|
class GpuResources;
|
|
class FlatIndex;
|
|
|
|
/// Base inverted list functionality for IVFFlat and IVFPQ
|
|
class IVFBase {
|
|
public:
|
|
IVFBase(GpuResources* resources,
|
|
int dim,
|
|
idx_t nlist,
|
|
faiss::MetricType metric,
|
|
float metricArg,
|
|
bool interleavedLayout,
|
|
bool useResidual,
|
|
IndicesOptions indicesOptions,
|
|
MemorySpace space);
|
|
|
|
virtual ~IVFBase();
|
|
|
|
/// Reserve GPU memory in our inverted lists for this number of vectors
|
|
void reserveMemory(idx_t numVecs);
|
|
|
|
/// Clear out all inverted lists, but retain the coarse quantizer
|
|
/// and the product quantizer info
|
|
virtual void reset();
|
|
|
|
/// Return the number of dimensions we are indexing
|
|
idx_t getDim() const;
|
|
|
|
/// After adding vectors, one can call this to reclaim device memory
|
|
/// to exactly the amount needed. Returns space reclaimed in bytes
|
|
size_t reclaimMemory();
|
|
|
|
/// Returns the number of inverted lists
|
|
idx_t getNumLists() const;
|
|
|
|
/// For debugging purposes, return the list length of a particular
|
|
/// list
|
|
virtual idx_t getListLength(idx_t listId) const;
|
|
|
|
/// Return the list indices of a particular list back to the CPU
|
|
virtual std::vector<idx_t> getListIndices(idx_t listId) const;
|
|
|
|
/// Return the encoded vectors of a particular list back to the CPU
|
|
virtual std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat)
|
|
const;
|
|
|
|
/// Copy all inverted lists from a CPU representation to ourselves
|
|
virtual void copyInvertedListsFrom(const InvertedLists* ivf);
|
|
|
|
/// Copy all inverted lists from ourselves to a CPU representation
|
|
virtual void copyInvertedListsTo(InvertedLists* ivf);
|
|
|
|
/// Update our coarse quantizer with this quantizer instance; may be a CPU
|
|
/// or GPU quantizer
|
|
virtual void updateQuantizer(Index* quantizer);
|
|
|
|
/// Classify and encode/add vectors to our IVF lists.
|
|
/// The input data must be on our current device.
|
|
/// Returns the number of vectors successfully added. Vectors may
|
|
/// not be able to be added because they contain NaNs.
|
|
virtual idx_t addVectors(
|
|
Index* coarseQuantizer,
|
|
Tensor<float, 2, true>& vecs,
|
|
Tensor<idx_t, 1, true>& indices);
|
|
|
|
/// Find the approximate k nearest neigbors for `queries` against
|
|
/// our database
|
|
virtual void search(
|
|
Index* coarseQuantizer,
|
|
Tensor<float, 2, true>& queries,
|
|
int nprobe,
|
|
int k,
|
|
Tensor<float, 2, true>& outDistances,
|
|
Tensor<idx_t, 2, true>& outIndices) = 0;
|
|
|
|
/// Performs search when we are already given the IVF cells to look at
|
|
/// (GpuIndexIVF::search_preassigned implementation)
|
|
virtual void searchPreassigned(
|
|
Index* coarseQuantizer,
|
|
Tensor<float, 2, true>& vecs,
|
|
Tensor<float, 2, true>& ivfDistances,
|
|
Tensor<idx_t, 2, true>& ivfAssignments,
|
|
int k,
|
|
Tensor<float, 2, true>& outDistances,
|
|
Tensor<idx_t, 2, true>& outIndices,
|
|
bool storePairs) = 0;
|
|
|
|
protected:
|
|
/// Adds a set of codes and indices to a list, with the representation
|
|
/// coming from the CPU equivalent
|
|
virtual void addEncodedVectorsToList_(
|
|
idx_t listId,
|
|
// resident on the host
|
|
const void* codes,
|
|
// resident on the host
|
|
const idx_t* indices,
|
|
idx_t numVecs);
|
|
|
|
/// Performs search in a CPU or GPU coarse quantizer for IVF cells,
|
|
/// returning residuals as well if necessary
|
|
void searchCoarseQuantizer_(
|
|
Index* coarseQuantizer,
|
|
int nprobe,
|
|
// guaranteed resident on device
|
|
Tensor<float, 2, true>& vecs,
|
|
// Output: the distances to the closest nprobe IVF cell centroids
|
|
// for the query vectors
|
|
// size (#vecs, nprobe)
|
|
Tensor<float, 2, true>& distances,
|
|
// Output: the closest nprobe IVF cells the query vectors lie in
|
|
// size (#vecs, nprobe)
|
|
Tensor<idx_t, 2, true>& indices,
|
|
// optionally compute the residual relative to the IVF cell centroid
|
|
// if passed
|
|
// size (#vecs, nprobe, dim)
|
|
Tensor<float, 3, true>* residuals,
|
|
// optionally return the IVF cell centroids to which the input
|
|
// vectors were assigned
|
|
// size (#vecs, nprobe, dim)
|
|
Tensor<float, 3, true>* centroids);
|
|
|
|
/// Returns the number of bytes in which an IVF list containing numVecs
|
|
/// vectors is encoded on the device. Note that due to padding this is not
|
|
/// the same as the encoding size for a subset of vectors in an IVF list;
|
|
/// this is the size for an entire IVF list
|
|
virtual size_t getGpuVectorsEncodingSize_(idx_t numVecs) const = 0;
|
|
virtual size_t getCpuVectorsEncodingSize_(idx_t numVecs) const = 0;
|
|
|
|
/// Translate to our preferred GPU encoding
|
|
virtual std::vector<uint8_t> translateCodesToGpu_(
|
|
std::vector<uint8_t> codes,
|
|
idx_t numVecs) const = 0;
|
|
|
|
/// Translate from our preferred GPU encoding
|
|
virtual std::vector<uint8_t> translateCodesFromGpu_(
|
|
std::vector<uint8_t> codes,
|
|
idx_t numVecs) const = 0;
|
|
|
|
/// Append vectors to our on-device lists
|
|
virtual void appendVectors_(
|
|
Tensor<float, 2, true>& vecs,
|
|
Tensor<float, 2, true>& ivfCentroidResiduals,
|
|
Tensor<idx_t, 1, true>& indices,
|
|
Tensor<idx_t, 1, true>& uniqueLists,
|
|
Tensor<idx_t, 1, true>& vectorsByUniqueList,
|
|
Tensor<idx_t, 1, true>& uniqueListVectorStart,
|
|
Tensor<idx_t, 1, true>& uniqueListStartOffset,
|
|
Tensor<idx_t, 1, true>& listIds,
|
|
Tensor<idx_t, 1, true>& listOffset,
|
|
cudaStream_t stream) = 0;
|
|
|
|
/// Reclaim memory consumed on the device for our inverted lists
|
|
/// `exact` means we trim exactly to the memory needed
|
|
size_t reclaimMemory_(bool exact);
|
|
|
|
/// Update all device-side list pointer and size information
|
|
void updateDeviceListInfo_(cudaStream_t stream);
|
|
|
|
/// For a set of list IDs, update device-side list pointer and size
|
|
/// information
|
|
void updateDeviceListInfo_(
|
|
const std::vector<idx_t>& listIds,
|
|
cudaStream_t stream);
|
|
|
|
/// Shared function to copy indices from CPU to GPU
|
|
void addIndicesFromCpu_(idx_t listId, const idx_t* indices, idx_t numVecs);
|
|
|
|
protected:
|
|
/// Collection of GPU resources that we use
|
|
GpuResources* resources_;
|
|
|
|
/// Metric type of the index
|
|
faiss::MetricType metric_;
|
|
|
|
/// Metric arg
|
|
float metricArg_;
|
|
|
|
/// Expected dimensionality of the vectors
|
|
const int dim_;
|
|
|
|
/// Number of inverted lists we maintain
|
|
const idx_t numLists_;
|
|
|
|
/// Do we need to also compute residuals when processing vectors?
|
|
bool useResidual_;
|
|
|
|
/// Coarse quantizer centroids available on GPU
|
|
DeviceTensor<float, 2, true> ivfCentroids_;
|
|
|
|
/// Whether or not our index uses an interleaved by 32 layout:
|
|
/// The default memory layout is [vector][PQ/SQ component]:
|
|
/// (v0 d0) (v0 d1) ... (v0 dD-1) (v1 d0) (v1 d1) ...
|
|
///
|
|
/// The interleaved by 32 memory layout is:
|
|
/// [vector / 32][PQ/SQ component][vector % 32] with padding:
|
|
/// (v0 d0) (v1 d0) ... (v31 d0) (v0 d1) (v1 d1) ... (v31 dD-1) (v32 d0)
|
|
/// (v33 d0) ... so the list length is always a multiple of num quantizers *
|
|
/// 32
|
|
bool interleavedLayout_;
|
|
|
|
/// How are user indices stored on the GPU?
|
|
const IndicesOptions indicesOptions_;
|
|
|
|
/// What memory space our inverted list storage is in
|
|
const MemorySpace space_;
|
|
|
|
/// Device representation of all inverted list data
|
|
/// id -> data
|
|
DeviceVector<void*> deviceListDataPointers_;
|
|
|
|
/// Device representation of all inverted list index pointers
|
|
/// id -> data
|
|
DeviceVector<void*> deviceListIndexPointers_;
|
|
|
|
/// Device representation of all inverted list lengths
|
|
/// id -> length in number of vectors
|
|
DeviceVector<idx_t> deviceListLengths_;
|
|
|
|
/// Maximum list length seen
|
|
idx_t maxListLength_;
|
|
|
|
struct DeviceIVFList {
|
|
DeviceIVFList(GpuResources* res, const AllocInfo& info);
|
|
|
|
/// The on-device memory for this particular IVF list
|
|
DeviceVector<uint8_t> data;
|
|
|
|
/// The number of vectors encoded in this list, which may be unrelated
|
|
/// to the above allocated data size
|
|
idx_t numVecs;
|
|
};
|
|
|
|
/// Device memory for each separate list, as managed by the host.
|
|
/// Device memory as stored in DeviceVector is stored as unique_ptr
|
|
/// since deviceList*Pointers_ must remain valid despite
|
|
/// resizing (and potential re-allocation) of deviceList*_
|
|
std::vector<std::unique_ptr<DeviceIVFList>> deviceListData_;
|
|
std::vector<std::unique_ptr<DeviceIVFList>> deviceListIndices_;
|
|
|
|
/// If we are storing indices on the CPU (indicesOptions_ is
|
|
/// INDICES_CPU), then this maintains a CPU-side map of what
|
|
/// (inverted list id, offset) maps to which user index
|
|
std::vector<std::vector<idx_t>> listOffsetToUserIndex_;
|
|
};
|
|
|
|
} // namespace gpu
|
|
} // namespace faiss
|