faiss/faiss/gpu/impl/IVFBase.cuh
Corey J. Nolet edcf7438bb Integrate IVF-Flat from RAFT (#2521)
Summary:
This is a design proposal that demonstrates an approach to enabling optional support for [RAFT](https://github.com/rapidsai/raft) versions of IVF PQ and IVF Flat (and brute force w/ fused k-selection when k <= 64). There are still a few open issues and design discussions needed before the new RAFT index types can support the full range of features of FAISS' current GPU index types.

Checklist for the integration todos:
- [x] Rebase on current `main` branch
- [X] The raft handle has been plugged directly into the StandardGpuResources
- [X] `FlatIndex` passing Googletests
- [x] Use `CodePacker` to support `copyFrom()` and `copyTo()`
- [X] `IVF-flat` passing Googletests
- [ ] Raise appropriate exceptions for operations which are not yet supported by RAFT

Additional features we've discussed:
- [x] Separate IVF lists into individual memory chunks
- [ ] Saving/loading

To build FAISS w/ optional RAFT support:
```
mkdir build
cd build
cmake ../ -DFAISS_ENABLE_RAFT=ON -DFAISS_ENABLE_GPU=ON
make -j
```

For development/testing, we've also supplied a bash script to make things easier: `build.sh`

Below is a benchmark comparing the training of IVF Flat indices for RAFT and FAISS:
![image](https://user-images.githubusercontent.com/1242464/194944737-8b808f11-e28e-4556-82d1-1ea4b0707283.png)

The benchmark was produced using Googlebench in [this](https://github.com/tfeher/raft/tree/raft_faiss_bench) RAFT fork. We're going to provide benchmarks for the queries as well. There are still a couple of bottlenecks to be removed in the IVF-Flat training implementation, and we'll update the current benchmark when ready.

Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2521

Test Plan: `buck test mode/dev-nosan //faiss/gpu/test:test_gpu_index_ivfflat`

Reviewed By: algoriddle

Differential Revision: D49118319

Pulled By: mdouze

fbshipit-source-id: 5916108bc27154acf7c92021ba579a6ca85d730b
2023-10-04 23:42:30 -07:00

272 lines
9.6 KiB
Plaintext

/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/Index.h>
#include <faiss/MetricType.h>
#include <faiss/gpu/GpuIndicesOptions.h>
#include <faiss/gpu/utils/DeviceTensor.cuh>
#include <faiss/gpu/utils/DeviceVector.cuh>
#include <memory>
#include <vector>
namespace faiss {
/// Forward declaration of the CPU inverted-list container. This header only
/// refers to it through pointers (see copyInvertedListsFrom /
/// copyInvertedListsTo), so the full definition is not needed here.
struct InvertedLists;
}
namespace faiss {
namespace gpu {
/// Forward declaration; this header only holds and passes GpuResources by
/// pointer, so the full definition is not required here.
class GpuResources;
/// Forward declaration.
/// NOTE(review): FlatIndex is not referenced by any declaration visible in
/// this header -- confirm whether this forward declaration is still needed.
class FlatIndex;
/// Base inverted list functionality for IVFFlat and IVFPQ.
///
/// This class is abstract: search(), searchPreassigned(), the encoding size
/// queries, the code translation hooks and appendVectors_() are pure virtual
/// and must be provided by a derived class. Only declarations live in this
/// header; the definitions are in a separate implementation file.
class IVFBase {
public:
/// Construct the shared IVF state.
/// NOTE(review): the definition is not part of this header; the parameters
/// presumably initialize the like-named members declared further below
/// (dim -> dim_, nlist -> numLists_, metric -> metric_,
/// metricArg -> metricArg_, ...) -- confirm against the implementation.
IVFBase(GpuResources* resources,
int dim,
idx_t nlist,
faiss::MetricType metric,
float metricArg,
bool interleavedLayout,
bool useResidual,
IndicesOptions indicesOptions,
MemorySpace space);
/// Virtual so that derived indices can be destroyed through an IVFBase
/// pointer
virtual ~IVFBase();
/// Reserve GPU memory in our inverted lists for this number of vectors
void reserveMemory(idx_t numVecs);
/// Clear out all inverted lists, but retain the coarse quantizer
/// and the product quantizer info
virtual void reset();
/// Return the number of dimensions we are indexing.
/// NOTE(review): the return type is idx_t while the dim_ member below is
/// declared as int -- confirm the widening is intentional.
idx_t getDim() const;
/// After adding vectors, one can call this to reclaim device memory
/// to exactly the amount needed. Returns space reclaimed in bytes
size_t reclaimMemory();
/// Returns the number of inverted lists
idx_t getNumLists() const;
/// For debugging purposes, return the list length of a particular
/// list
virtual idx_t getListLength(idx_t listId) const;
/// Return the list indices of a particular list back to the CPU
virtual std::vector<idx_t> getListIndices(idx_t listId) const;
/// Return the encoded vectors of a particular list back to the CPU.
/// NOTE(review): `gpuFormat` presumably selects between returning the raw
/// GPU encoding and the translated CPU encoding (see
/// translateCodesFromGpu_) -- confirm against the implementation.
virtual std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat)
const;
/// Copy all inverted lists from a CPU representation to ourselves
virtual void copyInvertedListsFrom(const InvertedLists* ivf);
/// Copy all inverted lists from ourselves to a CPU representation
virtual void copyInvertedListsTo(InvertedLists* ivf);
/// Update our coarse quantizer with this quantizer instance; may be a CPU
/// or GPU quantizer
virtual void updateQuantizer(Index* quantizer);
/// Classify and encode/add vectors to our IVF lists.
/// The input data must be on our current device.
/// Returns the number of vectors successfully added. Vectors may
/// not be able to be added because they contain NaNs.
virtual idx_t addVectors(
Index* coarseQuantizer,
Tensor<float, 2, true>& vecs,
Tensor<idx_t, 1, true>& indices);
/// Find the approximate k nearest neighbors for `queries` against
/// our database.
/// Pure virtual: implemented by the concrete IVF index type.
/// NOTE(review): outDistances / outIndices are presumably sized
/// (#queries, k) -- confirm against the implementations.
virtual void search(
Index* coarseQuantizer,
Tensor<float, 2, true>& queries,
int nprobe,
int k,
Tensor<float, 2, true>& outDistances,
Tensor<idx_t, 2, true>& outIndices) = 0;
/// Performs search when we are already given the IVF cells to look at
/// (GpuIndexIVF::search_preassigned implementation).
/// Pure virtual: implemented by the concrete IVF index type.
/// NOTE(review): the meaning of `storePairs` is not visible from this
/// header -- consult the implementations before relying on it.
virtual void searchPreassigned(
Index* coarseQuantizer,
Tensor<float, 2, true>& vecs,
Tensor<float, 2, true>& ivfDistances,
Tensor<idx_t, 2, true>& ivfAssignments,
int k,
Tensor<float, 2, true>& outDistances,
Tensor<idx_t, 2, true>& outIndices,
bool storePairs) = 0;
protected:
/// Adds a set of codes and indices to a list, with the representation
/// coming from the CPU equivalent. Both `codes` and `indices` point to
/// host memory.
virtual void addEncodedVectorsToList_(
idx_t listId,
// resident on the host
const void* codes,
// resident on the host
const idx_t* indices,
idx_t numVecs);
/// Performs search in a CPU or GPU coarse quantizer for IVF cells,
/// returning residuals as well if necessary.
/// `residuals` and `centroids` are optional outputs passed by pointer.
void searchCoarseQuantizer_(
Index* coarseQuantizer,
int nprobe,
// guaranteed resident on device
Tensor<float, 2, true>& vecs,
// Output: the distances to the closest nprobe IVF cell centroids
// for the query vectors
// size (#vecs, nprobe)
Tensor<float, 2, true>& distances,
// Output: the closest nprobe IVF cells the query vectors lie in
// size (#vecs, nprobe)
Tensor<idx_t, 2, true>& indices,
// optionally compute the residual relative to the IVF cell centroid
// if passed
// size (#vecs, nprobe, dim)
Tensor<float, 3, true>* residuals,
// optionally return the IVF cell centroids to which the input
// vectors were assigned
// size (#vecs, nprobe, dim)
Tensor<float, 3, true>* centroids);
/// Returns the number of bytes in which an IVF list containing numVecs
/// vectors is encoded on the device. Note that due to padding this is not
/// the same as the encoding size for a subset of vectors in an IVF list;
/// this is the size for an entire IVF list
virtual size_t getGpuVectorsEncodingSize_(idx_t numVecs) const = 0;
/// CPU-side counterpart of getGpuVectorsEncodingSize_.
/// NOTE(review): presumably the number of bytes that numVecs vectors
/// occupy in the CPU encoding -- confirm against the implementations.
virtual size_t getCpuVectorsEncodingSize_(idx_t numVecs) const = 0;
/// Translate to our preferred GPU encoding.
/// `codes` is taken by value and the translated bytes are returned.
virtual std::vector<uint8_t> translateCodesToGpu_(
std::vector<uint8_t> codes,
idx_t numVecs) const = 0;
/// Translate from our preferred GPU encoding.
/// `codes` is taken by value and the translated bytes are returned.
virtual std::vector<uint8_t> translateCodesFromGpu_(
std::vector<uint8_t> codes,
idx_t numVecs) const = 0;
/// Append vectors to our on-device lists.
/// Pure virtual: the concrete index type decides how vectors are encoded.
/// NOTE(review): the exact contents of the uniqueList* / listIds /
/// listOffset tensors are not described in this header -- see the caller
/// in the implementation file before changing any of them.
virtual void appendVectors_(
Tensor<float, 2, true>& vecs,
Tensor<float, 2, true>& ivfCentroidResiduals,
Tensor<idx_t, 1, true>& indices,
Tensor<idx_t, 1, true>& uniqueLists,
Tensor<idx_t, 1, true>& vectorsByUniqueList,
Tensor<idx_t, 1, true>& uniqueListVectorStart,
Tensor<idx_t, 1, true>& uniqueListStartOffset,
Tensor<idx_t, 1, true>& listIds,
Tensor<idx_t, 1, true>& listOffset,
cudaStream_t stream) = 0;
/// Reclaim memory consumed on the device for our inverted lists
/// `exact` means we trim exactly to the memory needed.
/// Returns a size_t; presumably the number of bytes reclaimed, as for
/// reclaimMemory() -- confirm against the implementation.
size_t reclaimMemory_(bool exact);
/// Update all device-side list pointer and size information
void updateDeviceListInfo_(cudaStream_t stream);
/// For a set of list IDs, update device-side list pointer and size
/// information
void updateDeviceListInfo_(
const std::vector<idx_t>& listIds,
cudaStream_t stream);
/// Shared function to copy indices from CPU to GPU
void addIndicesFromCpu_(idx_t listId, const idx_t* indices, idx_t numVecs);
// Access is already protected at this point; the repeated label only
// separates the data members below from the member functions above.
protected:
/// Collection of GPU resources that we use
GpuResources* resources_;
/// Metric type of the index
faiss::MetricType metric_;
/// Metric arg
float metricArg_;
/// Expected dimensionality of the vectors
const int dim_;
/// Number of inverted lists we maintain
const idx_t numLists_;
/// Do we need to also compute residuals when processing vectors?
bool useResidual_;
/// Coarse quantizer centroids available on GPU
DeviceTensor<float, 2, true> ivfCentroids_;
/// Whether or not our index uses an interleaved by 32 layout:
/// The default memory layout is [vector][PQ/SQ component]:
/// (v0 d0) (v0 d1) ... (v0 dD-1) (v1 d0) (v1 d1) ...
///
/// The interleaved by 32 memory layout is:
/// [vector / 32][PQ/SQ component][vector % 32] with padding:
/// (v0 d0) (v1 d0) ... (v31 d0) (v0 d1) (v1 d1) ... (v31 dD-1) (v32 d0)
/// (v33 d0) ... so the list length is always a multiple of num quantizers *
/// 32
bool interleavedLayout_;
/// How are user indices stored on the GPU?
const IndicesOptions indicesOptions_;
/// What memory space our inverted list storage is in
const MemorySpace space_;
/// Device representation of all inverted list data
/// id -> data
DeviceVector<void*> deviceListDataPointers_;
/// Device representation of all inverted list index pointers
/// id -> data
DeviceVector<void*> deviceListIndexPointers_;
/// Device representation of all inverted list lengths
/// id -> length in number of vectors
DeviceVector<idx_t> deviceListLengths_;
/// Maximum list length seen
idx_t maxListLength_;
/// Host-side record describing the device storage of one IVF list: a byte
/// buffer plus the number of vectors it currently encodes.
struct DeviceIVFList {
/// NOTE(review): `res` and `info` are presumably forwarded to the `data`
/// DeviceVector -- confirm against the definition.
DeviceIVFList(GpuResources* res, const AllocInfo& info);
/// The on-device memory for this particular IVF list
DeviceVector<uint8_t> data;
/// The number of vectors encoded in this list, which may be unrelated
/// to the above allocated data size
idx_t numVecs;
};
/// Device memory for each separate list, as managed by the host.
/// Each list is held through a unique_ptr since deviceList*Pointers_
/// must remain valid despite resizing (and potential re-allocation)
/// of deviceList*_
std::vector<std::unique_ptr<DeviceIVFList>> deviceListData_;
std::vector<std::unique_ptr<DeviceIVFList>> deviceListIndices_;
/// If we are storing indices on the CPU (indicesOptions_ is
/// INDICES_CPU), then this maintains a CPU-side map of what
/// (inverted list id, offset) maps to which user index
std::vector<std::vector<idx_t>> listOffsetToUserIndex_;
};
} // namespace gpu
} // namespace faiss