12 #include "GpuIndexIVFPQ.h"
13 #include "../ProductQuantizer.h"
14 #include "GpuIndexFlat.h"
15 #include "GpuResources.h"
16 #include "impl/IVFPQ.cuh"
17 #include "utils/CopyUtils.cuh"
18 #include "utils/DeviceUtils.h"
19 #include "../IndexFlat.h"
20 #include "../IndexIVFPQ.h"
24 namespace faiss {
namespace gpu {
// Constructor that clones an existing CPU faiss::IndexIVFPQ (full signature
// in the member reference below); only fragments of the listing survive.
GpuIndexIVFPQ::GpuIndexIVFPQ(/* ... */
                             IndicesOptions indicesOptions,
                             bool useFloat16LookupTables,
                             const faiss::IndexIVFPQ* index) :
    /* ... */
    useFloat16LookupTables_(useFloat16LookupTables),
    usePrecomputed_(false),
    reserveMemoryVecs_(0) /* ... */ {
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16LookupTables_);
#endif
  // ...
}

// Constructor that builds an empty index from explicit parameters.
GpuIndexIVFPQ::GpuIndexIVFPQ(/* ... */
                             IndicesOptions indicesOptions,
                             bool useFloat16LookupTables,
                             /* ... */) :
    /* ... */
    useFloat16LookupTables_(useFloat16LookupTables),
    subQuantizers_(subQuantizers),
    bitsPerCode_(bitsPerCode),
    usePrecomputed_(usePrecomputed),
    reserveMemoryVecs_(0) /* ... */ {
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16LookupTables_);
#endif
  // ...
  // Only the L2 metric is supported.
  FAISS_ASSERT(this->metric_type == faiss::METRIC_L2);
}

GpuIndexIVFPQ::~GpuIndexIVFPQ() {
  // ...
}

// Copy a trained CPU faiss::IndexIVFPQ over to the GPU.
void GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) {
  // ...
  FAISS_ASSERT(index->metric_type == faiss::METRIC_L2);
  // ...
  subQuantizers_ = index->pq.M;
  bitsPerCode_ = index->pq.nbits;
  // ... (the device-side IVFPQ implementation is constructed here, passing
  //      useFloat16LookupTables_ among its arguments)
  // Copy each CPU inverted list (codes and ids) to the device.
  for (size_t i = 0; i < index->codes.size(); ++i) {
    auto& codes = index->codes[i];
    auto& indices = index->ids[i];
    // ...
    FAISS_ASSERT(indices.size() * subQuantizers_ == codes.size());
    index_->addCodeVectorsFromCpu(i, /* ... */);
  }
  // ...
}

// Copy our state back to a CPU faiss::IndexIVFPQ.
void GpuIndexIVFPQ::copyTo(faiss::IndexIVFPQ* index) const {
  // ...
  index->codes.clear();
  index->codes.resize(nlist_);
  // ...
  // Copy each device-side inverted list back to the CPU index.
  for (int i = 0; i < nlist_; ++i) {
    // ...
  }
  // ...
  // Copy the PQ centroids back from the device (devPQCentroids comes from the
  // implementation's getPQCentroids(); see the member reference below).
  index->pq.centroids.resize(devPQCentroids.numElements());
  fromDevice<float, 3>(devPQCentroids, /* ... */);
  // ...
  if (usePrecomputed_) {
    // ...
  }
}

// Fragments of the simple accessors (full signatures appear in the member
// reference below):
  reserveMemoryVecs_ = numVecs;          // reserveMemory(size_t numVecs)
  usePrecomputed_ = enable;              // setPrecomputedCodes(bool enable)
  return usePrecomputed_;                // getPrecomputedCodes() const
  return useFloat16LookupTables_;        // getFloat16LookupTables() const
  return subQuantizers_;                 // getNumSubQuantizers() const
  return utils::pow2(bitsPerCode_);      // getCentroidsPerSubQuantizer() const

// ...
  FAISS_ASSERT(this->ntotal == 0);

void GpuIndexIVFPQ::trainResidualQuantizer_(Index::idx_t n, const float* x) {
  // Cap the number of training points, as the CPU IndexIVFPQ does.
  n = std::min(n, (Index::idx_t) (1 << bitsPerCode_) * 64);
  // ...
  printf("computing residuals\n");
  // ...
  // Assign the training points to coarse centroids and compute residuals.
  std::vector<Index::idx_t> assign(n);
  // ...
  std::vector<float> residuals(n * d);
  for (idx_t i = 0; i < n; i++) {
    // ...
  }
  // ...
  printf("training %d x %d product quantizer on %ld vectors in %dD\n",
         /* ... */);
  // ...
  // Train the product quantizer on the residuals.
  pq.train(n, residuals.data());
  // ... (the device-side IVFPQ implementation is constructed here, passing
  //      useFloat16LookupTables_ among its arguments)
  if (reserveMemoryVecs_) {
    // ...
  }
}

// Fragments of train(): if the index is already trained the device-side index
// must exist; otherwise train the coarse quantizer, then the PQ on residuals.
  FAISS_ASSERT(index_);
  // ...
  FAISS_ASSERT(!index_);
  trainQuantizer_(n, x);
  trainResidualQuantizer_(n, x);

// Fragments of add_with_ids(): the host data is wrapped as a device tensor on
// the current stream before being handed to the device-side index.
  FAISS_ASSERT(index_);
  // ...
  auto stream = resources_->getDefaultStreamCurrentDevice();
  // ...
      const_cast<float*>(x),
      // ...
      {(int) n, index_->getDim()});

// Fragments of search(): the queries are wrapped as a device tensor, the
// device-side index is queried, and the results are copied back into the
// caller's distances/labels buffers.
  FAISS_ASSERT(index_);
  // ...
      const_cast<float*>(x),
      resources_->getDefaultStream(device_),
      {(int) n, index_->getDim()});
  // ... (devDistances and devLabels are device tensors set up on
  //      resources_->getDefaultStream(device_) and filled by the query)
  fromDevice<float, 2>(
      devDistances, distances, resources_->getDefaultStream(device_));
  fromDevice<faiss::Index::idx_t, 2>(
      devLabels, labels, resources_->getDefaultStream(device_));

void GpuIndexIVFPQ::set_typename() { /* ... */ }

// getListLength(), getListCodes() (which returns std::vector<unsigned char>)
// and getListIndices() each require the device-side index to exist:
  FAISS_ASSERT(index_);
  // ...
  FAISS_ASSERT(index_);
  // ...
  FAISS_ASSERT(index_);

void GpuIndexIVFPQ::assertSettings_() const {
  // ...
  // We only support up to 8 bits per PQ code.
  FAISS_ASSERT(bitsPerCode_ <= 8);
  // ...
  // The dimension must divide evenly among the sub-quantizers.
  FAISS_ASSERT(this->d % subQuantizers_ == 0);
  // ...
  // The per-query distance lookup table must fit in shared memory; e.g. float
  // tables with 64 sub-quantizers and 8 bits need 4 * 64 * 256 = 64 KiB, which
  // exceeds a typical 48 KiB limit, while float16 tables halve that to 32 KiB.
  int lookupTableSize = sizeof(float);
#ifdef FAISS_USE_FLOAT16
  if (useFloat16LookupTables_) {
    lookupTableSize = sizeof(half);
  }
#endif
  FAISS_ASSERT(lookupTableSize * subQuantizers_ * utils::pow2(bitsPerCode_)
               <= getMaxSharedMemPerBlock(device_));
  // ...
  // Without precomputed codes, only certain sub-dimension sizes are supported.
  FAISS_ASSERT(usePrecomputed_ ||
               IVFPQ::isSupportedNoPrecomputedSubDimSize(
                 this->d / subQuantizers_));
  // ...
  FAISS_ASSERT(this->metric_type == faiss::METRIC_L2);
}

// ...
} } // namespace gpu, namespace faiss

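Taken together, these fragments show the intended workflow: train a faiss::IndexIVFPQ on the CPU, clone it onto a GPU through the constructor documented in the member reference below, then add and search on the device. The sketch below illustrates that flow; it is only a sketch, and the include paths, the StandardGpuResources helper, the INDICES_64_BIT indices option, and the placeholder training data are assumptions of ours that may need adjusting for a particular FAISS checkout.

// Usage sketch (see the assumptions flagged above).
#include "IndexFlat.h"
#include "IndexIVFPQ.h"
#include "gpu/GpuIndexIVFPQ.h"
#include "gpu/StandardGpuResources.h"

#include <vector>

int main() {
  const int d = 64, nlist = 256, subQuantizers = 8, bitsPerCode = 8;
  const long nTrain = 65536, k = 4;

  // Train an IVFPQ index on the CPU first.
  faiss::IndexFlatL2 coarseQuantizer(d);
  faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, d, nlist,
                             subQuantizers, bitsPerCode);
  std::vector<float> data(nTrain * d);
  for (size_t i = 0; i < data.size(); ++i) {
    data[i] = static_cast<float>(i % 1024) / 1024.0f;  // placeholder data
  }
  cpuIndex.train(nTrain, data.data());

  // Clone it onto GPU 0 via the constructor documented below; the inverted
  // lists and PQ centroids are copied over (see copyFrom above).
  faiss::gpu::StandardGpuResources resources;
  faiss::gpu::GpuIndexIVFPQ gpuIndex(&resources,
                                     0 /* device */,
                                     faiss::gpu::INDICES_64_BIT,
                                     false /* float16 lookup tables */,
                                     &cpuIndex);

  // Add and search on the device; distances and labels land in host buffers.
  gpuIndex.add(nTrain, data.data());
  std::vector<float> distances(k);
  std::vector<faiss::Index::idx_t> labels(k);
  gpuIndex.search(1, data.data(), k, distances.data(), labels.data());
  return 0;
}
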
Member documentation for the symbols referenced in the listing above:

void precompute_table()
build precomputed table
size_t nbits
number of bits per quantization index
PolysemousTraining * polysemous_training
if NULL, use default
size_t byte_per_idx
number of bytes per code component (1 or 2)
int getDim() const
Return the number of dimensions we are indexing.
int getListLength(int listId) const
FlatIndex * getGpuData()
For internal access.
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
bool do_polysemous_training
reorder PQ centroids after training?
size_t scan_table_threshold
use table computation or on-the-fly?
std::vector< float > precomputed_table
int polysemous_ht
Hamming thresh for polysemous filtering.
int getBitsPerCode() const
Return the number of bits per PQ code.
int device_
The GPU device we are resident on.
std::vector< std::vector< long > > ids
Inverted lists for indexes.
void train(Index::idx_t n, const float *x) override
size_t max_codes
maximum number of codes to visit to do a query
static bool isSupportedPQCodeLength(int size)
Returns true if we support PQ in this size.
int nprobe_
Number of inverted list probes per query.
const IndicesOptions indicesOptions_
How should indices be stored on the GPU?
int classifyAndAddVectors(Tensor< float, 2, true > &vecs, Tensor< long, 1, true > &indices)
void query(Tensor< float, 2, true > &queries, int nprobe, int k, Tensor< float, 2, true > &outDistances, Tensor< long, 2, true > &outIndices)
void copyFrom(const faiss::IndexIVFPQ *index)
Tensor< float, 3, true > getPQCentroids()
GpuResources * resources_
Manages streams, cuBLAS handles and scratch memory for devices.
void copyTo(faiss::IndexIVF *index) const
Copy what we have to the CPU equivalent.
long idx_t
all indices are this type
int nlist_
Number of inverted lists that we manage.
idx_t ntotal
total number of indexed vectors
bool verbose
verbosity level
void setPrecomputedCodes(bool enable)
Enable or disable pre-computed codes.
std::vector< unsigned char > getListCodes(int listId) const
Return the list codes of a particular list back to the CPU.
void copyTo(faiss::IndexIVFPQ *index) const
bool getFloat16LookupTables() const
Are float16 residual distance lookup tables enabled?
bool by_residual
Encode residual or plain vector?
GpuIndexFlat * quantizer_
Quantizer for inverted lists.
void add_with_ids(Index::idx_t n, const float *x, const Index::idx_t *xids) override
MetricType metric_type
type of metric this index uses for search
ProductQuantizer pq
produces the codes
void search(faiss::Index::idx_t n, const float *x, faiss::Index::idx_t k, float *distances, faiss::Index::idx_t *labels) const override
size_t M
number of subquantizers
int getNumSubQuantizers() const
Return the number of sub-quantizers we are using.
std::vector< long > getListIndices(int listId) const
Return the list indices of a particular list back to the CPU.
int getCentroidsPerSubQuantizer() const
Return the number of centroids per PQ code (2^bits per code)
size_t code_size
code size per vector in bytes
void copyFrom(const faiss::IndexIVF *index)
Copy what we need from the CPU equivalent.
bool is_trained
set if the Index does not require training, or if training is done already
void compute_residual(const float *x, float *residual, idx_t key) const
bool getPrecomputedCodes() const
Are pre-computed codes enabled?
GpuIndexIVFPQ(GpuResources *resources, int device, IndicesOptions indicesOptions, bool useFloat16LookupTables, const faiss::IndexIVFPQ *index)
Construct by copying an existing CPU faiss::IndexIVFPQ over to the given GPU device.
MetricType
Some algorithms support both an inner product version and an L2 search version.
int use_precomputed_table
if by_residual, build precomputed tables
std::vector< float > centroids
Centroid table, size M * ksub * dsub.
static bool isSupportedNoPrecomputedSubDimSize(int dims)
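
The per-list accessors and tuning methods above can be combined to inspect what the GPU index is actually storing. A small illustrative helper follows; the function name, the printed report, and the include path are ours rather than part of the library, and it assumes a GpuIndexIVFPQ that has already been populated as in the earlier sketch. The setPrecomputedCodes(bool) and reserveMemory(size_t) entries above are the corresponding tuning knobs: the former spends extra GPU memory on precomputed distance tables to reduce per-query work, and the latter pre-allocates inverted-list storage ahead of a large add.

#include "gpu/GpuIndexIVFPQ.h"

#include <cstdio>
#include <vector>

// Hypothetical helper: pull one inverted list back to the CPU and print its
// layout. Every accessor used here appears in the member reference above.
void inspectList(const faiss::gpu::GpuIndexIVFPQ& index, int listId) {
  int listLen = index.getListLength(listId);                       // vectors stored
  std::vector<long> ids = index.getListIndices(listId);            // their user ids
  std::vector<unsigned char> codes = index.getListCodes(listId);   // their PQ codes

  // Each vector contributes getNumSubQuantizers() code bytes, each selecting
  // one of getCentroidsPerSubQuantizer() == 2^getBitsPerCode() centroids.
  std::printf("list %d: %d vectors, %zu ids, %zu code bytes, %d x %d codebook\n",
              listId, listLen, ids.size(), codes.size(),
              index.getNumSubQuantizers(), index.getCentroidsPerSubQuantizer());
}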