#include "GpuIndexIVFPQ.h"
#include "../IndexFlat.h"
#include "../IndexIVFPQ.h"
#include "../ProductQuantizer.h"
#include "GpuIndexFlat.h"
#include "GpuResources.h"
#include "impl/IVFPQ.cuh"
#include "utils/CopyUtils.cuh"
#include "utils/DeviceUtils.h"

#include <limits>
namespace faiss { namespace gpu {
// Copying constructor (from a CPU faiss::IndexIVFPQ); initializer-list
// fragment:
    reserveMemoryVecs_(0),
    // ...
#ifndef FAISS_USE_FLOAT16
    // ... (float16 lookup tables require FAISS_USE_FLOAT16)
#endif

// Constructor for an empty, untrained index; initializer-list fragment:
    subQuantizers_(subQuantizers),
    bitsPerCode_(bitsPerCode),
    reserveMemoryVecs_(0),
    // ...
#ifndef FAISS_USE_FLOAT16
    // ...
#endif

  // Only L2 is supported for now
  FAISS_ASSERT(this->metric_type == faiss::METRIC_L2);
GpuIndexIVFPQ::~GpuIndexIVFPQ() {
  delete index_;
}
void
GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) {
  // FIXME: support inner product as well
  FAISS_THROW_IF_NOT_MSG(index->metric_type == faiss::METRIC_L2,
                         "inner product unsupported");
  // ...

  subQuantizers_ = index->pq.M;
  bitsPerCode_ = index->pq.nbits;
  // ...
  // Copy over the inverted-list contents, if any
  const InvertedLists* ivf = index->invlists;
  size_t nlist = ivf ? ivf->nlist : 0;

  for (size_t i = 0; i < nlist; ++i) {
    size_t list_size = ivf->list_size(i);

    // GPU inverted lists are addressed with int
    FAISS_THROW_IF_NOT_FMT(list_size <=
                           (size_t) std::numeric_limits<int>::max(),
                           "GPU inverted list can only support "
                           "%zu entries; %zu found",
                           (size_t) std::numeric_limits<int>::max(),
                           list_size);

    index_->addCodeVectorsFromCpu(
      i, ivf->get_codes(i), ivf->get_ids(i), list_size);
  }
}
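// Illustrative usage of the CPU -> GPU copy path above (a minimal sketch, not
// part of this file; StandardGpuResources is assumed from the surrounding
// FAISS GPU sources, and cpuIndex is a trained faiss::IndexIVFPQ):
//
//   faiss::gpu::StandardGpuResources res;
//   faiss::gpu::GpuIndexIVFPQConfig config;
//   faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
//   // ... or, on an existing GPU index:
//   gpuIndex.copyFrom(&cpuIndex);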
void
GpuIndexIVFPQ::copyTo(faiss::IndexIVFPQ* index) const {
  // We must have retained the indices on the GPU in order to copy them back
  FAISS_THROW_IF_NOT_MSG(ivfpqConfig_.indicesOptions != INDICES_IVF,
                         "Cannot copy to CPU as GPU index doesn't retain "
                         "indices (INDICES_IVF)");
  // ...
  if (index_) {
    // Copy the inverted lists back into the CPU index
    for (int i = 0; i < nlist_; ++i) {
      auto ids = getListIndices(i);
      auto codes = getListCodes(i);

      index->invlists->add_entries(i, ids.size(), ids.data(), codes.data());
    }

    // Copy the PQ centroids back as well
    auto devPQCentroids = index_->getPQCentroids();
    index->pq.centroids.resize(devPQCentroids.numElements());

    fromDevice<float, 3>(devPQCentroids,
                         index->pq.centroids.data(),
                         resources_->getDefaultStream(device_));
    // ...
  }
}
void
GpuIndexIVFPQ::reserveMemory(size_t numVecs) {
  // Reserving space up front avoids inverted-list reallocation during add
  reserveMemoryVecs_ = numVecs;
  if (index_) {
    index_->reserveMemory(numVecs);
  }
}
int
GpuIndexIVFPQ::getNumSubQuantizers() const {
  return subQuantizers_;
}
int
GpuIndexIVFPQ::getCentroidsPerSubQuantizer() const {
  return utils::pow2(bitsPerCode_);
}
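// For example, bitsPerCode_ = 8 gives utils::pow2(8) = 256 centroids per
// sub-quantizer, i.e. exactly one byte of code per sub-quantizer.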
  // Fragment from the reset path: without GPU-side storage, the index must
  // already be empty
  FAISS_ASSERT(this->ntotal == 0);
void
GpuIndexIVFPQ::trainResidualQuantizer_(Index::idx_t n, const float* x) {
  // Cap the training set; on the order of 64 points per PQ centroid suffices
  n = std::min(n, (Index::idx_t) (1 << bitsPerCode_) * 64);

  if (this->verbose) {
    printf("computing residuals\n");
  }

  // Assign each training vector to its coarse centroid
  std::vector<Index::idx_t> assign(n);
  quantizer_->assign(n, x, assign.data());

  // Compute the residual of each vector w.r.t. its coarse centroid
  std::vector<float> residuals(n * d);

  for (idx_t i = 0; i < n; i++) {
    quantizer_->compute_residual(x + i * d, &residuals[i * d], assign[i]);
  }

  if (this->verbose) {
    printf("training %d x %d product quantizer on %ld vectors in %dD\n",
           subQuantizers_, getCentroidsPerSubQuantizer(), n, this->d);
  }

  // Train the product quantizer on the residuals (on the CPU)
  faiss::ProductQuantizer pq(this->d, subQuantizers_, bitsPerCode_);
  pq.verbose = this->verbose;
  pq.train(n, residuals.data());
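  // Why residuals: IVFPQ encodes r = x - c(x), where c(x) is the coarse
  // centroid x is assigned to, so training the PQ on residuals (as above)
  // matches exactly what it will encode at add time.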
  // ... (the GPU-side IVFPQ structure is constructed from the trained
  // centroids here)

  if (reserveMemoryVecs_) {
    index_->reserveMemory(reserveMemoryVecs_);
  }
}
void
GpuIndexIVFPQ::train(Index::idx_t n, const float* x) {
  if (this->is_trained) {
    FAISS_ASSERT(index_);
    return;
  }

  FAISS_ASSERT(!index_);

  // Train the coarse quantizer first, then the PQ on the residuals
  trainQuantizer_(n, x);
  trainResidualQuantizer_(n, x);

  this->is_trained = true;
}
void
GpuIndexIVFPQ::addImpl_(faiss::Index::idx_t n,
                        const float* x,
                        const faiss::Index::idx_t* ids) {
  // The device was already set by the caller (GpuIndex)
  FAISS_ASSERT(index_);
  // ...

  // Copy the new vectors to the device if necessary (local name illustrative)
  auto devData = toDevice<float, 2>(resources_,
                                    device_,
                                    const_cast<float*>(x),
                                    resources_->getDefaultStream(device_),
                                    {(int) n, index_->getDim()});
  // ...
}
void
GpuIndexIVFPQ::searchImpl_(faiss::Index::idx_t n, const float* x,
                           faiss::Index::idx_t k, float* distances,
                           faiss::Index::idx_t* labels) const {
  // The device was already set by the caller (GpuIndex)
  FAISS_ASSERT(index_);

  // Move the queries to the device if necessary
  auto devX = toDevice<float, 2>(resources_,
                                 device_,
                                 const_cast<float*>(x),
                                 resources_->getDefaultStream(device_),
                                 {(int) n, index_->getDim()});
  // ... (devDistances and devLabels wrap the output arrays the same way,
  // sized {n, k}, and the GPU-side query runs here)

  // Copy back if the caller's output buffers were not device-resident
  fromDevice<float, 2>(
    devDistances, distances, resources_->getDefaultStream(device_));
  fromDevice<faiss::Index::idx_t, 2>(
    devLabels, labels, resources_->getDefaultStream(device_));
}
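// Illustrative host-side call (a sketch; n, k, and the queries buffer are
// assumptions, the search signature is the standard faiss::Index one):
//
//   std::vector<float> dist(n * k);
//   std::vector<faiss::Index::idx_t> ids(n * k);
//   gpuIndex.search(n, queries, k, dist.data(), ids.data());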
int
GpuIndexIVFPQ::getListLength(int listId) const {
  FAISS_ASSERT(index_);
  // ...
}

std::vector<unsigned char>
GpuIndexIVFPQ::getListCodes(int listId) const {
  FAISS_ASSERT(index_);
  // ...
}

std::vector<long>
GpuIndexIVFPQ::getListIndices(int listId) const {
  FAISS_ASSERT(index_);
  // ...
}
void
GpuIndexIVFPQ::verifySettings_() const {
  // Our implementation has these restrictions:

  // Must have some number of lists
  FAISS_THROW_IF_NOT_MSG(nlist_ > 0, "nlist must be >0");

  // Up to a single byte per code
  FAISS_THROW_IF_NOT_FMT(bitsPerCode_ <= 8,
                         "Bits per code must be <= 8 (passed %d)",
                         bitsPerCode_);

  // Sub-quantizers must evenly divide the available dimensions
  FAISS_THROW_IF_NOT_FMT(this->d % subQuantizers_ == 0,
                         "Number of sub-quantizers (%d) must be an "
                         "even divisor of the number of dimensions (%d)",
                         subQuantizers_, this->d);

  // The number of bytes per encoded vector must be one we support
  FAISS_THROW_IF_NOT_FMT(IVFPQ::isSupportedPQCodeLength(subQuantizers_),
                         "Number of bytes per encoded vector / sub-quantizers (%d) "
                         "is not supported",
                         subQuantizers_);

  // The per-query distance lookup tables must fit in shared memory
  int lookupTableSize = sizeof(float);
#ifdef FAISS_USE_FLOAT16
  if (ivfpqConfig_.useFloat16LookupTables) {
    lookupTableSize = sizeof(half);
  }
#endif

  size_t requiredSmemSize =
    lookupTableSize * subQuantizers_ * utils::pow2(bitsPerCode_);
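  // Worked example (illustrative): subQuantizers_ = 64 and bitsPerCode_ = 8
  // need 64 * 256 * sizeof(float) = 64 KiB of lookup table per block, more
  // than the common 48 KiB shared-memory limit; float16 tables halve this
  // to 32 KiB, which fits.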
  size_t smemPerBlock = getMaxSharedMemPerBlock(device_);

  FAISS_THROW_IF_NOT_FMT(requiredSmemSize
                         <= getMaxSharedMemPerBlock(device_),
                         "Device %d has %zu bytes of shared memory, while "
                         "%d bits per code and %d sub-quantizers requires %zu "
                         "bytes. Consider useFloat16LookupTables and/or "
                         "reduce parameters",
                         device_, smemPerBlock, bitsPerCode_, subQuantizers_,
                         requiredSmemSize);

  // Without precomputed codes, only certain dims per sub-quantizer are
  // supported
  FAISS_THROW_IF_NOT_FMT(ivfpqConfig_.usePrecomputedTables ||
                         IVFPQ::isSupportedNoPrecomputedSubDimSize(
                           this->d / subQuantizers_),
                         "Number of dimensions per sub-quantizer (%d) "
                         "is not currently supported without precomputed codes. "
                         "Only 1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32 dims "
                         "per sub-quantizer are currently supported with no "
                         "precomputed codes. "
                         "Precomputed codes supports any number of dimensions, but "
                         "will involve memory overheads.",
                         this->d / subQuantizers_);

  // TODO: fully implement METRIC_INNER_PRODUCT
  FAISS_THROW_IF_NOT_MSG(this->metric_type == faiss::METRIC_L2,
                         "METRIC_INNER_PRODUCT is currently unsupported");
}

} } // namespace faiss::gpu