GpuIndexIVFPQ.cu

/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "GpuIndexIVFPQ.h"
#include "../ProductQuantizer.h"
#include "GpuIndexFlat.h"
#include "GpuResources.h"
#include "impl/IVFPQ.cuh"
#include "utils/CopyUtils.cuh"
#include "utils/DeviceUtils.h"
#include "../IndexFlat.h"
#include "../IndexIVFPQ.h"

#include <limits>

namespace faiss { namespace gpu {

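// Construct from a pre-existing CPU faiss::IndexIVFPQ; the IVF parameters,
// coarse quantizer and (if the CPU index is trained) all inverted lists are
// copied onto the given GPU device via copyFrom() below.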
GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources,
                             int device,
                             IndicesOptions indicesOptions,
                             bool useFloat16LookupTables,
                             const faiss::IndexIVFPQ* index) :
    GpuIndexIVF(resources,
                device,
                indicesOptions,
                false, // FIXME: float 16 coarse quantizer
                index->d,
                index->metric_type,
                index->nlist),
    useFloat16LookupTables_(useFloat16LookupTables),
    subQuantizers_(0),
    bitsPerCode_(0),
    usePrecomputed_(false),
    reserveMemoryVecs_(0),
    index_(nullptr) {
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16LookupTables_);
#endif

  copyFrom(index);
}

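// Construct an empty GPU IVFPQ index with the given parameters; train() must
// be called before any vectors can be added.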
GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources,
                             int device,
                             int dims,
                             int nlist,
                             int subQuantizers,
                             int bitsPerCode,
                             bool usePrecomputed,
                             IndicesOptions indicesOptions,
                             bool useFloat16LookupTables,
                             faiss::MetricType metric) :
    GpuIndexIVF(resources,
                device,
                indicesOptions,
                false, // FIXME: float 16 coarse quantizer
                dims,
                metric,
                nlist),
    useFloat16LookupTables_(useFloat16LookupTables),
    subQuantizers_(subQuantizers),
    bitsPerCode_(bitsPerCode),
    usePrecomputed_(usePrecomputed),
    reserveMemoryVecs_(0),
    index_(nullptr) {
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16LookupTables_);
#endif

  assertSettings_();

  // FIXME make IP work fully
  FAISS_ASSERT(this->metric_type == faiss::METRIC_L2);

  // We haven't trained ourselves, so don't construct the PQ index yet
  this->is_trained = false;
}

GpuIndexIVFPQ::~GpuIndexIVFPQ() {
  delete index_;
}

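// Initialize ourselves from a CPU IndexIVFPQ: copy the IVF parameters and
// coarse quantizer via GpuIndexIVF::copyFrom, then (if the CPU index is
// trained) rebuild the GPU PQ data structures and upload all inverted lists.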
void
GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) {
  DeviceScope scope(device_);

  // FIXME: support this
  FAISS_ASSERT(index->metric_type == faiss::METRIC_L2);
  GpuIndexIVF::copyFrom(index);

  // Clear out our old data
  delete index_;
  index_ = nullptr;

  subQuantizers_ = index->pq.M;
  bitsPerCode_ = index->pq.nbits;

  // We only support this
  FAISS_ASSERT(index->pq.byte_per_idx == 1);
  FAISS_ASSERT(index->by_residual);
  FAISS_ASSERT(index->polysemous_ht == 0);
  usePrecomputed_ = index->use_precomputed_table;

  assertSettings_();

  // The other index might not be trained
  if (!index->is_trained) {
    return;
  }

  // Otherwise, we can populate ourselves from the other index
  this->is_trained = true;

  // Copy our lists as well
  // The product quantizer must have data in it
  FAISS_ASSERT(index->pq.centroids.size() > 0);
  index_ = new IVFPQ(resources_,
                     quantizer_->getGpuData(),
                     subQuantizers_,
                     bitsPerCode_,
                     (float*) index->pq.centroids.data(),
                     indicesOptions_,
                     useFloat16LookupTables_);
  // Doesn't make sense to reserve memory here
  index_->setPrecomputedCodes(usePrecomputed_);

  // Copy database vectors, if any
  for (size_t i = 0; i < index->codes.size(); ++i) {
    auto& codes = index->codes[i];
    auto& indices = index->ids[i];

    FAISS_ASSERT(indices.size() * subQuantizers_ == codes.size());
    index_->addCodeVectorsFromCpu(i,
                                  codes.data(),
                                  indices.data(),
                                  indices.size());
  }
}

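// Copy our state back into a CPU faiss::IndexIVFPQ, including all inverted
// lists and the PQ centroids. Requires that indices were not stored on the
// GPU as INDICES_IVF, since we need them to reconstruct the CPU lists.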
void
GpuIndexIVFPQ::copyTo(faiss::IndexIVFPQ* index) const {
  DeviceScope scope(device_);

  // We must have the indices in order to copy to ourselves
  FAISS_ASSERT(indicesOptions_ != INDICES_IVF);

  GpuIndexIVF::copyTo(index);

  //
  // IndexIVFPQ information
  //
  index->by_residual = true;
  index->use_precomputed_table = 0;
  index->code_size = subQuantizers_;
  index->pq = faiss::ProductQuantizer(this->d, subQuantizers_, bitsPerCode_);

  index->do_polysemous_training = false;
  index->polysemous_training = nullptr;

  index->scan_table_threshold = 0;
  index->max_codes = 0;
  index->polysemous_ht = 0;
  index->codes.clear();
  index->codes.resize(nlist_);
  index->precomputed_table.clear();

  if (index_) {
    // Copy the inverted lists
    for (int i = 0; i < nlist_; ++i) {
      index->ids[i] = getListIndices(i);
      index->codes[i] = getListCodes(i);
    }

    // Copy PQ centroids
    auto devPQCentroids = index_->getPQCentroids();
    index->pq.centroids.resize(devPQCentroids.numElements());

    fromDevice<float, 3>(devPQCentroids,
                         index->pq.centroids.data(),
                         resources_->getDefaultStream(device_));

    if (usePrecomputed_) {
      index->precompute_table();
    }
  }
}

void
GpuIndexIVFPQ::reserveMemory(size_t numVecs) {
  reserveMemoryVecs_ = numVecs;
  if (index_) {
    DeviceScope scope(device_);
    index_->reserveMemory(numVecs);
  }
}

void
GpuIndexIVFPQ::setPrecomputedCodes(bool enable) {
  usePrecomputed_ = enable;
  if (index_) {
    DeviceScope scope(device_);
    index_->setPrecomputedCodes(enable);
  }

  assertSettings_();
}

bool
GpuIndexIVFPQ::getPrecomputedCodes() const {
  return usePrecomputed_;
}

bool
GpuIndexIVFPQ::getFloat16LookupTables() const {
  return useFloat16LookupTables_;
}

int
GpuIndexIVFPQ::getNumSubQuantizers() const {
  return subQuantizers_;
}

int
GpuIndexIVFPQ::getBitsPerCode() const {
  return bitsPerCode_;
}

int
GpuIndexIVFPQ::getCentroidsPerSubQuantizer() const {
  return utils::pow2(bitsPerCode_);
}

size_t
GpuIndexIVFPQ::reclaimMemory() {
  if (index_) {
    DeviceScope scope(device_);
    return index_->reclaimMemory();
  }

  return 0;
}

void
GpuIndexIVFPQ::reset() {
  if (index_) {
    DeviceScope scope(device_);

    index_->reset();
    this->ntotal = 0;
  } else {
    FAISS_ASSERT(this->ntotal == 0);
  }
}

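// Train the product quantizer on CPU-computed residuals (x minus the
// assigned coarse centroid), then build the GPU IVFPQ structure from the
// trained sub-quantizer centroids.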
void
GpuIndexIVFPQ::trainResidualQuantizer_(Index::idx_t n, const float* x) {
  // Code largely copied from faiss::IndexIVFPQ
  // FIXME: GPUize more of this
  n = std::min(n, (Index::idx_t) (1 << bitsPerCode_) * 64);

  if (this->verbose) {
    printf("computing residuals\n");
  }

  std::vector<Index::idx_t> assign(n);
  quantizer_->assign(n, x, assign.data());

  std::vector<float> residuals(n * d);

  for (idx_t i = 0; i < n; i++) {
    quantizer_->compute_residual(x + i * d, &residuals[i * d], assign[i]);
  }

  if (this->verbose) {
    printf("training %d x %d product quantizer on %ld vectors in %dD\n",
           subQuantizers_, getCentroidsPerSubQuantizer(), n, this->d);
  }

  // Just use the CPU product quantizer to determine sub-centroids
  faiss::ProductQuantizer pq(this->d, subQuantizers_, bitsPerCode_);
  pq.verbose = this->verbose;
  pq.train(n, residuals.data());

  index_ = new IVFPQ(resources_,
                     quantizer_->getGpuData(),
                     subQuantizers_,
                     bitsPerCode_,
                     pq.centroids.data(),
                     indicesOptions_,
                     useFloat16LookupTables_);
  if (reserveMemoryVecs_) {
    index_->reserveMemory(reserveMemoryVecs_);
  }

  index_->setPrecomputedCodes(usePrecomputed_);
}

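// Train the coarse quantizer and then the product quantizer on residuals;
// this is a no-op if the index is already trained.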
void
GpuIndexIVFPQ::train(Index::idx_t n, const float* x) {
  DeviceScope scope(device_);

  if (this->is_trained) {
    FAISS_ASSERT(quantizer_->is_trained);
    FAISS_ASSERT(quantizer_->ntotal == nlist_);
    FAISS_ASSERT(index_);
    return;
  }

  FAISS_ASSERT(!index_);

  trainQuantizer_(n, x);
  trainResidualQuantizer_(n, x);

  this->is_trained = true;
}

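// Add vectors with caller-provided ids: each vector is assigned to its
// closest coarse centroid on the GPU and PQ-encoded into that inverted list.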
void
GpuIndexIVFPQ::add_with_ids(Index::idx_t n,
                            const float* x,
                            const Index::idx_t* xids) {
  FAISS_ASSERT(this->is_trained);
  FAISS_ASSERT(index_);

  if (n == 0) {
    return;
  }

  DeviceScope scope(device_);
  auto stream = resources_->getDefaultStreamCurrentDevice();

  auto deviceVecs =
    toDevice<float, 2>(resources_,
                       device_,
                       const_cast<float*>(x),
                       stream,
                       {(int) n, index_->getDim()});

  auto deviceIndices =
    toDevice<Index::idx_t, 1>(resources_,
                              device_,
                              const_cast<Index::idx_t*>(xids),
                              stream,
                              {(int) n});

  // Not all vectors may be able to be added (some may contain NaNs
  // etc)
  ntotal += index_->classifyAndAddVectors(deviceVecs, deviceIndices);
}

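// Query the index for the k nearest neighbors of each of the n query vectors;
// inputs and outputs are staged to/from the GPU as needed, and each query
// probes nprobe_ inverted lists.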
void
GpuIndexIVFPQ::search(faiss::Index::idx_t n,
                      const float* x,
                      faiss::Index::idx_t k,
                      float* distances,
                      faiss::Index::idx_t* labels) const {
  FAISS_ASSERT(this->is_trained);
  FAISS_ASSERT(index_);

  if (n == 0) {
    return;
  }

  DeviceScope scope(device_);

  // Make sure arguments are on the device we desire; use temporary
  // memory allocations to move it if necessary
  auto devX =
    toDevice<float, 2>(resources_,
                       device_,
                       const_cast<float*>(x),
                       resources_->getDefaultStream(device_),
                       {(int) n, index_->getDim()});
  auto devDistances =
    toDevice<float, 2>(resources_,
                       device_,
                       distances,
                       resources_->getDefaultStream(device_),
                       {(int) n, (int) k});
  auto devLabels =
    toDevice<faiss::Index::idx_t, 2>(resources_,
                                     device_,
                                     labels,
                                     resources_->getDefaultStream(device_),
                                     {(int) n, (int) k});

  index_->query(devX,
                nprobe_,
                (int) k,
                devDistances,
                devLabels);

  // Copy back if necessary
  fromDevice<float, 2>(
    devDistances, distances, resources_->getDefaultStream(device_));
  fromDevice<faiss::Index::idx_t, 2>(
    devLabels, labels, resources_->getDefaultStream(device_));
}

void
GpuIndexIVFPQ::set_typename() {
  // FIXME: implement
  FAISS_ASSERT(false);
}

int
GpuIndexIVFPQ::getListLength(int listId) const {
  FAISS_ASSERT(index_);
  return index_->getListLength(listId);
}

std::vector<unsigned char>
GpuIndexIVFPQ::getListCodes(int listId) const {
  FAISS_ASSERT(index_);
  DeviceScope scope(device_);

  return index_->getListCodes(listId);
}

std::vector<long>
GpuIndexIVFPQ::getListIndices(int listId) const {
  FAISS_ASSERT(index_);
  DeviceScope scope(device_);

  return index_->getListIndices(listId);
}

void
GpuIndexIVFPQ::assertSettings_() const {
  // Our implementation has these restrictions:

  // Must have some number of lists
  FAISS_ASSERT(nlist_ > 0);

  // up to a single byte per code
  FAISS_ASSERT(bitsPerCode_ <= 8);

  // Sub-quantizers must evenly divide dimensions available
  FAISS_ASSERT(this->d % subQuantizers_ == 0);

  // The number of bytes per encoded vector must be one we support
  FAISS_ASSERT(IVFPQ::isSupportedPQCodeLength(subQuantizers_));

  // We must have enough shared memory on the current device to store
  // our lookup distances
  int lookupTableSize = sizeof(float);
#ifdef FAISS_USE_FLOAT16
  if (useFloat16LookupTables_) {
    lookupTableSize = sizeof(half);
  }
#endif

  // 64 bytes per code is only supported with usage of float16, at 2^8
  // codes per subquantizer
  FAISS_ASSERT(lookupTableSize * subQuantizers_ * utils::pow2(bitsPerCode_)
               <= getMaxSharedMemPerBlock(device_));

  // If precomputed codes are disabled, we have an extra limitation in
  // terms of the number of dimensions per subquantizer
  FAISS_ASSERT(usePrecomputed_ ||
               IVFPQ::isSupportedNoPrecomputedSubDimSize(
                 this->d / subQuantizers_));

  // TODO: fully implement METRIC_INNER_PRODUCT
  FAISS_ASSERT(this->metric_type == faiss::METRIC_L2);
}

} } // namespace
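
// Usage sketch (illustrative only, not part of the original file). It assumes
// the StandardGpuResources implementation, the setNumProbes() accessor on
// GpuIndexIVF, and that the host pointers below refer to valid float/id data:
//
//   faiss::gpu::StandardGpuResources res;
//
//   // 64-d vectors, 1024 coarse lists, 8 sub-quantizers at 8 bits each,
//   // precomputed codes on, 32-bit GPU-resident list indices, float32
//   // lookup tables, L2 metric, GPU device 0
//   faiss::gpu::GpuIndexIVFPQ index(&res, 0, 64, 1024, 8, 8,
//                                   true, faiss::gpu::INDICES_32_BIT,
//                                   false, faiss::METRIC_L2);
//
//   index.train(numTrain, trainVecs);            // coarse + product quantizer
//   index.add_with_ids(numBase, baseVecs, ids);  // encode into inverted lists
//   index.setNumProbes(32);                      // lists visited per query
//   index.search(numQueries, queries, 10, distances, labels);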