Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuAutoTune.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 #include "GpuAutoTune.h"
11 
12 #include "GpuIndex.h"
13 #include "../FaissAssert.h"
14 #include "../index_io.h"
15 #include "../IndexFlat.h"
16 #include "../IndexIVF.h"
17 #include "../IndexIVFPQ.h"
18 #include "../VectorTransform.h"
19 #include "../MetaIndexes.h"
20 #include "GpuIndexFlat.h"
21 #include "GpuIndexIVFFlat.h"
22 #include "GpuIndexIVFPQ.h"
23 #include "IndexProxy.h"
24 
25 namespace faiss { namespace gpu {
26 
27 /**********************************************************
28  * Cloning from/to GPU
29  **********************************************************/
30 
31 
33 
34  void merge_index(Index *dst, Index *src, bool successive_ids) {
35  if (auto ifl = dynamic_cast<IndexFlat *>(dst)) {
36  auto ifl2 = dynamic_cast<const IndexFlat *>(src);
37  FAISS_ASSERT(ifl2);
38  FAISS_ASSERT(successive_ids);
39  ifl->add(ifl2->ntotal, ifl2->xb.data());
40  } else if(auto ifl = dynamic_cast<IndexIVFFlat *>(dst)) {
41  auto ifl2 = dynamic_cast<IndexIVFFlat *>(src);
42  FAISS_ASSERT(ifl2);
43  ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
44  } else if(auto ifl = dynamic_cast<IndexIVFPQ *>(dst)) {
45  auto ifl2 = dynamic_cast<IndexIVFPQ *>(src);
46  FAISS_ASSERT(ifl2);
47  ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
48  } else {
49  FAISS_ASSERT(!"merging not implemented for this type of class");
50  }
51  }
52 
53 
54  Index *clone_Index(const Index *index) override {
55  if(auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
56  IndexFlat *res = new IndexFlat();
57  ifl->copyTo(res);
58  return res;
59  } else if(auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
60  IndexIVFFlat *res = new IndexIVFFlat();
61  ifl->copyTo(res);
62  return res;
63  } else if(auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
64  IndexIVFPQ *res = new IndexIVFPQ();
65  ipq->copyTo(res);
66  return res;
67 
68  // for IndexShards and IndexProxy we assume that the
69  // objective is to make a single component out of them
70  // (inverse op of ToGpuClonerMultiple)
71 
72  } else if(auto ish = dynamic_cast<const IndexShards *>(index)) {
73  int nshard = ish->shard_indexes.size();
74  FAISS_ASSERT(nshard > 0);
75  Index *res = clone_Index(ish->shard_indexes[0]);
76  for(int i = 1; i < ish->shard_indexes.size(); i++) {
77  Index *res_i = clone_Index(ish->shard_indexes[i]);
78  merge_index(res, res_i, ish->successive_ids);
79  delete res_i;
80  }
81  return res;
82  } else if(auto ipr = dynamic_cast<const IndexProxy *>(index)) {
83  // just clone one of the replicas
84  FAISS_ASSERT(ipr->count() > 0);
85  return clone_Index(ipr->at(0));
86  } else {
87  return Cloner::clone_Index(index);
88  }
89  }
90 };
91 
92 faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
93 {
94  ToCPUCloner cl;
95  return cl.clone_Index(gpu_index);
96 }
97 
98 
99 
// Default options for copying a CPU index to the GPU.
GpuClonerOptions::GpuClonerOptions():
    indicesOptions(INDICES_64_BIT),   // store user indices as 64-bit ints
    useFloat16CoarseQuantizer(false), // coarse quantizer kept in float32
    useFloat16(false),                // index data kept in float32
    usePrecomputed(true),             // use precomputed tables (IVFPQ)
    reserveVecs(0),                   // no inverted-list space reserved up front
    storeTransposed(false),           // GpuIndexFlat: non-transposed layout
    verbose(0)                        // quiet by default
{}
109 
110 
GpuResources *resources;  // GPU resources used by the cloned indexes
int device;               // GPU device on which the index is resident

// Build a cloner that copies CPU indexes to `device`, using the given
// resources and the copy options in `options`.
ToGpuCloner(GpuResources *resources, int device,
            const GpuClonerOptions &options):
    GpuClonerOptions(options), resources(resources), device(device)
{}
119 
// Clone a CPU index to a single GPU, dispatching on the concrete type
// (IndexFlat, IndexIVFFlat, IndexIVFPQ); anything else falls back to
// Cloner::clone_Index.
// NOTE(review): this listing comes from an extracted dump and several
// source lines are missing (skipped line numbers); the gaps are flagged
// below and should be restored from the upstream source.
Index *clone_Index(const Index *index) override {
    if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
        GpuIndexFlatConfig config;
        config.device = device;
        config.useFloat16 = useFloat16;
        // NOTE(review): one line missing here — presumably
        // config.storeTransposed = storeTransposed; confirm upstream

        return new GpuIndexFlat(resources, ifl, config);
    } else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
        GpuIndexIVFFlatConfig config;
        config.device = device;
        // NOTE(review): several config-setting lines missing here
        // (indicesOptions / float16 options) — confirm upstream

        GpuIndexIVFFlat *res =
            new GpuIndexIVFFlat(resources,
                                ifl->d,
                                ifl->nlist,
                                ifl->metric_type,
                                config);
        if(reserveVecs > 0 && ifl->ntotal == 0) {
            // NOTE(review): body lost in extraction — presumably
            // res->reserveMemory(reserveVecs); (see reserveMemory:
            // "Reserve GPU memory in our inverted lists")
        }

        res->copyFrom(ifl);
        return res;
    } else if(auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
        if(verbose)
            printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ "
                   "indicesOptions=%d "
                   "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
                   ipq->ntotal, indicesOptions, usePrecomputed,
                   // NOTE(review): trailing printf arguments (useFloat16,
                   // reserveVecs) lost in extraction — statement is
                   // incomplete as shown
        GpuIndexIVFPQConfig config;
        config.device = device;
        // NOTE(review): several config-setting lines missing here

        GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config);

        if(reserveVecs > 0 && ipq->ntotal == 0) {
            // NOTE(review): body lost in extraction — presumably
            // res->reserveMemory(reserveVecs); confirm upstream
        }

        return res;
    } else {
        return Cloner::clone_Index(index);
    }
}
174 
175 };
176 
177 
178 faiss::Index * index_cpu_to_gpu(
179  GpuResources* resources, int device,
180  const faiss::Index *index,
181  const GpuClonerOptions *options)
182 {
183  GpuClonerOptions defaults;
184  ToGpuCloner cl(resources, device, options ? *options : defaults);
185  return cl.clone_Index(index);
186 }
187 
// Default multi-GPU options: replicate across GPUs (shard = false)
// rather than splitting the dataset into shards.
GpuMultipleClonerOptions::GpuMultipleClonerOptions(): shard(false)
{}
190 
192  std::vector<ToGpuCloner> sub_cloners;
193 
194  ToGpuClonerMultiple(std::vector<GpuResources *> & resources,
195  std::vector<int>& devices,
196  const GpuMultipleClonerOptions &options):
197  GpuMultipleClonerOptions(options)
198  {
199  FAISS_ASSERT(resources.size() == devices.size());
200  for(int i = 0; i < resources.size(); i++) {
201  sub_cloners.push_back(ToGpuCloner(
202  resources[i], devices[i], options));
203  }
204  }
205 
206 
// Build from an explicit set of already-constructed sub-cloners; used
// below when recursively splitting work across a subset of the GPUs
// (see the MultiIndexQuantizer branch of clone_Index).
ToGpuClonerMultiple(const std::vector<ToGpuCloner> & sub_cloners,
                    const GpuMultipleClonerOptions &options):
    GpuMultipleClonerOptions(options),
    sub_cloners(sub_cloners)
{}
212 
213 
214  Index *clone_Index(const Index *index) override {
215  long n = sub_cloners.size();
216 
217  if (n == 1)
218  return sub_cloners[0].clone_Index(index);
219 
220  if(dynamic_cast<const IndexFlat *>(index) ||
221  dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
222  dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
223  if(!shard) {
224  IndexProxy * res = new IndexProxy();
225  for(auto & sub_cloner: sub_cloners) {
226  res->addIndex(sub_cloner.clone_Index(index));
227  }
228  res->own_fields = true;
229  return res;
230  } else {
231  auto index_ivfpq =
232  dynamic_cast<const faiss::IndexIVFPQ *>(index);
233  auto index_ivfflat =
234  dynamic_cast<const faiss::IndexIVFFlat *>(index);
235  FAISS_ASSERT_MSG (index_ivfpq || index_ivfflat,
236  "IndexShards implemented only for "
237  "IndexIVFFlat or IndexIVFPQ");
238  std::vector<faiss::Index*> shards(n);
239 
240  for(long i = 0; i < n; i++) {
241  // make a shallow copy
242  long i0 = i * index->ntotal / n;
243  long i1 = (i + 1) * index->ntotal / n;
244  if(verbose)
245  printf("IndexShards shard %ld indices %ld:%ld\n",
246  i, i0, i1);
247 
248  if(reserveVecs)
249  sub_cloners[i].reserveVecs =
250  (reserveVecs + n - 1) / n;
251 
252  if (index_ivfpq) {
253  faiss::IndexIVFPQ idx2(
254  index_ivfpq->quantizer, index_ivfpq->d,
255  index_ivfpq->nlist, index_ivfpq->code_size,
256  index_ivfpq->pq.nbits);
257  idx2.metric_type = index_ivfpq->metric_type;
258  idx2.pq = index_ivfpq->pq;
259  idx2.nprobe = index_ivfpq->nprobe;
260  idx2.use_precomputed_table = 0;
261  idx2.is_trained = index->is_trained;
262  index_ivfpq->copy_subset_to(idx2, 0, i0, i1);
263  shards[i] = sub_cloners[i].clone_Index(&idx2);
264  } else if (index_ivfflat) {
265  faiss::IndexIVFFlat idx2(
266  index_ivfflat->quantizer, index->d,
267  index_ivfflat->nlist, index_ivfflat->metric_type);
268  idx2.nprobe = index_ivfflat->nprobe;
269  index_ivfflat->copy_subset_to(idx2, 0, i0, i1);
270  idx2.nprobe = index_ivfflat->nprobe;
271  shards[i] = sub_cloners[i].clone_Index(&idx2);
272  }
273  }
274  faiss::IndexShards *res =
275  new faiss::IndexShards(index->d, true, false);
276 
277  for (int i = 0; i < n; i++) {
278  res->add_shard(shards[i]);
279  }
280  res->own_fields = true;
281  FAISS_ASSERT(index->ntotal == res->ntotal);
282  return res;
283  }
284  } else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
285  if (verbose) {
286  printf("cloning MultiIndexQuantizer: "
287  "will be valid only for search k=1\n");
288  }
289  const ProductQuantizer & pq = miq->pq;
290  IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
291  splitv->own_fields = true;
292 
293  for (int m = 0; m < pq.M; m++) {
294  // which GPU(s) will be assigned to this sub-quantizer
295 
296  long i0 = m * n / pq.M;
297  long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
298  std::vector<ToGpuCloner> sub_cloners_2;
299  sub_cloners_2.insert(
300  sub_cloners_2.begin(), sub_cloners.begin() + i0,
301  sub_cloners.begin() + i1);
302  ToGpuClonerMultiple cm(sub_cloners_2, *this);
303  IndexFlatL2 idxc (pq.dsub);
304  idxc.add (pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub);
305  Index *idx2 = cm.clone_Index(&idxc);
306  splitv->add_sub_index(idx2);
307  }
308  return splitv;
309  } else {
310  return Cloner::clone_Index(index);
311  }
312  }
313 
314 
315 };
316 
317 
318 
319 faiss::Index * index_cpu_to_gpu_multiple(
320  std::vector<GpuResources*> & resources,
321  std::vector<int> &devices,
322  const faiss::Index *index,
323  const GpuMultipleClonerOptions *options)
324 {
325  GpuMultipleClonerOptions defaults;
326  ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
327  return cl.clone_Index(index);
328 }
329 
330 
331 
332 /**********************************************************
333  * Parameters to auto-tune on GpuIndex'es
334  **********************************************************/
335 
336 #define DC(classname) auto ix = dynamic_cast<const classname *>(index)
337 
338 
// NOTE(review): the signature line is missing from this extracted
// listing; per the trailing Doxygen docs this is
//   void GpuParameterSpace::initialize(const faiss::Index *index) override
// ("initialize with reasonable parameters for the index").
{
    // unwrap pre-transforms / replicas / shards to reach a leaf index
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (faiss::IndexShards)) {
        if (ix->shard_indexes.size() == 0) return;
        index = ix->shard_indexes[0];
    }
    if (DC (GpuIndexIVF)) {
        // auto-tune nprobe over powers of two, capped at 1024 and at
        // the number of inverted lists
        ParameterRange & pr = add_range("nprobe");
        for (int i = 0; i < 12; i++) {
            size_t nprobe = 1 << i;
            if (nprobe >= ix->getNumLists() ||
                nprobe > 1024) break;
            pr.values.push_back (nprobe);
        }
    }
    // not sure we should call the parent initializer
}
363 
364 
365 
366 #undef DC
367 // non-const version
368 #define DC(classname) auto *ix = dynamic_cast<classname *>(index)
369 
370 
371 
// NOTE(review): the first line of the signature is missing from this
// extracted listing; per the trailing Doxygen docs this is
//   void GpuParameterSpace::set_index_parameter(
// ("set a combination of parameters on an index"). Supported names:
// "nprobe" (GpuIndexIVF) and "use_precomputed_table" (GpuIndexIVFPQ);
// asserts on anything else.
    Index * index, const std::string & name, double val) const
{
    // recurse through wrappers so the parameter reaches every leaf index
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        for (int i = 0; i < ix->count(); i++)
            set_index_parameter (ix->at(i), name, val);
        return;
    }
    if (DC (faiss::IndexShards)) {
        for (auto sub_index : ix->shard_indexes)
            set_index_parameter (sub_index, name, val);
        return;
    }
    if (name == "nprobe") {
        DC (GpuIndexIVF);
        FAISS_ASSERT(ix);
        ix->setNumProbes (int (val));
        return;
    }
    if (name == "use_precomputed_table") {
        DC (GpuIndexIVFPQ);
        FAISS_ASSERT(ix);
        ix->setPrecomputedCodes(bool (val));
        return;
    }

    FAISS_ASSERT_MSG (false, "unknown parameter");
}
403 
404 
405 
406 
407 } } // namespace
bool storeTransposed
For GpuIndexFlat, store data in transposed layout?
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:47
void initialize(const faiss::Index *index) override
initialize with reasonable parameters for the index
set some options on how to copy to GPU
void copyFrom(const faiss::IndexIVFFlat *index)
size_t dsub
dimensionality of each subvector
bool usePrecomputed
use precomputed tables?
ParameterRange & add_range(const char *name)
add a new parameter
Definition: AutoTune.cpp:322
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:35
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:27
int d
vector dimension
Definition: Index.h:64
GpuIndexFlatConfig flatConfig
Configuration for the coarse quantizer object.
Definition: GpuIndexIVF.h:34
void set_index_parameter(faiss::Index *index, const std::string &name, double val) const override
set a combination of parameters on an index
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
size_t ksub
number of centroids for each subquantizer
bool useFloat16CoarseQuantizer
is the coarse quantizer in float16?
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
void add(idx_t n, const float *x) override
Definition: IndexFlat.cpp:30
void addIndex(faiss::Index *index)
Definition: IndexProxy.cpp:32
IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:40
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:33
size_t M
number of subquantizers
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
possible values of a parameter, sorted from least to most expensive/accurate
Definition: AutoTune.h:125
size_t d
size of the input vectors
IndicesOptions indicesOptions
Index storage options for the GPU.
Definition: GpuIndexIVF.h:31
bool verbose
Set verbose options on the index.
long reserveVecs
reserve vectors in the inverted files?
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:31
std::vector< float > centroids
Centroid table, size M * ksub * dsub.