/** * Copyright (c) 2015-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD+Patents license found in the * LICENSE file in the root directory of this source tree. */ // Copyright 2004-present Facebook. All Rights Reserved. #include "GpuAutoTune.h" #include "GpuIndex.h" #include "../FaissAssert.h" #include "../index_io.h" #include "../IndexFlat.h" #include "../IndexIVF.h" #include "../IndexIVFPQ.h" #include "../VectorTransform.h" #include "../MetaIndexes.h" #include "GpuIndexFlat.h" #include "GpuIndexIVFFlat.h" #include "GpuIndexIVFPQ.h" #include "IndexProxy.h" namespace faiss { namespace gpu { /********************************************************** * Cloning from/to GPU **********************************************************/ struct ToCPUCloner: Cloner { void merge_index(Index *dst, Index *src, bool successive_ids) { if (auto ifl = dynamic_cast(dst)) { auto ifl2 = dynamic_cast(src); FAISS_ASSERT(ifl2); FAISS_ASSERT(successive_ids); ifl->add(ifl2->ntotal, ifl2->xb.data()); } else if(auto ifl = dynamic_cast(dst)) { auto ifl2 = dynamic_cast(src); FAISS_ASSERT(ifl2); ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0); } else if(auto ifl = dynamic_cast(dst)) { auto ifl2 = dynamic_cast(src); FAISS_ASSERT(ifl2); ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0); } else { FAISS_ASSERT(!"merging not implemented for this type of class"); } } Index *clone_Index(const Index *index) override { if(auto ifl = dynamic_cast(index)) { IndexFlat *res = new IndexFlat(); ifl->copyTo(res); return res; } else if(auto ifl = dynamic_cast(index)) { IndexIVFFlat *res = new IndexIVFFlat(); ifl->copyTo(res); return res; } else if(auto ipq = dynamic_cast(index)) { IndexIVFPQ *res = new IndexIVFPQ(); ipq->copyTo(res); return res; // for IndexShards and IndexProxy we assume that the // objective is to make a single component out of them // (inverse op of ToGpuClonerMultiple) } else if(auto ish = dynamic_cast(index)) { int nshard = ish->shard_indexes.size(); FAISS_ASSERT(nshard > 0); Index *res = clone_Index(ish->shard_indexes[0]); for(int i = 1; i < ish->shard_indexes.size(); i++) { Index *res_i = clone_Index(ish->shard_indexes[i]); merge_index(res, res_i, ish->successive_ids); delete res_i; } return res; } else if(auto ipr = dynamic_cast(index)) { // just clone one of the replicas FAISS_ASSERT(ipr->count() > 0); return clone_Index(ipr->at(0)); } else { return Cloner::clone_Index(index); } } }; faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index) { ToCPUCloner cl; return cl.clone_Index(gpu_index); } GpuClonerOptions::GpuClonerOptions(): indicesOptions(INDICES_64_BIT), useFloat16CoarseQuantizer(false), useFloat16(false), usePrecomputed(true), reserveVecs(0), storeTransposed(false), verbose(0) {} struct ToGpuCloner: faiss::Cloner, GpuClonerOptions { GpuResources *resources; int device; ToGpuCloner(GpuResources *resources, int device, const GpuClonerOptions &options): GpuClonerOptions(options), resources(resources), device(device) {} Index *clone_Index(const Index *index) override { if(auto ifl = dynamic_cast(index)) { GpuIndexFlatConfig config; config.device = device; config.useFloat16 = useFloat16; config.storeTransposed = storeTransposed; return new GpuIndexFlat(resources, ifl, config); } else if(auto ifl = dynamic_cast(index)) { GpuIndexIVFFlatConfig config; config.device = device; config.indicesOptions = indicesOptions; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; config.flatConfig.storeTransposed = storeTransposed; config.useFloat16IVFStorage = useFloat16; GpuIndexIVFFlat *res = new GpuIndexIVFFlat(resources, ifl->d, ifl->nlist, ifl->metric_type, config); if(reserveVecs > 0 && ifl->ntotal == 0) { res->reserveMemory(reserveVecs); } res->copyFrom(ifl); return res; } else if(auto ipq = dynamic_cast(index)) { if(verbose) printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ " "indicesOptions=%d " "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n", ipq->ntotal, indicesOptions, usePrecomputed, useFloat16, reserveVecs); GpuIndexIVFPQConfig config; config.device = device; config.indicesOptions = indicesOptions; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; config.flatConfig.storeTransposed = storeTransposed; config.useFloat16LookupTables = useFloat16; config.usePrecomputedTables = usePrecomputed; GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config); if(reserveVecs > 0 && ipq->ntotal == 0) { res->reserveMemory(reserveVecs); } return res; } else { return Cloner::clone_Index(index); } } }; faiss::Index * index_cpu_to_gpu( GpuResources* resources, int device, const faiss::Index *index, const GpuClonerOptions *options) { GpuClonerOptions defaults; ToGpuCloner cl(resources, device, options ? *options : defaults); return cl.clone_Index(index); } GpuMultipleClonerOptions::GpuMultipleClonerOptions(): shard(false) {} struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions { std::vector sub_cloners; ToGpuClonerMultiple(std::vector & resources, std::vector& devices, const GpuMultipleClonerOptions &options): GpuMultipleClonerOptions(options) { FAISS_ASSERT(resources.size() == devices.size()); for(int i = 0; i < resources.size(); i++) { sub_cloners.push_back(ToGpuCloner( resources[i], devices[i], options)); } } ToGpuClonerMultiple(const std::vector & sub_cloners, const GpuMultipleClonerOptions &options): GpuMultipleClonerOptions(options), sub_cloners(sub_cloners) {} Index *clone_Index(const Index *index) override { long n = sub_cloners.size(); if (n == 1) return sub_cloners[0].clone_Index(index); if(dynamic_cast(index) || dynamic_cast(index) || dynamic_cast(index)) { if(!shard) { IndexProxy * res = new IndexProxy(); for(auto & sub_cloner: sub_cloners) { res->addIndex(sub_cloner.clone_Index(index)); } res->own_fields = true; return res; } else { auto index_ivfpq = dynamic_cast(index); auto index_ivfflat = dynamic_cast(index); FAISS_ASSERT_MSG (index_ivfpq || index_ivfflat, "IndexShards implemented only for " "IndexIVFFlat or IndexIVFPQ"); std::vector shards(n); for(long i = 0; i < n; i++) { // make a shallow copy long i0 = i * index->ntotal / n; long i1 = (i + 1) * index->ntotal / n; if(verbose) printf("IndexShards shard %ld indices %ld:%ld\n", i, i0, i1); if(reserveVecs) sub_cloners[i].reserveVecs = (reserveVecs + n - 1) / n; if (index_ivfpq) { faiss::IndexIVFPQ idx2( index_ivfpq->quantizer, index_ivfpq->d, index_ivfpq->nlist, index_ivfpq->code_size, index_ivfpq->pq.nbits); idx2.metric_type = index_ivfpq->metric_type; idx2.pq = index_ivfpq->pq; idx2.nprobe = index_ivfpq->nprobe; idx2.use_precomputed_table = 0; idx2.is_trained = index->is_trained; index_ivfpq->copy_subset_to(idx2, 0, i0, i1); shards[i] = sub_cloners[i].clone_Index(&idx2); } else if (index_ivfflat) { faiss::IndexIVFFlat idx2( index_ivfflat->quantizer, index->d, index_ivfflat->nlist, index_ivfflat->metric_type); idx2.nprobe = index_ivfflat->nprobe; index_ivfflat->copy_subset_to(idx2, 0, i0, i1); idx2.nprobe = index_ivfflat->nprobe; shards[i] = sub_cloners[i].clone_Index(&idx2); } } faiss::IndexShards *res = new faiss::IndexShards(index->d, true, false); for (int i = 0; i < n; i++) { res->add_shard(shards[i]); } res->own_fields = true; FAISS_ASSERT(index->ntotal == res->ntotal); return res; } } else if(auto miq = dynamic_cast(index)) { if (verbose) { printf("cloning MultiIndexQuantizer: " "will be valid only for search k=1\n"); } const ProductQuantizer & pq = miq->pq; IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true); splitv->own_fields = true; for (int m = 0; m < pq.M; m++) { // which GPU(s) will be assigned to this sub-quantizer long i0 = m * n / pq.M; long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1; std::vector sub_cloners_2; sub_cloners_2.insert( sub_cloners_2.begin(), sub_cloners.begin() + i0, sub_cloners.begin() + i1); ToGpuClonerMultiple cm(sub_cloners_2, *this); IndexFlatL2 idxc (pq.dsub); idxc.add (pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub); Index *idx2 = cm.clone_Index(&idxc); splitv->add_sub_index(idx2); } return splitv; } else { return Cloner::clone_Index(index); } } }; faiss::Index * index_cpu_to_gpu_multiple( std::vector & resources, std::vector &devices, const faiss::Index *index, const GpuMultipleClonerOptions *options) { GpuMultipleClonerOptions defaults; ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults); return cl.clone_Index(index); } /********************************************************** * Parameters to auto-tune on GpuIndex'es **********************************************************/ #define DC(classname) auto ix = dynamic_cast(index) void GpuParameterSpace::initialize (const Index * index) { if (DC (IndexPreTransform)) { index = ix->index; } if (DC (IndexProxy)) { if (ix->count() == 0) return; index = ix->at(0); } if (DC (faiss::IndexShards)) { if (ix->shard_indexes.size() == 0) return; index = ix->shard_indexes[0]; } if (DC (GpuIndexIVF)) { ParameterRange & pr = add_range("nprobe"); for (int i = 0; i < 12; i++) { size_t nprobe = 1 << i; if (nprobe >= ix->getNumLists() || nprobe > 1024) break; pr.values.push_back (nprobe); } } // not sure we should call the parent initializer } #undef DC // non-const version #define DC(classname) auto *ix = dynamic_cast(index) void GpuParameterSpace::set_index_parameter ( Index * index, const std::string & name, double val) const { if (DC (IndexPreTransform)) { index = ix->index; } if (DC (IndexProxy)) { for (int i = 0; i < ix->count(); i++) set_index_parameter (ix->at(i), name, val); return; } if (DC (faiss::IndexShards)) { for (auto sub_index : ix->shard_indexes) set_index_parameter (sub_index, name, val); return; } if (name == "nprobe") { DC (GpuIndexIVF); FAISS_ASSERT(ix); ix->setNumProbes (int (val)); return; } if (name == "use_precomputed_table") { DC (GpuIndexIVFPQ); FAISS_ASSERT(ix); ix->setPrecomputedCodes(bool (val)); return; } FAISS_ASSERT_MSG (false, "unknown parameter"); } } } // namespace