337 lines
11 KiB
C++
337 lines
11 KiB
C++
|
|
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */
|
|
|
|
#include "GpuAutoTune.h"
|
|
|
|
|
|
#include "GpuIndex.h"
|
|
#include "../FaissAssert.h"
|
|
#include "../index_io.h"
|
|
#include "../IndexFlat.h"
|
|
#include "../IndexIVF.h"
|
|
#include "../IndexIVFPQ.h"
|
|
#include "../VectorTransform.h"
|
|
#include "../MetaIndexes.h"
|
|
#include "GpuIndexFlat.h"
|
|
#include "GpuIndexIVFFlat.h"
|
|
#include "GpuIndexIVFPQ.h"
|
|
#include "IndexProxy.h"
|
|
|
|
namespace faiss { namespace gpu {
|
|
|
|
/**********************************************************
 * Cloning from/to GPU
 **********************************************************/
|
|
|
|
|
|
struct ToCPUCloner: Cloner {
|
|
|
|
Index *clone_Index(const Index *index) override {
|
|
if(auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
|
|
IndexFlat *res = new IndexFlat();
|
|
ifl->copyTo(res);
|
|
return res;
|
|
} else if(auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
|
|
IndexIVFFlat *res = new IndexIVFFlat();
|
|
ifl->copyTo(res);
|
|
return res;
|
|
} else if(auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
|
|
IndexIVFPQ *res = new IndexIVFPQ();
|
|
ipq->copyTo(res);
|
|
return res;
|
|
} else {
|
|
return Cloner::clone_Index(index);
|
|
}
|
|
}
|
|
};
|
|
|
|
/// Converts an index to its CPU form by running it through a ToCPUCloner.
///
/// @param gpu_index  index to convert
/// @return the index produced by ToCPUCloner::clone_Index
faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
{
    ToCPUCloner cloner;
    faiss::Index *cpu_index = cloner.clone_Index(gpu_index);
    return cpu_index;
}
|
|
|
|
|
|
|
|
GpuClonerOptions::GpuClonerOptions():
|
|
indicesOptions(INDICES_64_BIT),
|
|
useFloat16CoarseQuantizer(false),
|
|
useFloat16(false),
|
|
usePrecomputed(true),
|
|
reserveVecs(0),
|
|
verbose(0)
|
|
{}
|
|
|
|
|
|
struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
|
|
GpuResources *resources;
|
|
int device;
|
|
|
|
ToGpuCloner(GpuResources *resources, int device, const GpuClonerOptions &options):
|
|
GpuClonerOptions(options), resources(resources), device(device)
|
|
{}
|
|
|
|
Index *clone_Index(const Index *index) override {
|
|
if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
|
|
return new GpuIndexFlat(resources, device, useFloat16, ifl);
|
|
} else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
|
|
GpuIndexIVFFlat *res =
|
|
new GpuIndexIVFFlat(resources,
|
|
device,
|
|
useFloat16CoarseQuantizer,
|
|
useFloat16,
|
|
ifl->d,
|
|
ifl->nlist,
|
|
indicesOptions,
|
|
ifl->metric_type);
|
|
if(reserveVecs > 0 && ifl->ntotal == 0)
|
|
res->reserveMemory(reserveVecs);
|
|
res->copyFrom(ifl);
|
|
return res;
|
|
} else if(auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
|
|
if(verbose)
|
|
printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ indicesOptions=%d "
|
|
"usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
|
|
ipq->ntotal, indicesOptions, usePrecomputed,
|
|
useFloat16, reserveVecs);
|
|
GpuIndexIVFPQ *res = new GpuIndexIVFPQ(
|
|
resources, device, indicesOptions, useFloat16,
|
|
ipq);
|
|
res->setPrecomputedCodes(usePrecomputed);
|
|
if(reserveVecs > 0 && ipq->ntotal == 0)
|
|
res->reserveMemory(reserveVecs);
|
|
return res;
|
|
} else {
|
|
return Cloner::clone_Index(index);
|
|
}
|
|
}
|
|
|
|
};
|
|
|
|
|
|
/// Copies a CPU index to one GPU by running it through a ToGpuCloner.
///
/// @param resources  GPU resources passed to the cloner
/// @param device     device passed to the cloner
/// @param index      index to copy
/// @param options    cloning options; when null, a default-constructed
///                   GpuClonerOptions is used
/// @return the index produced by ToGpuCloner::clone_Index
faiss::Index * index_cpu_to_gpu(
    GpuResources* resources, int device,
    const faiss::Index *index,
    const GpuClonerOptions *options)
{
    GpuClonerOptions defaults;
    const GpuClonerOptions *effective = options ? options : &defaults;

    ToGpuCloner cloner(resources, device, *effective);
    return cloner.clone_Index(index);
}
|
|
|
|
GpuMultipleClonerOptions::GpuMultipleClonerOptions(): shard(false)
|
|
{}
|
|
|
|
struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
|
|
std::vector<ToGpuCloner> sub_cloners;
|
|
|
|
ToGpuClonerMultiple(std::vector<GpuResources *> & resources,
|
|
std::vector<int>& devices,
|
|
const GpuMultipleClonerOptions &options):
|
|
GpuMultipleClonerOptions(options)
|
|
{
|
|
FAISS_ASSERT(resources.size() == devices.size());
|
|
for(int i = 0; i < resources.size(); i++) {
|
|
sub_cloners.push_back(ToGpuCloner(
|
|
resources[i], devices[i], options));
|
|
}
|
|
}
|
|
|
|
|
|
ToGpuClonerMultiple(const std::vector<ToGpuCloner> & sub_cloners,
|
|
const GpuMultipleClonerOptions &options):
|
|
GpuMultipleClonerOptions(options),
|
|
sub_cloners(sub_cloners)
|
|
{}
|
|
|
|
|
|
Index *clone_Index(const Index *index) override {
|
|
long n = sub_cloners.size();
|
|
|
|
if (n == 1)
|
|
return sub_cloners[0].clone_Index(index);
|
|
|
|
if(dynamic_cast<const IndexFlat *>(index) ||
|
|
dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
|
|
dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
|
|
if(!shard) {
|
|
IndexProxy * res = new IndexProxy();
|
|
for(auto & sub_cloner: sub_cloners) {
|
|
res->addIndex(sub_cloner.clone_Index(index));
|
|
}
|
|
res->own_fields = true;
|
|
return res;
|
|
} else {
|
|
auto index_ivfpq =
|
|
dynamic_cast<const faiss::IndexIVFPQ *>(index);
|
|
auto index_ivfflat =
|
|
dynamic_cast<const faiss::IndexIVFFlat *>(index);
|
|
FAISS_ASSERT (index_ivfpq || index_ivfflat ||
|
|
!"IndexShards implemented only for "
|
|
"IndexIVFFlat or IndexIVFPQ");
|
|
std::vector<faiss::Index*> shards(n);
|
|
|
|
for(long i = 0; i < n; i++) {
|
|
// make a shallow copy
|
|
long i0 = i * index->ntotal / n;
|
|
long i1 = (i + 1) * index->ntotal / n;
|
|
if(verbose)
|
|
printf("IndexShards shard %ld indices %ld:%ld\n",
|
|
i, i0, i1);
|
|
|
|
if(reserveVecs)
|
|
sub_cloners[i].reserveVecs =
|
|
(reserveVecs + n - 1) / n;
|
|
|
|
if (index_ivfpq) {
|
|
faiss::IndexIVFPQ idx2(
|
|
index_ivfpq->quantizer, index_ivfpq->d,
|
|
index_ivfpq->nlist, index_ivfpq->code_size,
|
|
index_ivfpq->pq.nbits);
|
|
idx2.pq = index_ivfpq->pq;
|
|
idx2.use_precomputed_table = 0;
|
|
idx2.is_trained = index->is_trained;
|
|
index_ivfpq->copy_subset_to(idx2, 0, i0, i1);
|
|
shards[i] = sub_cloners[i].clone_Index(&idx2);
|
|
} else if (index_ivfflat) {
|
|
faiss::IndexIVFFlat idx2(
|
|
index_ivfflat->quantizer, index->d,
|
|
index_ivfflat->nlist, index_ivfflat->metric_type);
|
|
index_ivfflat->copy_subset_to(idx2, 0, i0, i1);
|
|
shards[i] = sub_cloners[i].clone_Index(&idx2);
|
|
}
|
|
}
|
|
faiss::IndexShards *res =
|
|
new faiss::IndexShards(index->d, true, false);
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
res->add_shard(shards[i]);
|
|
}
|
|
res->own_fields = true;
|
|
assert(index->ntotal == res->ntotal);
|
|
return res;
|
|
}
|
|
} else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
|
|
if (verbose) {
|
|
printf("cloning MultiIndexQuantizer: "
|
|
"will be valid only for search k=1\n");
|
|
}
|
|
const ProductQuantizer & pq = miq->pq;
|
|
IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
|
|
splitv->own_fields = true;
|
|
|
|
for (int m = 0; m < pq.M; m++) {
|
|
// which GPU(s) will be assigned to this sub-quantizer
|
|
|
|
long i0 = m * n / pq.M;
|
|
long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
|
|
std::vector<ToGpuCloner> sub_cloners_2;
|
|
sub_cloners_2.insert(
|
|
sub_cloners_2.begin(), sub_cloners.begin() + i0,
|
|
sub_cloners.begin() + i1);
|
|
ToGpuClonerMultiple cm(sub_cloners_2, *this);
|
|
IndexFlatL2 idxc (pq.dsub);
|
|
idxc.add (pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub);
|
|
Index *idx2 = cm.clone_Index(&idxc);
|
|
splitv->add_sub_index(idx2);
|
|
}
|
|
return splitv;
|
|
} else {
|
|
return Cloner::clone_Index(index);
|
|
}
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Copies a CPU index to several GPUs by running it through a
/// ToGpuClonerMultiple.
///
/// @param resources  one GpuResources per GPU
/// @param devices    one device per GPU (same size as resources)
/// @param index      index to copy
/// @param options    cloning options; when null, a default-constructed
///                   GpuMultipleClonerOptions is used
/// @return the index produced by ToGpuClonerMultiple::clone_Index
faiss::Index * index_cpu_to_gpu_multiple(
    std::vector<GpuResources*> & resources,
    std::vector<int> &devices,
    const faiss::Index *index,
    const GpuMultipleClonerOptions *options)
{
    GpuMultipleClonerOptions defaults;
    const GpuMultipleClonerOptions *effective =
        options ? options : &defaults;

    ToGpuClonerMultiple cloner(resources, devices, *effective);
    return cloner.clone_Index(index);
}
|
|
|
|
|
|
|
|
/**********************************************************
 * Parameters to auto-tune on GpuIndex'es
 **********************************************************/
|
|
|
|
#define DC(classname) auto ix = dynamic_cast<const classname *>(index)
|
|
|
|
|
|
void GpuParameterSpace::initialize (const Index * index)
|
|
{
|
|
if (DC (IndexPreTransform)) {
|
|
index = ix->index;
|
|
}
|
|
if (DC (IndexProxy)) {
|
|
if (ix->count() == 0) return;
|
|
index = ix->at(0);
|
|
}
|
|
if (DC (faiss::IndexShards)) {
|
|
if (ix->shard_indexes.size() == 0) return;
|
|
index = ix->shard_indexes[0];
|
|
}
|
|
if (DC (GpuIndexIVF)) {
|
|
ParameterRange & pr = add_range("nprobe");
|
|
for (int i = 0; i < 12; i++) {
|
|
size_t nprobe = 1 << i;
|
|
if (nprobe >= ix->getNumLists() ||
|
|
nprobe > 1024) break;
|
|
pr.values.push_back (nprobe);
|
|
}
|
|
}
|
|
// not sure we should call the parent initializer
|
|
}
|
|
|
|
|
|
|
|
#undef DC
|
|
// non-const version
|
|
#define DC(classname) auto *ix = dynamic_cast<classname *>(index)
|
|
|
|
|
|
|
|
void GpuParameterSpace::set_index_parameter (
|
|
Index * index, const std::string & name, double val) const
|
|
{
|
|
if (DC (IndexPreTransform)) {
|
|
index = ix->index;
|
|
}
|
|
if (DC (IndexProxy)) {
|
|
for (int i = 0; i < ix->count(); i++)
|
|
set_index_parameter (ix->at(i), name, val);
|
|
return;
|
|
}
|
|
if (DC (faiss::IndexShards)) {
|
|
for (auto sub_index : ix->shard_indexes)
|
|
set_index_parameter (sub_index, name, val);
|
|
return;
|
|
}
|
|
if (name == "nprobe") {
|
|
DC (GpuIndexIVF);
|
|
FAISS_ASSERT(ix);
|
|
ix->setNumProbes (int (val));
|
|
return;
|
|
}
|
|
FAISS_ASSERT (!"unknown parameter");
|
|
}
|
|
|
|
|
|
|
|
|
|
} } // namespace
|