#include "GpuAutoTune.h"

#include "../FaissAssert.h"
#include "../index_io.h"
#include "../IndexFlat.h"
#include "../IndexIVF.h"
#include "../IndexIVFFlat.h"
#include "../IndexIVFPQ.h"
#include "../VectorTransform.h"
#include "../MetaIndexes.h"
#include "GpuIndexFlat.h"
#include "GpuIndexIVFFlat.h"
#include "GpuIndexIVFPQ.h"
#include "IndexProxy.h"

namespace faiss { namespace gpu {
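/**********************************************************
 * Cloning to CPU
 **********************************************************/

// Folds the contents of src into dst. With successive_ids, the vector IDs
// of src are assumed to follow on from those already in dst; otherwise the
// IVF variants keep the IDs stored in src.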
void merge_index(Index *dst, Index *src, bool successive_ids) {
    if (auto ifl = dynamic_cast<IndexFlat *>(dst)) {
        auto ifl2 = dynamic_cast<const IndexFlat *>(src);
        FAISS_ASSERT(ifl2);
        FAISS_ASSERT(successive_ids);
        ifl->add(ifl2->ntotal, ifl2->xb.data());
    } else if (auto ifl = dynamic_cast<IndexIVFFlat *>(dst)) {
        auto ifl2 = dynamic_cast<IndexIVFFlat *>(src);
        FAISS_ASSERT(ifl2);
        ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
    } else if (auto ifl = dynamic_cast<IndexIVFPQ *>(dst)) {
        auto ifl2 = dynamic_cast<IndexIVFPQ *>(src);
        FAISS_ASSERT(ifl2);
        ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
    } else {
        FAISS_ASSERT(!"merging not implemented for this type of class");
    }
}
struct ToCPUCloner: Cloner {
    Index *clone_Index(const Index *index) override {
        if (auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
            IndexFlat *res = new IndexFlat();
            ifl->copyTo(res);
            return res;
        } else if (auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
            IndexIVFFlat *res = new IndexIVFFlat();
            ifl->copyTo(res);
            return res;
        } else if (auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
            IndexIVFPQ *res = new IndexIVFPQ();
            ipq->copyTo(res);
            return res;
        } else if (auto ish = dynamic_cast<const IndexShards *>(index)) {
            int nshard = ish->shard_indexes.size();
            FAISS_ASSERT(nshard > 0);
            Index *res = clone_Index(ish->shard_indexes[0]);
            for (int i = 1; i < ish->shard_indexes.size(); i++) {
                Index *res_i = clone_Index(ish->shard_indexes[i]);
                merge_index(res, res_i, ish->successive_ids);
                delete res_i;
            }
            return res;
        } else if (auto ipr = dynamic_cast<const IndexProxy *>(index)) {
            // the replicas are identical, so cloning the first one is enough
            FAISS_ASSERT(ipr->count() > 0);
            return clone_Index(ipr->at(0));
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
{
    ToCPUCloner cl;
    return cl.clone_Index(gpu_index);
}
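/**********************************************************
 * Cloning to 1 GPU
 **********************************************************/

// ToGpuCloner carries the user-supplied GpuClonerOptions together with the
// target device and its GpuResources, and translates each supported CPU
// index type into its GPU counterpart.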
struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
    GpuResources *resources;
    int device;

    ToGpuCloner(GpuResources *resources, int device,
                const GpuClonerOptions &options):
        GpuClonerOptions(options), resources(resources), device(device)
    {}

    Index *clone_Index(const Index *index) override {
        if (auto ifl = dynamic_cast<const IndexFlat *>(index)) {
            GpuIndexFlatConfig config;
            config.device = device;
            config.useFloat16 = useFloat16;
            config.storeTransposed = storeTransposed;
            return new GpuIndexFlat(resources, ifl, config);
        } else if (auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
            GpuIndexIVFFlatConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;
            config.useFloat16IVFStorage = useFloat16;

            GpuIndexIVFFlat *res = new GpuIndexIVFFlat(
                resources, ifl->d, ifl->nlist, ifl->metric_type, config);
            if (reserveVecs > 0 && ifl->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }
            res->copyFrom(ifl);
            return res;
        } else if (auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if (verbose)
                printf("  IndexIVFPQ size %ld -> GpuIndexIVFPQ "
                       "indicesOptions=%d "
                       "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
                       ipq->ntotal, indicesOptions, usePrecomputed,
                       useFloat16, reserveVecs);
            GpuIndexIVFPQConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;
            config.useFloat16LookupTables = useFloat16;
            config.usePrecomputedTables = usePrecomputed;

            GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config);
            if (reserveVecs > 0 && ipq->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }
            return res;
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_cpu_to_gpu(GpuResources* resources, int device,
                                const faiss::Index *index,
                                const GpuClonerOptions *options)
{
    GpuClonerOptions defaults;
    ToGpuCloner cl(resources, device, options ? *options : defaults);
    return cl.clone_Index(index);
}
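// Usage sketch (illustrative only, not part of this file): round-trip an
// index through one GPU. Assumes a StandardGpuResources instance and a
// trained, populated CPU index.
//
//   faiss::gpu::StandardGpuResources res;
//   faiss::gpu::GpuClonerOptions opts;
//   opts.useFloat16 = true;          // store IVF data as float16
//   faiss::Index *gpu_index =
//       faiss::gpu::index_cpu_to_gpu(&res, 0 /*device*/, cpu_index, &opts);
//   // ... add / search on gpu_index ...
//   faiss::Index *cpu_copy = faiss::gpu::index_gpu_to_cpu(gpu_index);

/**********************************************************
 * Cloning to multiple GPUs
 **********************************************************/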
struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
    std::vector<ToGpuCloner> sub_cloners;

    ToGpuClonerMultiple(std::vector<GpuResources *> &resources,
                        std::vector<int> &devices,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options)
    {
        FAISS_ASSERT(resources.size() == devices.size());
        for (int i = 0; i < resources.size(); i++) {
            sub_cloners.push_back(ToGpuCloner(
                resources[i], devices[i], options));
        }
    }

    ToGpuClonerMultiple(const std::vector<ToGpuCloner> &sub_cloners,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options),
        sub_cloners(sub_cloners)
    {}
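    // Copies shard i out of n from index_ivf into idx2. shard_type 1
    // assigns vectors to shards by ID modulo n; shard_type 2 splits the
    // dataset into n contiguous chunks of roughly equal size (see
    // IndexIVF::copy_subset_to).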
    void copy_ivf_shard(const IndexIVF *index_ivf, IndexIVF *idx2,
                        long n, long i) {
        if (shard_type == 2) {
            long i0 = i * index_ivf->ntotal / n;
            long i1 = (i + 1) * index_ivf->ntotal / n;
            if (verbose)
                printf("IndexShards shard %ld indices %ld:%ld\n",
                       i, i0, i1);
            index_ivf->copy_subset_to(*idx2, 2, i0, i1);
            FAISS_ASSERT(idx2->ntotal == i1 - i0);
        } else if (shard_type == 1) {
            if (verbose)
                printf("IndexShards shard %ld select modulo %ld = %ld\n",
                       i, n, i);
            index_ivf->copy_subset_to(*idx2, 1, n, i);
        } else {
            FAISS_THROW_FMT("shard_type %d not implemented", shard_type);
        }
    }
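    // Dispatch for multi-GPU cloning: without sharding, each GPU receives a
    // full replica behind an IndexProxy; with sharding, each GPU receives an
    // IndexShards slice of the dataset. A MultiIndexQuantizer is split
    // across GPUs one sub-quantizer at a time.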
    Index *clone_Index(const Index *index) override {
        long n = sub_cloners.size();
        if (n == 1)
            return sub_cloners[0].clone_Index(index);

        if (dynamic_cast<const IndexFlat *>(index) ||
            dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
            dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if (!shard) {
                IndexProxy *res = new IndexProxy();
                for (auto & sub_cloner: sub_cloners) {
                    res->addIndex(sub_cloner.clone_Index(index));
                }
                res->own_fields = true;
                return res;
            } else {
                auto index_ivfpq =
                    dynamic_cast<const faiss::IndexIVFPQ *>(index);
                auto index_ivfflat =
                    dynamic_cast<const faiss::IndexIVFFlat *>(index);
                FAISS_THROW_IF_NOT_MSG (index_ivfpq || index_ivfflat,
                    "IndexShards implemented only for "
                    "IndexIVFFlat or IndexIVFPQ");
                std::vector<faiss::Index*> shards(n);

                for (long i = 0; i < n; i++) {
                    // spread the vector reservation over the shards
                    if (reserveVecs)
                        sub_cloners[i].reserveVecs =
                            (reserveVecs + n - 1) / n;

                    if (index_ivfpq) {
                        faiss::IndexIVFPQ idx2(
                            index_ivfpq->quantizer, index_ivfpq->d,
                            index_ivfpq->nlist, index_ivfpq->code_size,
                            index_ivfpq->pq.nbits);
                        idx2.metric_type = index_ivfpq->metric_type;
                        idx2.pq = index_ivfpq->pq;
                        idx2.nprobe = index_ivfpq->nprobe;
                        idx2.use_precomputed_table = 0;
                        idx2.is_trained = index->is_trained;
                        copy_ivf_shard (index_ivfpq, &idx2, n, i);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    } else if (index_ivfflat) {
                        faiss::IndexIVFFlat idx2(
                            index_ivfflat->quantizer, index->d,
                            index_ivfflat->nlist,
                            index_ivfflat->metric_type);
                        idx2.nprobe = index_ivfflat->nprobe;
                        copy_ivf_shard (index_ivfflat, &idx2, n, i);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    }
                }

                faiss::IndexShards *res =
                    new faiss::IndexShards(index->d, true, false);
                for (int i = 0; i < n; i++) {
                    res->add_shard(shards[i]);
                }
                res->own_fields = true;
                FAISS_ASSERT(index->ntotal == res->ntotal);
                return res;
            }
        } else if (auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
            if (verbose)
                printf("cloning MultiIndexQuantizer: "
                       "will be valid only for search k=1\n");
            const ProductQuantizer & pq = miq->pq;
            IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
            splitv->own_fields = true;

            for (int m = 0; m < pq.M; m++) {
                // which GPU(s) will be assigned to this sub-quantizer
                long i0 = m * n / pq.M;
                long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
                std::vector<ToGpuCloner> sub_cloners_2;
                sub_cloners_2.insert(
                    sub_cloners_2.begin(), sub_cloners.begin() + i0,
                    sub_cloners.begin() + i1);
                ToGpuClonerMultiple cm(sub_cloners_2, *this);
                IndexFlatL2 idxc (pq.dsub);
                idxc.add (pq.ksub, pq.get_centroids(m, 0));
                Index *idx2 = cm.clone_Index(&idxc);
                splitv->add_sub_index(idx2);
            }
            return splitv;
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_cpu_to_gpu_multiple(
        std::vector<GpuResources*> &resources,
        std::vector<int> &devices,
        const faiss::Index *index,
        const GpuMultipleClonerOptions *options)
{
    GpuMultipleClonerOptions defaults;
    ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
    return cl.clone_Index(index);
}
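/**********************************************************
 * Parameters to auto-tune on GpuIndexes
 **********************************************************/

// The GPU parameter space exposes only the knobs the GPU indexes support:
// nprobe for IVF indexes, and use_precomputed_table for IVFPQ.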
#define DC(classname) auto ix = dynamic_cast<const classname *>(index)

void GpuParameterSpace::initialize (const Index * index)
{
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (faiss::IndexShards)) {
        if (ix->shard_indexes.size() == 0) return;
        index = ix->shard_indexes[0];
    }
    if (DC (GpuIndexIVF)) {
        ParameterRange & pr = add_range("nprobe");
        for (int i = 0; i < 12; i++) {
            size_t nprobe = 1 << i;
            if (nprobe >= ix->getNumLists() || nprobe > 1024)
                break;
            pr.values.push_back (nprobe);
        }
    }
}
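// For an index with nlist = 4096 lists, for example, initialize() above
// yields the candidate values nprobe = 1, 2, 4, ..., 1024: the sweep stays
// below nlist and is capped at 1024 probes.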
#undef DC
// non-const version
#define DC(classname) auto *ix = dynamic_cast<classname *>(index)

void GpuParameterSpace::set_index_parameter (
        Index * index, const std::string & name, double val) const
{
    if (DC (IndexProxy)) {
        for (int i = 0; i < ix->count(); i++)
            set_index_parameter (ix->at(i), name, val);
        return;
    }
    if (name == "nprobe") {
        if (DC (GpuIndexIVF)) {
            ix->setNumProbes (int (val));
            return;
        }
    }
    if (name == "use_precomputed_table") {
        if (DC (GpuIndexIVFPQ)) {
            ix->setPrecomputedCodes (bool (val));
            return;
        }
    }
    // fall back to the CPU parameter handling
    ParameterSpace::set_index_parameter (index, name, val);
}

} } // namespace
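// Usage sketch (illustrative only): auto-tune nprobe on a GPU index.
//
//   faiss::gpu::GpuParameterSpace params;
//   params.initialize(gpu_index);    // enumerates nprobe = 1, 2, 4, ...
//   params.set_index_parameter(gpu_index, "nprobe", 32);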