#include "GpuAutoTune.h"

#include "../FaissAssert.h"
#include "../index_io.h"
#include "../IndexFlat.h"
#include "../IndexIVF.h"
#include "../IndexIVFPQ.h"
#include "../VectorTransform.h"
#include "../MetaIndexes.h"
#include "GpuIndexFlat.h"
#include "GpuIndexIVFFlat.h"
#include "GpuIndexIVFPQ.h"
#include "IndexProxy.h"

namespace faiss { namespace gpu {

// cloner that copies GPU indexes back to their CPU counterparts
struct ToCPUCloner: faiss::Cloner {

    void merge_index(Index *dst, Index *src, bool successive_ids) {
        if (auto ifl = dynamic_cast<IndexFlat *>(dst)) {
            auto ifl2 = dynamic_cast<const IndexFlat *>(src);
            FAISS_ASSERT(ifl2);
            FAISS_ASSERT(successive_ids);
            ifl->add(ifl2->ntotal, ifl2->xb.data());
        } else if (auto ifl = dynamic_cast<IndexIVFFlat *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFFlat *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else if (auto ifl = dynamic_cast<IndexIVFPQ *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFPQ *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else {
            FAISS_ASSERT(!"merging not implemented for this type of class");
        }
    }
    Index *clone_Index(const Index *index) override {
        if (auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
            IndexFlat *res = new IndexFlat();
            ifl->copyTo(res);
            return res;
        } else if (auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
            IndexIVFFlat *res = new IndexIVFFlat();
            ifl->copyTo(res);
            return res;
        } else if (auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
            IndexIVFPQ *res = new IndexIVFPQ();
            ipq->copyTo(res);
            return res;
        } else if (auto ish = dynamic_cast<const IndexShards *>(index)) {
            // clone each shard to CPU and merge them into a single index
            int nshard = ish->shard_indexes.size();
            FAISS_ASSERT(nshard > 0);
            Index *res = clone_Index(ish->shard_indexes[0]);
            for (int i = 1; i < ish->shard_indexes.size(); i++) {
                Index *res_i = clone_Index(ish->shard_indexes[i]);
                merge_index(res, res_i, ish->successive_ids);
                delete res_i;
            }
            return res;
        } else if (auto ipr = dynamic_cast<const IndexProxy *>(index)) {
            // the replicas are identical, so cloning the first one is enough
            FAISS_ASSERT(ipr->count() > 0);
            return clone_Index(ipr->at(0));
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
{
    ToCPUCloner cl;
    return cl.clone_Index(gpu_index);
}
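
/* Illustrative usage (not in the original file): a minimal sketch of copying a
 * GPU index back to the CPU, e.g. to serialize it with write_index() from
 * index_io.h. `gpu_index` is assumed to have been built with one of the
 * cloners in this file.
 *
 *   faiss::Index *cpu_index = faiss::gpu::index_gpu_to_cpu(gpu_index);
 *   faiss::write_index(cpu_index, "ivf.faissindex");
 *   delete cpu_index;
 */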

// cloner that copies a CPU index to a single GPU
struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
    GpuResources *resources;
    int device;

    // ... (constructor elided)

    Index *clone_Index(const Index *index) override {
        if (auto ifl = dynamic_cast<const IndexFlat *>(index)) {
            // ... (build a GpuIndexFlat on `device` and copy the vectors)
        } else if (auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
            // ... (build a GpuIndexIVFFlat, reserve memory, copyFrom(ifl))
        } else if (auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            printf("  IndexIVFPQ size %ld -> GpuIndexIVFPQ "
                   "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
                   /* ... */);
            // ... (build a GpuIndexIVFPQ and reserve memory)
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_cpu_to_gpu(GpuResources *resources, int device,
                                const faiss::Index *index,
                                const GpuClonerOptions *options)
{
    GpuClonerOptions defaults;
    ToGpuCloner cl(resources, device, options ? *options : defaults);
    return cl.clone_Index(index);
}
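
/* Illustrative usage (not in the original file): a minimal sketch of moving a
 * trained CPU index to GPU 0, assuming a StandardGpuResources object;
 * `cpu_index` is hypothetical.
 *
 *   faiss::gpu::StandardGpuResources res;
 *   faiss::gpu::GpuClonerOptions opts;
 *   opts.useFloat16 = true;   // use float16 storage where supported
 *   faiss::Index *gpu_index =
 *       faiss::gpu::index_cpu_to_gpu(&res, 0, cpu_index, &opts);
 */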

// cloner that spreads a CPU index over several GPUs, by replication or sharding
struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
    std::vector<ToGpuCloner> sub_cloners;

    ToGpuClonerMultiple(std::vector<GpuResources *> &resources,
                        std::vector<int> &devices,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options) {
        FAISS_ASSERT(resources.size() == devices.size());
        for (int i = 0; i < resources.size(); i++)
            sub_cloners.push_back(
                ToGpuCloner(resources[i], devices[i], options));
    }

    ToGpuClonerMultiple(const std::vector<ToGpuCloner> &sub_cloners,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options), sub_cloners(sub_cloners) {}

    void copy_ivf_shard(const IndexIVF *index_ivf, IndexIVF *idx2,
                        long n, long i) {
        if (shard_type == 2) {
            // shard i receives the contiguous slice [i0, i1) of the dataset
            long i0 = i * index_ivf->ntotal / n;
            long i1 = (i + 1) * index_ivf->ntotal / n;
            if (verbose)
                printf("IndexShards shard %ld indices %ld:%ld\n",
                       i, i0, i1);
            index_ivf->copy_subset_to(*idx2, 2, i0, i1);
            FAISS_ASSERT(idx2->ntotal == i1 - i0);
        } else if (shard_type == 1) {
            // shard i receives the ids equal to i modulo n
            if (verbose)
                printf("IndexShards shard %ld select modulo %ld = %ld\n",
                       i, n, i);
            index_ivf->copy_subset_to(*idx2, 1, n, i);
        } else {
            FAISS_THROW_FMT("shard_type %d not implemented", shard_type);
        }
    }

    Index *clone_Index(const Index *index) override {
        long n = sub_cloners.size();
        if (n == 1)
            return sub_cloners[0].clone_Index(index);

        if (dynamic_cast<const IndexFlat *>(index) ||
            dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
            dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if (!shard) {
                // replicate the whole index on every GPU
                IndexProxy *res = new IndexProxy();
                for (auto & sub_cloner : sub_cloners) {
                    res->addIndex(sub_cloner.clone_Index(index));
                }
                res->own_fields = true;
                return res;
            } else {
                // shard the inverted lists over the GPUs
                auto index_ivfpq =
                    dynamic_cast<const faiss::IndexIVFPQ *>(index);
                auto index_ivfflat =
                    dynamic_cast<const faiss::IndexIVFFlat *>(index);
                FAISS_THROW_IF_NOT_MSG(index_ivfpq || index_ivfflat,
                    "IndexShards implemented only for "
                    "IndexIVFFlat or IndexIVFPQ");
                std::vector<faiss::Index*> shards(n);

                for (long i = 0; i < n; i++) {
                    // spread the reserved vectors evenly over the shards
                    if (reserveVecs)
                        sub_cloners[i].reserveVecs = (reserveVecs + n - 1) / n;

                    if (index_ivfpq) {
                        // shallow CPU copy describing shard i
                        faiss::IndexIVFPQ idx2(
                            index_ivfpq->quantizer, index_ivfpq->d,
                            index_ivfpq->nlist, index_ivfpq->code_size,
                            index_ivfpq->pq.nbits);
                        idx2.pq = index_ivfpq->pq;
                        idx2.nprobe = index_ivfpq->nprobe;
                        copy_ivf_shard(index_ivfpq, &idx2, n, i);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    } else if (index_ivfflat) {
                        faiss::IndexIVFFlat idx2(
                            index_ivfflat->quantizer, index->d,
                            index_ivfflat->nlist, index_ivfflat->metric_type);
                        idx2.nprobe = index_ivfflat->nprobe;
                        copy_ivf_shard(index_ivfflat, &idx2, n, i);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    }
                }

                // assemble the per-GPU shards into a single IndexShards
                faiss::IndexShards *res =
                    new faiss::IndexShards(index->d, true, false);
                for (int i = 0; i < n; i++) {
                    res->add_shard(shards[i]);
                }
                res->own_fields = true;
                return res;
            }
        } else if (auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
            printf("cloning MultiIndexQuantizer: "
                   "will be valid only for search k=1\n");
            const ProductQuantizer & pq = miq->pq;
            IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
            splitv->own_fields = true;

            for (int m = 0; m < pq.M; m++) {
                // which sub-range of GPUs handles sub-quantizer m
                long i0 = m * n / pq.M;
                long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
                std::vector<ToGpuCloner> sub_cloners_2;
                sub_cloners_2.insert(
                    sub_cloners_2.begin(), sub_cloners.begin() + i0,
                    sub_cloners.begin() + i1);
                ToGpuClonerMultiple cm(sub_cloners_2, *this);
                // flat index over the centroid table of sub-quantizer m
                IndexFlatL2 idxc(pq.dsub);
                idxc.add(pq.ksub, pq.get_centroids(m, 0));
                Index *idx2 = cm.clone_Index(&idxc);
                splitv->add_sub_index(idx2);
            }
            return splitv;
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_cpu_to_gpu_multiple(
        std::vector<GpuResources*> &resources, std::vector<int> &devices,
        const faiss::Index *index, const GpuMultipleClonerOptions *options)
{
    GpuMultipleClonerOptions defaults;
    ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
    return cl.clone_Index(index);
}
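
/* Illustrative usage (not in the original file): a sketch of sharding an IVF
 * index over two GPUs; the resources objects and `cpu_index` are assumed to
 * exist already.
 *
 *   std::vector<faiss::gpu::GpuResources*> res = {&res0, &res1};
 *   std::vector<int> devices = {0, 1};
 *   faiss::gpu::GpuMultipleClonerOptions opts;
 *   opts.shard = true;   // shard the database instead of replicating it
 *   faiss::Index *gpu_index =
 *       faiss::gpu::index_cpu_to_gpu_multiple(res, devices, cpu_index, &opts);
 */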

#define DC(classname) auto ix = dynamic_cast<const classname *>(index)

void GpuParameterSpace::initialize (const Index *index)
{
    if (DC (IndexProxy)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (faiss::IndexShards)) {
        if (ix->shard_indexes.size() == 0) return;
        index = ix->shard_indexes[0];
    }
    if (DC (GpuIndexIVF)) {
        // explore nprobe over powers of 2, capped by nlist and 1024
        ParameterRange & pr = add_range("nprobe");
        for (int i = 0; i < 12; i++) {
            size_t nprobe = 1 << i;
            if (nprobe >= ix->getNumLists() ||
                nprobe > 1024) break;
            pr.values.push_back (nprobe);
        }
    }
}

#undef DC
#define DC(classname) auto *ix = dynamic_cast<classname *>(index)

void GpuParameterSpace::set_index_parameter (
        Index *index, const std::string &name, double val) const
{
    if (DC (IndexProxy)) {
        // propagate the parameter to every replica
        for (int i = 0; i < ix->count(); i++)
            set_index_parameter (ix->at(i), name, val);
        return;
    }
    if (DC (GpuIndexIVF)) {
        if (name == "nprobe") {
            ix->setNumProbes (int (val));
            return;
        }
    }
    if (DC (GpuIndexIVFPQ)) {
        if (name == "use_precomputed_table") {
            ix->setPrecomputedCodes (bool (val));
            return;
        }
    }
    // ... (other index types and parameters elided)
}

} } // namespace faiss::gpu
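
/* Illustrative usage (not in the original file): a sketch of setting GPU-side
 * search parameters through GpuParameterSpace; `gpu_index` is assumed to be
 * one of the GPU index types handled above.
 *
 *   faiss::gpu::GpuParameterSpace params;
 *   params.initialize(gpu_index);                         // build nprobe range
 *   params.set_index_parameter(gpu_index, "nprobe", 32);
 */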