#include "GpuAutoTune.h"

#include "../FaissAssert.h"
#include "../index_io.h"
#include "../IndexFlat.h"
#include "../IndexIVF.h"
#include "../IndexIVFPQ.h"
#include "../VectorTransform.h"
#include "../MetaIndexes.h"
#include "GpuIndexFlat.h"
#include "GpuIndexIVFFlat.h"
#include "GpuIndexIVFPQ.h"
#include "IndexProxy.h"

namespace faiss { namespace gpu {

// Cloner that copies GPU indexes back to CPU indexes
struct ToCPUCloner: faiss::Cloner {

    // merge the contents of src into dst (used to undo sharding)
    void merge_index(Index *dst, Index *src, bool successive_ids) {
        if (auto ifl = dynamic_cast<IndexFlat *>(dst)) {
            auto ifl2 = dynamic_cast<const IndexFlat *>(src);
            FAISS_ASSERT(ifl2);
            FAISS_ASSERT(successive_ids);
            ifl->add(ifl2->ntotal, ifl2->xb.data());
        } else if (auto ifl = dynamic_cast<IndexIVFFlat *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFFlat *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else if (auto ifl = dynamic_cast<IndexIVFPQ *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFPQ *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else {
            FAISS_ASSERT(!"merging not implemented for this type of class");
        }
    }

    Index *clone_Index(const Index *index) override {
        if (auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
            IndexFlat *res = new IndexFlat();
            ifl->copyTo(res);
            return res;
        } else if (auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
            IndexIVFFlat *res = new IndexIVFFlat();
            ifl->copyTo(res);
            return res;
        } else if (auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
            IndexIVFPQ *res = new IndexIVFPQ();
            ipq->copyTo(res);
            return res;

            // for IndexShards and IndexProxy, build a single index out of
            // the sub-indexes (inverse of ToGpuClonerMultiple below)

        } else if (auto ish = dynamic_cast<const IndexShards *>(index)) {
            int nshard = ish->shard_indexes.size();
            FAISS_ASSERT(nshard > 0);
            Index *res = clone_Index(ish->shard_indexes[0]);
            for (int i = 1; i < ish->shard_indexes.size(); i++) {
                Index *res_i = clone_Index(ish->shard_indexes[i]);
                merge_index(res, res_i, ish->successive_ids);
                delete res_i;
            }
            return res;
        } else if (auto ipr = dynamic_cast<const IndexProxy *>(index)) {
            // the replicas are identical, so clone just one of them
            FAISS_ASSERT(ipr->count() > 0);
            return clone_Index(ipr->at(0));
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
{
    ToCPUCloner cl;
    return cl.clone_Index(gpu_index);
}
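
// Usage sketch (a minimal example, assuming the standard GPU API declared
// elsewhere in the library, e.g. StandardGpuResources and GpuIndexFlatL2):
//
//   faiss::gpu::StandardGpuResources res;
//   faiss::gpu::GpuIndexFlatL2 gpu_index(&res, d);
//   gpu_index.add(nb, xb);
//   faiss::Index *cpu_index = faiss::gpu::index_gpu_to_cpu(&gpu_index);
//   faiss::write_index(cpu_index, "flat.index");   // from index_io.h
//   delete cpu_index;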

GpuClonerOptions::GpuClonerOptions():
    indicesOptions(INDICES_64_BIT),
    useFloat16CoarseQuantizer(false),
    useFloat16(false),
    usePrecomputed(true),
    reserveVecs(0),
    storeTransposed(false),
    verbose(0)
{}

// Cloner that copies a CPU index to one GPU
struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
    GpuResources *resources;
    int device;

    ToGpuCloner(GpuResources *resources, int device,
                const GpuClonerOptions &options):
        GpuClonerOptions(options), resources(resources), device(device)
    {}

    Index *clone_Index(const Index *index) override {
        if (auto ifl = dynamic_cast<const IndexFlat *>(index)) {
            GpuIndexFlatConfig config;
            config.device = device;
            config.useFloat16 = useFloat16;
            config.storeTransposed = storeTransposed;
            return new GpuIndexFlat(resources, ifl, config);
        } else if (auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
            GpuIndexIVFFlatConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;
            config.useFloat16IVFStorage = useFloat16;

            GpuIndexIVFFlat *res =
                new GpuIndexIVFFlat(resources, ifl->d, ifl->nlist,
                                    ifl->metric_type, config);
            if (reserveVecs > 0 && ifl->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }
            res->copyFrom(ifl);
            return res;
        } else if (auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if (verbose)
                printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ "
                       "indicesOptions=%d "
                       "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
                       ipq->ntotal, indicesOptions, usePrecomputed,
                       useFloat16, reserveVecs);
            GpuIndexIVFPQConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;
            config.useFloat16LookupTables = useFloat16;
            config.usePrecomputedTables = usePrecomputed;

            GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config);
            if (reserveVecs > 0 && ipq->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }
            return res;
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_cpu_to_gpu(
       GpuResources* resources, int device,
       const faiss::Index *index,
       const GpuClonerOptions *options)
{
    GpuClonerOptions defaults;
    ToGpuCloner cl(resources, device, options ? *options : defaults);
    return cl.clone_Index(index);
}
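
// Usage sketch (a minimal example; cpu_index stands for any supported CPU
// index, e.g. a trained IndexIVFPQ):
//
//   faiss::gpu::StandardGpuResources res;
//   faiss::gpu::GpuClonerOptions opts;
//   opts.useFloat16 = true;      // float16 lookup tables / storage
//   opts.reserveVecs = 1000000;  // pre-allocate room for later add()s
//   faiss::Index *gpu_index =
//       faiss::gpu::index_cpu_to_gpu(&res, 0 /* device */, cpu_index, &opts);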

GpuMultipleClonerOptions::GpuMultipleClonerOptions(): shard(false)
{}

// Cloner that distributes a CPU index over several GPUs,
// either replicated or sharded
struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
    std::vector<ToGpuCloner> sub_cloners;

    ToGpuClonerMultiple(std::vector<GpuResources *> & resources,
                        std::vector<int>& devices,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options)
    {
        FAISS_ASSERT(resources.size() == devices.size());
        for (int i = 0; i < resources.size(); i++) {
            sub_cloners.push_back(ToGpuCloner(
                     resources[i], devices[i], options));
        }
    }

    ToGpuClonerMultiple(const std::vector<ToGpuCloner> & sub_cloners,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options),
        sub_cloners(sub_cloners)
    {}

    Index *clone_Index(const Index *index) override {
        long n = sub_cloners.size();
        if (n == 1)
            return sub_cloners[0].clone_Index(index);

        if (dynamic_cast<const IndexFlat *>(index) ||
            dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
            dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if (!shard) {
                // replicate the index on all devices
                IndexProxy *res = new IndexProxy();
                for (auto & sub_cloner: sub_cloners) {
                    res->addIndex(sub_cloner.clone_Index(index));
                }
                res->own_fields = true;
                return res;
            } else {
                // shard the dataset: one slice per device
                auto index_ivfpq =
                    dynamic_cast<const faiss::IndexIVFPQ *>(index);
                auto index_ivfflat =
                    dynamic_cast<const faiss::IndexIVFFlat *>(index);
                FAISS_ASSERT_MSG (index_ivfpq || index_ivfflat,
                              "IndexShards implemented only for "
                              "IndexIVFFlat or IndexIVFPQ");
                std::vector<faiss::Index*> shards(n);

                for (long i = 0; i < n; i++) {
                    // shard i gets the slice [i0, i1) of the dataset;
                    // idx2 below is a shallow CPU copy of that slice
                    long i0 = i * index->ntotal / n;
                    long i1 = (i + 1) * index->ntotal / n;
                    if (verbose)
                        printf("IndexShards shard %ld indices %ld:%ld\n",
                               i, i0, i1);
                    if (reserveVecs)
                        sub_cloners[i].reserveVecs =
                            (reserveVecs + n - 1) / n;

                    if (index_ivfpq) {
                        faiss::IndexIVFPQ idx2(
                              index_ivfpq->quantizer, index_ivfpq->d,
                              index_ivfpq->nlist, index_ivfpq->code_size,
                              index_ivfpq->pq.nbits);
                        idx2.pq = index_ivfpq->pq;
                        idx2.nprobe = index_ivfpq->nprobe;
                        idx2.use_precomputed_table = 0;
                        idx2.is_trained = index->is_trained;
                        index_ivfpq->copy_subset_to(idx2, 0, i0, i1);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    } else if (index_ivfflat) {
                        faiss::IndexIVFFlat idx2(
                              index_ivfflat->quantizer, index->d,
                              index_ivfflat->nlist,
                              index_ivfflat->metric_type);
                        idx2.nprobe = index_ivfflat->nprobe;
                        index_ivfflat->copy_subset_to(idx2, 0, i0, i1);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    }
                }
                faiss::IndexShards *res =
                    new faiss::IndexShards(index->d, true, false);
                for (int i = 0; i < n; i++) {
                    res->add_shard(shards[i]);
                }
                res->own_fields = true;
                FAISS_ASSERT(index->ntotal == res->ntotal);
                return res;
            }
        } else if (auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
            if (verbose) {
                printf("cloning MultiIndexQuantizer: "
                       "will be valid only for search k=1\n");
            }
            const ProductQuantizer & pq = miq->pq;
            IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
            splitv->own_fields = true;

            for (int m = 0; m < pq.M; m++) {
                // which sub-cloners are assigned to sub-quantizer m
                long i0 = m * n / pq.M;
                long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
                std::vector<ToGpuCloner> sub_cloners_2;
                sub_cloners_2.insert(
                      sub_cloners_2.begin(), sub_cloners.begin() + i0,
                      sub_cloners.begin() + i1);
                ToGpuClonerMultiple cm(sub_cloners_2, *this);
                // centroid table of sub-quantizer m (layout M * ksub * dsub)
                IndexFlatL2 idxc(pq.dsub);
                idxc.add(pq.ksub, pq.centroids.data() + m * pq.ksub * pq.dsub);
                Index *idx2 = cm.clone_Index(&idxc);
                splitv->add_sub_index(idx2);
            }
            return splitv;
        } else {
            return Cloner::clone_Index(index);
        }
    }
};
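
// Design note: with shard=false the dataset is replicated on every GPU and
// the resulting IndexProxy spreads incoming queries over the replicas, which
// scales query throughput; with shard=true each GPU holds about ntotal/n
// vectors and every query visits all shards, which scales dataset size.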

faiss::Index * index_cpu_to_gpu_multiple(
       std::vector<GpuResources*> & resources,
       std::vector<int> &devices,
       const faiss::Index *index,
       const GpuMultipleClonerOptions *options)
{
    GpuMultipleClonerOptions defaults;
    ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
    return cl.clone_Index(index);
}
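
// Usage sketch (a minimal example for two GPUs; cpu_index stands for a
// trained IndexIVFFlat or IndexIVFPQ):
//
//   faiss::gpu::StandardGpuResources res0, res1;
//   std::vector<faiss::gpu::GpuResources*> resources = {&res0, &res1};
//   std::vector<int> devices = {0, 1};
//   faiss::gpu::GpuMultipleClonerOptions opts;
//   opts.shard = true;  // split the dataset instead of replicating it
//   faiss::Index *gpu_index = faiss::gpu::index_cpu_to_gpu_multiple(
//       resources, devices, cpu_index, &opts);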

// Parameters to auto-tune on GpuIndex'es

#define DC(classname) auto ix = dynamic_cast<const classname *>(index)

void GpuParameterSpace::initialize (const Index * index)
{
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (faiss::IndexShards)) {
        if (ix->shard_indexes.size() == 0) return;
        index = ix->shard_indexes[0];
    }
    if (DC (GpuIndexIVF)) {
        // explore nprobe in powers of 2 up to min(nlist, 1024)
        ParameterRange & pr = add_range("nprobe");
        for (int i = 0; i < 12; i++) {
            size_t nprobe = 1 << i;
            if (nprobe >= ix->getNumLists() || nprobe > 1024)
                break;
            pr.values.push_back (nprobe);
        }
    }
}
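
// Usage sketch (a minimal example): the nprobe range registered above can be
// explored by the generic auto-tuning machinery, or a single value can be set
// directly through set_index_parameter (defined below):
//
//   faiss::gpu::GpuParameterSpace params;
//   params.initialize(gpu_index);
//   params.set_index_parameter(gpu_index, "nprobe", 32);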

#undef DC
// non-const version
#define DC(classname) auto *ix = dynamic_cast<classname *>(index)

void GpuParameterSpace::set_index_parameter (
        Index * index, const std::string & name, double val) const
{
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        for (int i = 0; i < ix->count(); i++)
            set_index_parameter (ix->at(i), name, val);
        return;
    }
    if (DC (faiss::IndexShards)) {
        for (auto sub_index : ix->shard_indexes)
            set_index_parameter (sub_index, name, val);
        return;
    }
    if (name == "nprobe") {
        if (DC (GpuIndexIVF)) {
            ix->setNumProbes (int (val));
            return;
        }
    }
    if (name == "use_precomputed_table") {
        if (DC (GpuIndexIVFPQ)) {
            ix->setPrecomputedCodes (bool (val));
            return;
        }
    }
    FAISS_ASSERT_MSG (false, "unknown parameter");
}

} } // namespace faiss::gpu