10 #include "GpuAutoTune.h"
14 #include "../FaissAssert.h"
15 #include "../index_io.h"
16 #include "../IndexFlat.h"
17 #include "../IndexIVF.h"
18 #include "../IndexIVFPQ.h"
19 #include "../VectorTransform.h"
20 #include "../MetaIndexes.h"
21 #include "GpuIndexFlat.h"
22 #include "GpuIndexIVFFlat.h"
23 #include "GpuIndexIVFPQ.h"
24 #include "IndexProxy.h"
26 namespace faiss {
namespace gpu {
// NOTE(review): fragment of a GPU->CPU cloner's clone_Index override.
// The embedded original line numbers jump (36 -> 40, 44 -> 49), so the body
// of each dynamic_cast branch (the code that copies a GPU index back to its
// CPU counterpart) is missing from this extract. Do not compile as-is.
35 Index *clone_Index(
const Index *index)
override {
// Dispatch on the concrete GPU index type; branch bodies are not visible here.
36 if(
auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
40 }
else if(
auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
44 }
else if(
auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
// Unrecognized type: defer to the base Cloner implementation.
49 return Cloner::clone_Index(index);
// NOTE(review): the line below (orig. line 57) belongs to a separate
// helper (an index_gpu_to_cpu-style function, presumably) whose signature
// and body are missing — confirm against the upstream source.
57 return cl.clone_Index(gpu_index);
// NOTE(review): truncated default constructor — only the first two member
// initializers survive in this extract; the remaining initializer list and
// the constructor body are missing.
62 GpuClonerOptions::GpuClonerOptions():
63 indicesOptions(INDICES_64_BIT),
64 useFloat16CoarseQuantizer(false),
// NOTE(review): fragment of a CPU->GPU cloner's clone_Index override.
// Branch bodies are missing (orig. 83 -> 97, 100 -> 111); only the type
// dispatch skeleton and part of a diagnostic printf remain.
80 Index *clone_Index(
const Index *index)
override {
// Dispatch on the concrete CPU index type being moved to the GPU.
81 if(
auto ifl = dynamic_cast<const IndexFlat *>(index)) {
83 }
else if(
auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
97 }
else if(
auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
// Diagnostic trace of the conversion options; argument list is missing
// from this extract.
99 printf(
" IndexIVFPQ size %ld -> GpuIndexIVFPQ indicesOptions=%d "
100 "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
// Unrecognized type: defer to the base Cloner implementation.
111 return Cloner::clone_Index(index);
// NOTE(review): the two lines below (orig. 124-125) belong to a separate
// index_cpu_to_gpu-style helper whose signature is missing — confirm
// against the upstream source.
124 ToGpuCloner cl(resources, device, options ? *options : defaults);
125 return cl.clone_Index(index);
// NOTE(review): two interleaved fragments — the GpuMultipleClonerOptions
// default constructor (orig. 128) and a multi-GPU cloner's member plus
// constructor (orig. 132-150). Both bodies are incomplete in this extract.
128 GpuMultipleClonerOptions::GpuMultipleClonerOptions(): shard(false)
// One per-device cloner; built below from parallel resources/devices lists.
132 std::vector<ToGpuCloner> sub_cloners;
135 std::vector<int>& devices,
// Each device needs its own GpuResources entry.
139 FAISS_ASSERT(resources.size() == devices.size());
140 for(
int i = 0; i < resources.size(); i++) {
142 resources[i], devices[i], options));
150 sub_cloners(sub_cloners)
// NOTE(review): fragment of the multi-GPU clone_Index override. Large spans
// are missing (orig. 166 -> 168, 185 -> 189, 206 -> 213, 237 -> 241, ...),
// so the copy/shard/MultiIndexQuantizer paths below are only partially
// visible. Do not compile as-is; reconstruct from the upstream source.
154 Index *clone_Index(
const Index *index)
override {
155 long n = sub_cloners.size();
// Single sub-cloner: no replication or sharding needed.
158 return sub_cloners[0].clone_Index(index);
// Replica path (presumably the !shard case): clone the whole index onto
// every GPU and aggregate the clones — confirm against upstream.
160 if(dynamic_cast<const IndexFlat *>(index) ||
161 dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
162 dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
165 for(
auto & sub_cloner: sub_cloners) {
166 res->
addIndex(sub_cloner.clone_Index(index));
168 res->own_fields =
true;
// Shard path: only IVFFlat / IVFPQ can be split across GPUs.
175 FAISS_ASSERT (index_ivfpq || index_ivfflat ||
176 !
"IndexShards implemented only for "
177 "IndexIVFFlat or IndexIVFPQ");
178 std::vector<faiss::Index*> shards(n);
180 for(
long i = 0; i < n; i++) {
// [i0, i1) = the slice of ntotal assigned to shard i.
182 long i0 = i * index->
ntotal / n;
183 long i1 = (i + 1) * index->
ntotal / n;
185 printf(
"IndexShards shard %ld indices %ld:%ld\n",
189 sub_cloners[i].reserveVecs =
// Build a temporary CPU sub-index holding only this shard's vectors,
// then clone it onto the shard's GPU. The idx2 declaration itself is
// missing from this extract.
194 index_ivfpq->quantizer, index_ivfpq->d,
195 index_ivfpq->nlist, index_ivfpq->code_size,
196 index_ivfpq->pq.nbits);
197 idx2.
pq = index_ivfpq->pq;
200 index_ivfpq->copy_subset_to(idx2, 0, i0, i1);
201 shards[i] = sub_cloners[i].clone_Index(&idx2);
202 }
else if (index_ivfflat) {
204 index_ivfflat->quantizer, index->
d,
205 index_ivfflat->nlist, index_ivfflat->metric_type);
206 index_ivfflat->copy_subset_to(idx2, 0, i0, i1);
207 shards[i] = sub_cloners[i].clone_Index(&idx2);
// Collect the per-GPU shards into the aggregating result index.
213 for (
int i = 0; i < n; i++) {
214 res->add_shard(shards[i]);
216 res->own_fields =
true;
220 }
else if(
auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
222 printf(
"cloning MultiIndexQuantizer: "
223 "will be valid only for search k=1\n");
227 splitv->own_fields =
true;
// Distribute the product quantizer's M sub-quantizers over the n GPUs;
// when M <= n each sub-quantizer gets a contiguous run of cloners,
// otherwise each gets a single cloner (i1 = i0 + 1).
229 for (
int m = 0; m < pq.
M; m++) {
232 long i0 = m * n / pq.
M;
233 long i1 = pq.
M <= n ? (m + 1) * n / pq.
M : i0 + 1;
234 std::vector<ToGpuCloner> sub_cloners_2;
235 sub_cloners_2.insert(
236 sub_cloners_2.begin(), sub_cloners.begin() + i0,
237 sub_cloners.begin() + i1);
241 Index *idx2 = cm.clone_Index(&idxc);
242 splitv->add_sub_index(idx2);
// Unrecognized type: defer to the base Cloner implementation.
246 return Cloner::clone_Index(index);
// NOTE(review): tail of an index_cpu_to_gpu_multiple-style helper — the
// function name, remaining parameters, and the construction of `cl` are
// missing from this extract.
256 std::vector<GpuResources*> & resources,
257 std::vector<int> &devices,
263 return cl.clone_Index(index);
// Convenience macro: bind `ix` to `index` downcast to `classname`, for use
// inside `if (DC(...))` dispatch. Redefined later with a non-const variant.
272 #define DC(classname) auto ix = dynamic_cast<const classname *>(index)
// NOTE(review): fragment of a parameter-space initialize(index) routine;
// the enclosing `if (DC(...))` guards are missing (orig. gaps 281 -> 285,
// 286 -> 290).
// Empty proxy/shard collections: nothing to introspect.
281 if (ix->count() == 0)
return;
285 if (ix->shard_indexes.size() == 0)
return;
// All shards share a structure; inspect the first one.
286 index = ix->shard_indexes[0];
// Populate the nprobe candidate list with powers of two, capped both by
// the number of inverted lists and by 1024.
290 for (
int i = 0; i < 12; i++) {
291 size_t nprobe = 1 << i;
292 if (nprobe >= ix->getNumLists() ||
293 nprobe > 1024)
break;
294 pr.values.push_back (nprobe);
// Non-const variant of the DC downcast macro (the index is mutated here).
304 #define DC(classname) auto *ix = dynamic_cast<classname *>(index)
// NOTE(review): fragment of set_index_parameter(index, name, val); the
// function name and the `if (DC(...))` guards around each branch are
// missing from this extract.
309 Index * index,
const std::string & name,
double val)
const
// Proxy / shard containers: recurse into every sub-index (loop bodies
// are missing here).
315 for (
int i = 0; i < ix->count(); i++)
320 for (
auto sub_index : ix->shard_indexes)
// "nprobe" maps to the GPU IVF index's setNumProbes.
324 if (name ==
"nprobe") {
327 ix->setNumProbes (
int (val));
// Any parameter name not handled above is a hard error.
330 FAISS_ASSERT (!
"unknown parameter");
bool shard
shard the dataset across the GPUs rather than copying the whole index to each GPU
void initialize(const faiss::Index *index) override
initialize with reasonable parameters for the index
set some options on how to copy to GPU
void copyFrom(const faiss::IndexIVFFlat *index)
size_t dsub
dimensionality of each subvector
bool usePrecomputed
use precomputed tables?
ParameterRange & add_range(const char *name)
add a new parameter
void set_index_parameter(faiss::Index *index, const std::string &name, double val) const override
set a combination of parameters on an index
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
IndicesOptions indicesOptions
how should indices be stored on GpuIndexIVFPQ?
size_t ksub
number of centroids for each subquantizer
bool useFloat16CoarseQuantizer
is the coarse quantizer in float16?
idx_t ntotal
total number of indexed vectors
virtual void add(idx_t n, const float *x) override
void addIndex(faiss::Index *index)
ProductQuantizer pq
produces the codes
size_t M
number of subquantizers
void setPrecomputedCodes(bool enable)
Enable or disable pre-computed codes.
bool is_trained
set if the Index does not require training, or if training is done already
possible values of a parameter, sorted from least to most expensive/accurate
size_t d
size of the input vectors
long reserveVecs
reserve space for this many vectors in the inverted lists?
int use_precomputed_table
if by_residual, build precompute tables
std::vector< float > centroids
Centroid table, size M * ksub * dsub.