#include "GpuAutoTune.h"

#include "../FaissAssert.h"
#include "../index_io.h"
#include "../IndexFlat.h"
#include "../IndexIVF.h"
#include "../IndexIVFPQ.h"
#include "../VectorTransform.h"
#include "../MetaIndexes.h"
#include "GpuIndexFlat.h"
#include "GpuIndexIVFFlat.h"
#include "GpuIndexIVFPQ.h"
#include "IndexProxy.h"
namespace faiss { namespace gpu {
struct ToCPUCloner: Cloner {

    Index *clone_Index(const Index *index) override {
        if (auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
            // copy a GPU flat index back into a CPU IndexFlat
            IndexFlat *res = new IndexFlat();
            ifl->copyTo(res);
            return res;
        } else if (auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
            IndexIVFFlat *res = new IndexIVFFlat();
            ifl->copyTo(res);
            return res;
        } else if (auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
            IndexIVFPQ *res = new IndexIVFPQ();
            ipq->copyTo(res);
            return res;
        } else {
            // not a GPU index type: defer to the generic cloner
            return Cloner::clone_Index(index);
        }
    }
};
faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
{
    ToCPUCloner cl;
    return cl.clone_Index(gpu_index);
}
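// Usage sketch: bring a GPU index back to the CPU, for example to persist it
// with write_index() from index_io.h. `gpu_index` and the output path are
// illustrative placeholders, not part of this file.
//
//   faiss::Index *cpu_index = faiss::gpu::index_gpu_to_cpu(gpu_index);
//   faiss::write_index(cpu_index, "/tmp/ivfpq.index");
//   delete cpu_index;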
GpuClonerOptions::GpuClonerOptions():
    indicesOptions(INDICES_64_BIT),
    useFloat16CoarseQuantizer(false),
    useFloat16(false),
    usePrecomputed(true),
    reserveVecs(0),
    storeTransposed(false),
    verbose(0)
{}
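// The defaults above can be overridden before cloning; a minimal sketch
// (the particular choices are illustrative, not recommendations):
//
//   GpuClonerOptions opts;
//   opts.indicesOptions = INDICES_32_BIT;   // store vector IDs as 32-bit ints
//   opts.useFloat16CoarseQuantizer = true;  // float16 coarse quantizer
//   opts.verbose = 1;
//   // pass &opts to index_cpu_to_gpu() below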
struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
    GpuResources *resources;
    int device;

    ToGpuCloner(GpuResources *resources, int device,
                const GpuClonerOptions &options):
        GpuClonerOptions(options), resources(resources), device(device) {}

    Index *clone_Index(const Index *index) override {
        if (auto ifl = dynamic_cast<const IndexFlat *>(index)) {
            GpuIndexFlatConfig config;
            config.device = device;
            return new GpuIndexFlat(resources, ifl, config);
        } else if (auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
            GpuIndexIVFFlatConfig config;
            config.device = device;
            GpuIndexIVFFlat *res = new GpuIndexIVFFlat(
                resources, ifl->d, ifl->nlist, ifl->metric_type, config);
            res->copyFrom(ifl);
            return res;
        } else if (auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if (verbose) {
                printf("  IndexIVFPQ size %ld -> GpuIndexIVFPQ indicesOptions=%d "
                       "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
                       ipq->ntotal, indicesOptions, usePrecomputed,
                       useFloat16, reserveVecs);
            }
            GpuIndexIVFPQConfig config;
            config.device = device;
            return new GpuIndexIVFPQ(resources, ipq, config);
        } else {
            return Cloner::clone_Index(index);
        }
    }
};
faiss::Index * index_cpu_to_gpu(
        GpuResources* resources, int device,
        const faiss::Index *index,
        const GpuClonerOptions *options)
{
    GpuClonerOptions defaults;
    ToGpuCloner cl(resources, device, options ? *options : defaults);
    return cl.clone_Index(index);
}
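// Typical call, as a sketch (`cpu_index` is an illustrative placeholder;
// StandardGpuResources is the stock GpuResources implementation). Passing a
// null options pointer selects the defaults constructed above.
//
//   faiss::gpu::StandardGpuResources res;
//   faiss::Index *gpu_index =
//       faiss::gpu::index_cpu_to_gpu(&res, 0 /* device */, cpu_index, nullptr);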
GpuMultipleClonerOptions::GpuMultipleClonerOptions(): shard(false)
{}
struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
    std::vector<ToGpuCloner> sub_cloners;

    ToGpuClonerMultiple(std::vector<GpuResources *> &resources,
                        std::vector<int> &devices,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options)
    {
        FAISS_ASSERT(resources.size() == devices.size());
        for (int i = 0; i < resources.size(); i++) {
            sub_cloners.push_back(ToGpuCloner(
                resources[i], devices[i], options));
        }
    }

    ToGpuClonerMultiple(const std::vector<ToGpuCloner> &sub_cloners,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options),
        sub_cloners(sub_cloners)
    {}
    Index *clone_Index(const Index *index) override {
        long n = sub_cloners.size();
        if (n == 1) {
            // a single GPU: defer to the one-GPU cloner
            return sub_cloners[0].clone_Index(index);
        }
        if (dynamic_cast<const IndexFlat *>(index) ||
            dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
            dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if (!shard) {
                // replicate: copy the whole index to every GPU; IndexProxy
                // fans each query out to all replicas
                IndexProxy *res = new IndexProxy();
                for (auto &sub_cloner : sub_cloners) {
                    res->addIndex(sub_cloner.clone_Index(index));
                }
                res->own_fields = true;
                return res;
            }
            // shard: split the dataset into n slices, one per GPU
            auto index_ivfpq = dynamic_cast<const faiss::IndexIVFPQ *>(index);
            auto index_ivfflat = dynamic_cast<const faiss::IndexIVFFlat *>(index);
            FAISS_ASSERT(index_ivfpq || index_ivfflat ||
                         !"IndexShards implemented only for "
                         "IndexIVFFlat or IndexIVFPQ");
            std::vector<faiss::Index*> shards(n);

            for (long i = 0; i < n; i++) {
                // shard i receives vectors i0:i1 of the dataset
                long i0 = i * index->ntotal / n;
                long i1 = (i + 1) * index->ntotal / n;
                if (verbose) {
                    printf("IndexShards shard %ld indices %ld:%ld\n",
                           i, i0, i1);
                }
                if (reserveVecs) {
                    sub_cloners[i].reserveVecs = (reserveVecs + n - 1) / n;
                }
                if (index_ivfpq) {
                    // build a temporary CPU index holding only this slice,
                    // then clone it to the GPU
                    faiss::IndexIVFPQ idx2(
                        index_ivfpq->quantizer, index_ivfpq->d,
                        index_ivfpq->nlist, index_ivfpq->code_size,
                        index_ivfpq->pq.nbits);
                    idx2.pq = index_ivfpq->pq;
                    idx2.is_trained = index->is_trained;
                    index_ivfpq->copy_subset_to(idx2, 0, i0, i1);
                    shards[i] = sub_cloners[i].clone_Index(&idx2);
                } else if (index_ivfflat) {
                    faiss::IndexIVFFlat idx2(
                        index_ivfflat->quantizer, index->d,
                        index_ivfflat->nlist, index_ivfflat->metric_type);
                    index_ivfflat->copy_subset_to(idx2, 0, i0, i1);
                    shards[i] = sub_cloners[i].clone_Index(&idx2);
                }
            }
            faiss::IndexShards *res =
                new faiss::IndexShards(index->d, true, false);
            for (int i = 0; i < n; i++) {
                res->add_shard(shards[i]);
            }
            res->own_fields = true;
            return res;
        } else if (auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
            if (verbose) {
                printf("cloning MultiIndexQuantizer: "
                       "will be valid only for search k=1\n");
            }
            const ProductQuantizer &pq = miq->pq;
            IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
            splitv->own_fields = true;

            for (int m = 0; m < pq.M; m++) {
                // which of the n sub-cloners (GPUs) serve sub-quantizer m
                long i0 = m * n / pq.M;
                long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
                std::vector<ToGpuCloner> sub_cloners_2;
                sub_cloners_2.insert(
                    sub_cloners_2.begin(), sub_cloners.begin() + i0,
                    sub_cloners.begin() + i1);
                ToGpuClonerMultiple cm(sub_cloners_2, *this);
                // wrap sub-quantizer m's centroid table in a flat index
                IndexFlatL2 idxc(pq.dsub);
                idxc.add(pq.ksub, pq.centroids.data() + m * pq.ksub * pq.dsub);
                Index *idx2 = cm.clone_Index(&idxc);
                splitv->add_sub_index(idx2);
            }
            return splitv;
        } else {
            return Cloner::clone_Index(index);
        }
    }
};
faiss::Index * index_cpu_to_gpu_multiple(
        std::vector<GpuResources*> &resources,
        std::vector<int> &devices,
        const faiss::Index *index,
        const GpuMultipleClonerOptions *options)
{
    GpuMultipleClonerOptions defaults;
    ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
    return cl.clone_Index(index);
}
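// Sketch: shard one CPU index across two GPUs (resource and device values,
// and `cpu_index`, are illustrative). With opts.shard left false the index
// would instead be replicated on both devices via IndexProxy.
//
//   faiss::gpu::StandardGpuResources res0, res1;
//   std::vector<faiss::gpu::GpuResources*> res = {&res0, &res1};
//   std::vector<int> devs = {0, 1};
//   faiss::gpu::GpuMultipleClonerOptions opts;
//   opts.shard = true;
//   faiss::Index *multi =
//       faiss::gpu::index_cpu_to_gpu_multiple(res, devs, cpu_index, &opts);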
#define DC(classname) auto ix = dynamic_cast<const classname *>(index)

void GpuParameterSpace::initialize(const Index *index)
{
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (faiss::IndexShards)) {
        if (ix->shard_indexes.size() == 0) return;
        index = ix->shard_indexes[0];
    }
    if (DC (GpuIndexIVF)) {
        ParameterRange &pr = add_range("nprobe");
        // candidate nprobe values: powers of two, capped by the number of
        // inverted lists and by 1024
        for (int i = 0; i < 12; i++) {
            size_t nprobe = 1 << i;
            if (nprobe >= ix->getNumLists() || nprobe > 1024) break;
            pr.values.push_back(nprobe);
        }
    }
}
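// For example, a GpuIndexIVF with 4096 inverted lists gets the candidate
// values {1, 2, 4, ..., 1024} registered for "nprobe". A sketch of
// inspecting the resulting space (`gpu_index` is a placeholder;
// n_combinations() comes from the ParameterSpace base class):
//
//   faiss::gpu::GpuParameterSpace params;
//   params.initialize(gpu_index);
//   printf("%zu parameter combinations\n", params.n_combinations());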
#undef DC
// non-const version
#define DC(classname) auto *ix = dynamic_cast<classname *>(index)

void GpuParameterSpace::set_index_parameter(
        Index *index, const std::string &name, double val) const
{
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        // apply to every replica
        for (int i = 0; i < ix->count(); i++)
            set_index_parameter(ix->at(i), name, val);
        return;
    }
    if (DC (faiss::IndexShards)) {
        for (auto sub_index : ix->shard_indexes)
            set_index_parameter(sub_index, name, val);
        return;
    }
    if (name == "nprobe") {
        if (DC (GpuIndexIVF)) {
            ix->setNumProbes(int(val));
            return;
        }
    }
    FAISS_ASSERT(!"unknown parameter");
}

} } // namespace faiss::gpu
/**********************************************************
 * Reference: doc strings of members used above, collected
 * from the corresponding class headers
 **********************************************************/

// bool shard
//     shard rather than copying to each GPU
// bool storeTransposed
//     For GpuIndexFlat, store data in transposed layout?
// void initialize(const faiss::Index *index) override
//     initialize with reasonable parameters for the index
// struct GpuClonerOptions
//     set some options on how to copy to GPU
// void copyFrom(const faiss::IndexIVFFlat *index)
//     initialize ourselves from the given CPU index; will overwrite
//     all data in ourselves
// size_t dsub
//     dimensionality of each subvector
// bool usePrecomputed
//     use precomputed tables?
// ParameterRange & add_range(const char *name)
//     add a new parameter
// void set_index_parameter(faiss::Index *index, const std::string &name,
//                          double val) const override
//     set a combination of parameters on an index
// void reserveMemory(size_t numVecs)
//     Reserve GPU memory in our inverted lists for this number of vectors.
// IndicesOptions indicesOptions
//     how should indices be stored on GpuIndexIVFPQ?
// size_t ksub
//     number of centroids for each subquantizer
// bool useFloat16CoarseQuantizer
//     is the coarse quantizer in float16?
// idx_t ntotal
//     total number of indexed vectors
// virtual void add(idx_t n, const float *x) override
//     add n vectors of dimension d to the index
// void addIndex(faiss::Index *index)
//     add a replica index to the proxy; each replica is searched from
//     its own thread
// ProductQuantizer pq
//     produces the codes
// size_t M
//     number of subquantizers
// void setPrecomputedCodes(bool enable)
//     Enable or disable pre-computed codes.
// bool is_trained
//     set if the Index does not require training, or if training is
//     done already
// struct ParameterRange
//     possible values of a parameter, sorted from least to most
//     expensive/accurate
// size_t d
//     size of the input vectors
// long reserveVecs
//     reserve vectors in the invfiles?
// int use_precomputed_table
//     if by_residual, build precompute tables
// std::vector<float> centroids
//     Centroid table, size M * ksub * dsub.
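// A few of these knobs combined, as a hedged sketch (the vector count is
// illustrative, and `cpu_ivfpq` is assumed to be a pointer to a trained
// faiss::IndexIVFPQ built elsewhere):
//
//   faiss::gpu::StandardGpuResources res;
//   faiss::gpu::GpuIndexIVFPQ gpu_ivfpq(&res, cpu_ivfpq);
//   gpu_ivfpq.reserveMemory(1000000);     // pre-size lists for future add()s
//   gpu_ivfpq.setPrecomputedCodes(true);  // trade memory for query speed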