8 #include "GpuAutoTune.h"
12 #include "../FaissAssert.h"
13 #include "../index_io.h"
14 #include "../IndexFlat.h"
15 #include "../IndexIVF.h"
16 #include "../IndexIVFFlat.h"
17 #include "../IndexIVFPQ.h"
18 #include "../IndexReplicas.h"
19 #include "../VectorTransform.h"
20 #include "../MetaIndexes.h"
21 #include "GpuIndexFlat.h"
22 #include "GpuIndexIVFFlat.h"
23 #include "GpuIndexIVFPQ.h"
24 #include "utils/DeviceUtils.h"
26 namespace faiss {
namespace gpu {
35 void merge_index(
Index *dst,
Index *src,
bool successive_ids) {
36 if (
auto ifl = dynamic_cast<IndexFlat *>(dst)) {
37 auto ifl2 =
dynamic_cast<const IndexFlat *
>(src);
39 FAISS_ASSERT(successive_ids);
40 ifl->add(ifl2->ntotal, ifl2->xb.data());
41 }
else if(
auto ifl = dynamic_cast<IndexIVFFlat *>(dst)) {
44 ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
45 }
else if(
auto ifl = dynamic_cast<IndexIVFPQ *>(dst)) {
48 ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
50 FAISS_ASSERT(!
"merging not implemented for this type of class");
55 Index *clone_Index(
const Index *index)
override {
56 if(
auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
60 }
else if(
auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
64 }
else if(
auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
73 }
else if(
auto ish = dynamic_cast<const IndexShards *>(index)) {
74 int nshard = ish->count();
75 FAISS_ASSERT(nshard > 0);
76 Index *res = clone_Index(ish->at(0));
77 for(
int i = 1; i < ish->count(); i++) {
78 Index *res_i = clone_Index(ish->at(i));
79 merge_index(res, res_i, ish->successive_ids);
83 }
else if(
auto ipr = dynamic_cast<const IndexReplicas *>(index)) {
85 FAISS_ASSERT(ipr->count() > 0);
86 return clone_Index(ipr->at(0));
88 return Cloner::clone_Index(index);
96 return cl.clone_Index(gpu_index);
// ToGpuCloner::clone_Index: dispatch on the concrete CPU index type and
// build the matching GPU index.
// NOTE(review): this excerpt is truncated -- the construction code inside
// each branch (original lines 112-117, 119-137, 141-160) is not visible
// here; comments describe only what the visible lines show.
110 Index *clone_Index(
const Index *index)
override {
// CPU IndexFlat -> GPU flat index
111 if(
auto ifl = dynamic_cast<const IndexFlat *>(index)) {
118 }
// CPU IndexIVFFlat -> GPU IVF-flat index
else if(
auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
138 }
// CPU IndexIVFPQ -> GPU IVF-PQ index
else if(
auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
// verbose trace of the conversion parameters (format string split across
// lines; some of its arguments lie outside this excerpt)
140 printf(
" IndexIVFPQ size %ld -> GpuIndexIVFPQ "
142 "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
// any other index type: defer to the generic (CPU deep-copy) Cloner
161 return Cloner::clone_Index(index);
174 ToGpuCloner cl(resources, device, options ? *options : defaults);
175 return cl.clone_Index(index);
// ToGpuClonerMultiple state and helpers (excerpt; several original lines
// are missing between the numbered lines below).
// One ToGpuCloner per target GPU; shards/replicas are built through them.
179 std::vector<ToGpuCloner> sub_cloners;
// ctor fragment: builds one sub-cloner per (resources[i], devices[i]) pair
182 std::vector<int>& devices,
186 FAISS_ASSERT(resources.size() == devices.size());
187 for(
int i = 0; i < resources.size(); i++) {
189 resources[i], devices[i], options));
// alternate ctor fragment: takes pre-built sub-cloners directly
197 sub_cloners(sub_cloners)
// copy_ivf_shard fragment: select the subset of `index_ivf` that shard i
// of n should receive (subset semantics follow IndexIVF::copy_subset_to).
// slice sharding: shard i gets the contiguous id range [i0, i1)
204 long i0 = i * index_ivf->
ntotal / n;
205 long i1 = (i + 1) * index_ivf->
ntotal / n;
208 printf(
"IndexShards shard %ld indices %ld:%ld\n",
// sanity check: the copied sub-index holds exactly the requested range
211 FAISS_ASSERT(idx2->
ntotal == i1 - i0);
// modulo sharding: shard i gets ids with id % n == i
214 printf(
"IndexShards shard %ld select modulo %ld = %ld\n",
// any other shard_type value is unsupported
218 FAISS_THROW_FMT (
"shard_type %d not implemented",
shard_type);
// Split `index` into sub_cloners.size() shards, clone each shard onto its
// GPU, and (on lines outside this excerpt) combine them into a composite.
// NOTE(review): many original lines are missing in this excerpt; comments
// describe only the visible statements.
223 Index * clone_Index_to_shards (
const Index *index) {
224 long n = sub_cloners.size();
// sharding is only supported for these three concrete CPU index types
232 FAISS_THROW_IF_NOT_MSG (
233 index_ivfpq || index_ivfflat || index_flat,
234 "IndexShards implemented only for "
235 "IndexIVFFlat, IndexFlat and IndexIVFPQ");
237 std::vector<faiss::Index*> shards(n);
239 for(
long i = 0; i < n; i++) {
// give each sub-cloner a proportional slice of the reserved capacity
242 sub_cloners[i].reserveVecs =
// IVFPQ: build a temporary CPU IVFPQ sharing quantizer/PQ parameters,
// fill it with shard i's subset, then clone that onto GPU i
247 index_ivfpq->quantizer, index_ivfpq->d,
248 index_ivfpq->nlist, index_ivfpq->code_size,
249 index_ivfpq->pq.nbits);
251 idx2.
pq = index_ivfpq->pq;
252 idx2.
nprobe = index_ivfpq->nprobe;
255 copy_ivf_shard (index_ivfpq, &idx2, n, i);
256 shards[i] = sub_cloners[i].clone_Index(&idx2);
257 }
// IVFFlat: same scheme with a temporary CPU IVFFlat
else if (index_ivfflat) {
259 index_ivfflat->quantizer, index->
d,
260 index_ivfflat->nlist, index_ivfflat->metric_type);
261 idx2.
nprobe = index_ivfflat->nprobe;
262 copy_ivf_shard (index_ivfflat, &idx2, n, i);
263 shards[i] = sub_cloners[i].clone_Index(&idx2);
264 }
// Flat: shard i receives the contiguous vector range [i0, i1)
else if (index_flat) {
267 shards[i] = sub_cloners[i].clone_Index(&idx2);
269 long i0 = index->
ntotal * i / n;
270 long i1 = index->
ntotal * (i + 1) / n;
273 index_flat->xb.data() + i0 * index->
d);
// flat shards keep implicit successive ids; IVF shards carry their own
278 bool successive_ids = index_flat !=
nullptr;
// assemble the per-GPU shards into the result (loop body not visible)
283 for (
int i = 0; i < n; i++) {
// the combined result must contain every vector of the input
287 FAISS_ASSERT(index->
ntotal == res->ntotal);
// ToGpuClonerMultiple::clone_Index: replicate or shard a CPU index
// across all sub-cloners (one per GPU).
// NOTE(review): interior lines are missing from this excerpt; comments
// describe only the visible statements.
291 Index *clone_Index(
const Index *index)
override {
292 long n = sub_cloners.size();
// single GPU: plain clone, no replica/shard wrapper needed
294 return sub_cloners[0].clone_Index(index);
// supported index types: either replicate to every GPU or shard the
// dataset (the choice is made on lines not visible here)
296 if(dynamic_cast<const IndexFlat *>(index) ||
297 dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
298 dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
// replicate: each sub-cloner receives a full copy of the index
301 for(
auto & sub_cloner: sub_cloners) {
302 res->
addIndex(sub_cloner.clone_Index(index));
// shard: split the dataset across the GPUs instead of copying it
307 return clone_Index_to_shards (index);
309 }
// MultiIndexQuantizer: split its pq.M sub-quantizers across the GPUs
else if(
auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
311 printf(
"cloning MultiIndexQuantizer: "
312 "will be valid only for search k=1\n");
// the split wrapper owns and deletes its sub-indexes
316 splitv->own_fields =
true;
318 for (
int m = 0; m < pq.
M; m++) {
// [i0, i1): slice of sub-cloners handling sub-quantizer m
321 long i0 = m * n / pq.
M;
322 long i1 = pq.
M <= n ? (m + 1) * n / pq.
M : i0 + 1;
323 std::vector<ToGpuCloner> sub_cloners_2;
324 sub_cloners_2.insert(
325 sub_cloners_2.begin(), sub_cloners.begin() + i0,
326 sub_cloners.begin() + i1);
// recursively clone this sub-quantizer onto its GPU subset
330 Index *idx2 = cm.clone_Index(&idxc);
331 splitv->add_sub_index(idx2);
// fallback: generic CPU deep copy for unsupported types
335 return Cloner::clone_Index(index);
345 std::vector<GpuResources*> & resources,
346 std::vector<int> &devices,
352 return cl.clone_Index(index);
// GpuParameterSpace::initialize fragments (the function header is outside
// this excerpt). DC(classname) binds a const `ix` when `index` is of the
// given dynamic type.
361 #define DC(classname) auto ix = dynamic_cast<const classname *>(index)
// empty composite indexes: nothing to derive parameters from
370 if (ix->count() == 0)
return;
374 if (ix->count() == 0)
return;
// candidate nprobe values: powers of two up to 2^11, capped by both the
// number of IVF lists and the GPU's maximum k-selection size
379 for (
int i = 0; i < 12; i++) {
380 size_t nprobe = 1 << i;
381 if (nprobe >= ix->getNumLists() ||
382 nprobe > getMaxKSelection())
break;
383 pr.values.push_back (nprobe);
// GpuParameterSpace::set_index_parameter fragments (the signature is
// split and partially missing from this excerpt). This DC() variant
// binds a NON-const `ix` so the parameter can be mutated.
393 #define DC(classname) auto *ix = dynamic_cast<classname *>(index)
398 Index * index,
const std::string & name,
double val)
const
// composite index: apply the parameter to every sub-index
401 for (
int i = 0; i < ix->count(); i++)
// "nprobe" forwards to setNumProbes on the GPU index
405 if (name ==
"nprobe") {
407 ix->setNumProbes (
int (val));
// "use_precomputed_table" forwards to setPrecomputedCodes
411 if (name ==
"use_precomputed_table") {
413 ix->setPrecomputedCodes(
bool (val));
bool usePrecomputedTables
bool storeTransposed
For GpuIndexFlat, store data in transposed layout?
size_t nprobe
number of probes at query time
void initialize(const faiss::Index *index) override
initialize with reasonable parameters for the index
set some options on how to copy to GPU
void copyFrom(const faiss::IndexIVFFlat *index)
size_t dsub
dimensionality of each subvector
bool usePrecomputed
use precomputed tables?
bool useFloat16IVFStorage
bool useFloat16LookupTables
ParameterRange & add_range(const char *name)
add a new parameter (or return it if it exists)
bool useFloat16
Whether or not data is stored as float16.
int device
GPU device on which the index is resident.
virtual void copy_subset_to(IndexIVF &other, int subset_type, idx_t a1, idx_t a2) const
GpuIndexFlatConfig flatConfig
Configuration for the coarse quantizer object.
void set_index_parameter(faiss::Index *index, const std::string &name, double val) const override
set a combination of parameters on an index
bool own_fields
Whether or not we are responsible for deleting our contained indices.
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
void addIndex(IndexT *index)
IndicesOptions indicesOptions
size_t ksub
number of centroids for each subquantizer
int shard_type
IndexIVF::copy_subset_to subset type.
bool useFloat16CoarseQuantizer
is the coarse quantizer in float16?
idx_t ntotal
total nb of indexed vectors
void add_shard(IndexT *index)
Alias for addIndex()
void add(idx_t n, const float *x) override
virtual void set_index_parameter(Index *index, const std::string &name, double val) const
set one of the parameters
MetricType metric_type
type of metric this index uses for search
ProductQuantizer pq
produces the codes
size_t M
number of subquantizers
bool is_trained
set if the Index does not require training, or if training is done already
possible values of a parameter, sorted from least to most expensive/accurate
size_t d
size of the input vectors
IndicesOptions indicesOptions
Index storage options for the GPU.
bool verbose
Set verbose options on the index.
long reserveVecs
reserve vectors in the inverted files?
int use_precomputed_table
if by_residual, build precompute tables
std::vector< float > centroids
Centroid table, size M * ksub * dsub.