Faiss
GpuAutoTune.cpp
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "GpuAutoTune.h"
#include <typeinfo>

#include "GpuIndex.h"
#include "../FaissAssert.h"
#include "../index_io.h"
#include "../IndexFlat.h"
#include "../IndexIVF.h"
#include "../IndexIVFFlat.h"
#include "../IndexIVFPQ.h"
#include "../IndexReplicas.h"
#include "../VectorTransform.h"
#include "../MetaIndexes.h"
#include "GpuIndexFlat.h"
#include "GpuIndexIVFFlat.h"
#include "GpuIndexIVFPQ.h"
#include "utils/DeviceUtils.h"

namespace faiss { namespace gpu {

/**********************************************************
 * Cloning from/to GPU
 **********************************************************/

struct ToCPUCloner: faiss::Cloner {

    void merge_index(Index *dst, Index *src, bool successive_ids) {
        if (auto ifl = dynamic_cast<IndexFlat *>(dst)) {
            auto ifl2 = dynamic_cast<const IndexFlat *>(src);
            FAISS_ASSERT(ifl2);
            FAISS_ASSERT(successive_ids);
            ifl->add(ifl2->ntotal, ifl2->xb.data());
        } else if(auto ifl = dynamic_cast<IndexIVFFlat *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFFlat *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else if(auto ifl = dynamic_cast<IndexIVFPQ *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFPQ *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else {
            FAISS_ASSERT(!"merging not implemented for this type of class");
        }
    }

    Index *clone_Index(const Index *index) override {
        if(auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
            IndexFlat *res = new IndexFlat();
            ifl->copyTo(res);
            return res;
        } else if(auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
            IndexIVFFlat *res = new IndexIVFFlat();
            ifl->copyTo(res);
            return res;
        } else if(auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
            IndexIVFPQ *res = new IndexIVFPQ();
            ipq->copyTo(res);
            return res;

            // for IndexShards and IndexReplicas we assume that the
            // objective is to make a single component out of them
            // (inverse op of ToGpuClonerMultiple)

        } else if(auto ish = dynamic_cast<const IndexShards *>(index)) {
            int nshard = ish->count();
            FAISS_ASSERT(nshard > 0);
            Index *res = clone_Index(ish->at(0));
            for(int i = 1; i < ish->count(); i++) {
                Index *res_i = clone_Index(ish->at(i));
                merge_index(res, res_i, ish->successive_ids);
                delete res_i;
            }
            return res;
        } else if(auto ipr = dynamic_cast<const IndexReplicas *>(index)) {
            // just clone one of the replicas
            FAISS_ASSERT(ipr->count() > 0);
            return clone_Index(ipr->at(0));
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
{
    ToCPUCloner cl;
    return cl.clone_Index(gpu_index);
}
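// Usage sketch for index_gpu_to_cpu (illustrative only; `gpu_index` is an
// assumed GPU-resident index, e.g. a GpuIndexIVFFlat built or cloned earlier):
//
//     faiss::Index *cpu_copy = faiss::gpu::index_gpu_to_cpu(gpu_index);
//     faiss::write_index(cpu_copy, "ivfflat.index");  // from index_io.h
//     delete cpu_copy;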
struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
    GpuResources *resources;
    int device;

    ToGpuCloner(GpuResources *resources, int device,
                const GpuClonerOptions &options):
        GpuClonerOptions(options), resources(resources), device(device)
    {}

    Index *clone_Index(const Index *index) override {
        if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
            GpuIndexFlatConfig config;
            config.device = device;
            config.useFloat16 = useFloat16;
            config.storeTransposed = storeTransposed;

            return new GpuIndexFlat(resources, ifl, config);
        } else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
            GpuIndexIVFFlatConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;

            GpuIndexIVFFlat *res =
                new GpuIndexIVFFlat(resources,
                                    ifl->d,
                                    ifl->nlist,
                                    ifl->metric_type,
                                    config);
            if(reserveVecs > 0 && ifl->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }

            res->copyFrom(ifl);
            return res;
        } else if(auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if(verbose)
                printf("  IndexIVFPQ size %ld -> GpuIndexIVFPQ "
                       "indicesOptions=%d "
                       "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
                       ipq->ntotal, indicesOptions, usePrecomputed,
                       useFloat16, reserveVecs);
            GpuIndexIVFPQConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;
            config.useFloat16LookupTables = useFloat16;
            config.usePrecomputedTables = usePrecomputed;

            GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config);

            if(reserveVecs > 0 && ipq->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }

            return res;
        } else {
            return Cloner::clone_Index(index);
        }
    }

};


faiss::Index * index_cpu_to_gpu(
    GpuResources* resources, int device,
    const faiss::Index *index,
    const GpuClonerOptions *options)
{
    GpuClonerOptions defaults;
    ToGpuCloner cl(resources, device, options ? *options : defaults);
    return cl.clone_Index(index);
}
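// Usage sketch for index_cpu_to_gpu (illustrative only; `cpu_index` is an
// assumed trained CPU index such as a faiss::IndexIVFFlat):
//
//     faiss::gpu::StandardGpuResources res;
//     faiss::gpu::GpuClonerOptions opts;
//     opts.useFloat16 = true;   // store vector data as float16 on the GPU
//     faiss::Index *gpu_index =
//         faiss::gpu::index_cpu_to_gpu(&res, 0 /* device */, cpu_index, &opts);
//
// Passing nullptr for the options uses the GpuClonerOptions defaults.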
struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
    std::vector<ToGpuCloner> sub_cloners;

    ToGpuClonerMultiple(std::vector<GpuResources *> & resources,
                        std::vector<int>& devices,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options)
    {
        FAISS_ASSERT(resources.size() == devices.size());
        for(int i = 0; i < resources.size(); i++) {
            sub_cloners.push_back(ToGpuCloner(
                resources[i], devices[i], options));
        }
    }

    ToGpuClonerMultiple(const std::vector<ToGpuCloner> & sub_cloners,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options),
        sub_cloners(sub_cloners)
    {}

    void copy_ivf_shard (const IndexIVF *index_ivf, IndexIVF *idx2,
                         long n, long i) {
        if (shard_type == 2) {
            long i0 = i * index_ivf->ntotal / n;
            long i1 = (i + 1) * index_ivf->ntotal / n;

            if(verbose)
                printf("IndexShards shard %ld indices %ld:%ld\n",
                       i, i0, i1);
            index_ivf->copy_subset_to(*idx2, 2, i0, i1);
            FAISS_ASSERT(idx2->ntotal == i1 - i0);
        } else if (shard_type == 1) {
            if(verbose)
                printf("IndexShards shard %ld select modulo %ld = %ld\n",
                       i, n, i);
            index_ivf->copy_subset_to(*idx2, 1, n, i);
        } else {
            FAISS_THROW_FMT ("shard_type %d not implemented", shard_type);
        }
    }
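    // Worked example of the two shard_type policies above (illustrative,
    // assuming the ids are the default sequential 0..ntotal-1), for an IVF
    // index with ntotal = 10 split over n = 2 shards:
    //   shard_type == 2 copies contiguous id ranges: shard 0 gets ids 0..4,
    //   shard 1 gets ids 5..9;
    //   shard_type == 1 selects ids modulo n: shard 0 gets 0,2,4,6,8,
    //   shard 1 gets 1,3,5,7,9.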
    Index * clone_Index_to_shards (const Index *index) {
        long n = sub_cloners.size();

        auto index_ivfpq =
            dynamic_cast<const faiss::IndexIVFPQ *>(index);
        auto index_ivfflat =
            dynamic_cast<const faiss::IndexIVFFlat *>(index);
        auto index_flat =
            dynamic_cast<const faiss::IndexFlat *>(index);
        FAISS_THROW_IF_NOT_MSG (
            index_ivfpq || index_ivfflat || index_flat,
            "IndexShards implemented only for "
            "IndexIVFFlat, IndexFlat and IndexIVFPQ");

        std::vector<faiss::Index*> shards(n);

        for(long i = 0; i < n; i++) {
            // make a shallow copy
            if(reserveVecs)
                sub_cloners[i].reserveVecs =
                    (reserveVecs + n - 1) / n;

            if (index_ivfpq) {
                faiss::IndexIVFPQ idx2(
                    index_ivfpq->quantizer, index_ivfpq->d,
                    index_ivfpq->nlist, index_ivfpq->code_size,
                    index_ivfpq->pq.nbits);
                idx2.metric_type = index_ivfpq->metric_type;
                idx2.pq = index_ivfpq->pq;
                idx2.nprobe = index_ivfpq->nprobe;
                idx2.use_precomputed_table = 0;
                idx2.is_trained = index->is_trained;
                copy_ivf_shard (index_ivfpq, &idx2, n, i);
                shards[i] = sub_cloners[i].clone_Index(&idx2);
            } else if (index_ivfflat) {
                faiss::IndexIVFFlat idx2(
                    index_ivfflat->quantizer, index->d,
                    index_ivfflat->nlist, index_ivfflat->metric_type);
                idx2.nprobe = index_ivfflat->nprobe;
                copy_ivf_shard (index_ivfflat, &idx2, n, i);
                shards[i] = sub_cloners[i].clone_Index(&idx2);
            } else if (index_flat) {
                faiss::IndexFlat idx2 (
                    index->d, index->metric_type);
                shards[i] = sub_cloners[i].clone_Index(&idx2);
                if (index->ntotal > 0) {
                    long i0 = index->ntotal * i / n;
                    long i1 = index->ntotal * (i + 1) / n;
                    shards[i]->add (
                        i1 - i0,
                        index_flat->xb.data() + i0 * index->d);
                }
            }
        }

        bool successive_ids = index_flat != nullptr;
        faiss::IndexShards *res =
            new faiss::IndexShards(index->d, true,
                                   successive_ids);

        for (int i = 0; i < n; i++) {
            res->add_shard(shards[i]);
        }
        res->own_fields = true;
        FAISS_ASSERT(index->ntotal == res->ntotal);
        return res;
    }

    Index *clone_Index(const Index *index) override {
        long n = sub_cloners.size();
        if (n == 1)
            return sub_cloners[0].clone_Index(index);

        if(dynamic_cast<const IndexFlat *>(index) ||
           dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
           dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if(!shard) {
                IndexReplicas * res = new IndexReplicas();
                for(auto & sub_cloner: sub_cloners) {
                    res->addIndex(sub_cloner.clone_Index(index));
                }
                res->own_fields = true;
                return res;
            } else {
                return clone_Index_to_shards (index);
            }
        } else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
            if (verbose) {
                printf("cloning MultiIndexQuantizer: "
                       "will be valid only for search k=1\n");
            }
            const ProductQuantizer & pq = miq->pq;
            IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
            splitv->own_fields = true;

            for (int m = 0; m < pq.M; m++) {
                // which GPU(s) will be assigned to this sub-quantizer

                long i0 = m * n / pq.M;
                long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
                std::vector<ToGpuCloner> sub_cloners_2;
                sub_cloners_2.insert(
                    sub_cloners_2.begin(), sub_cloners.begin() + i0,
                    sub_cloners.begin() + i1);
                ToGpuClonerMultiple cm(sub_cloners_2, *this);
                IndexFlatL2 idxc (pq.dsub);
                idxc.add (pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub);
                Index *idx2 = cm.clone_Index(&idxc);
                splitv->add_sub_index(idx2);
            }
            return splitv;
        } else {
            return Cloner::clone_Index(index);
        }
    }

};


faiss::Index * index_cpu_to_gpu_multiple(
    std::vector<GpuResources*> & resources,
    std::vector<int> &devices,
    const faiss::Index *index,
    const GpuMultipleClonerOptions *options)
{
    GpuMultipleClonerOptions defaults;
    ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
    return cl.clone_Index(index);
}
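// Usage sketch for index_cpu_to_gpu_multiple (illustrative only; `cpu_index`
// is an assumed trained CPU index and two GPUs are assumed to be available):
//
//     faiss::gpu::StandardGpuResources res0, res1;
//     std::vector<faiss::gpu::GpuResources*> res = {&res0, &res1};
//     std::vector<int> devs = {0, 1};
//
//     faiss::gpu::GpuMultipleClonerOptions opts;
//     opts.shard = true;  // split the dataset across GPUs instead of replicating
//     faiss::Index *gpu_index =
//         faiss::gpu::index_cpu_to_gpu_multiple(res, devs, cpu_index, &opts);
//
// With opts.shard = false (the default) each GPU holds a full replica wrapped
// in an IndexReplicas; with shard = true the vectors are split into an
// IndexShards as in clone_Index_to_shards above.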
/**********************************************************
 * Parameters to auto-tune on GpuIndex'es
 **********************************************************/

#define DC(classname) auto ix = dynamic_cast<const classname *>(index)

void GpuParameterSpace::initialize (const Index * index)
{
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexReplicas)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (IndexShards)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (GpuIndexIVF)) {
        ParameterRange & pr = add_range("nprobe");
        for (int i = 0; i < 12; i++) {
            size_t nprobe = 1 << i;
            if (nprobe >= ix->getNumLists() ||
                nprobe > getMaxKSelection()) break;
            pr.values.push_back (nprobe);
        }
    }
    // not sure we should call the parent initializer
}

#undef DC
// non-const version
#define DC(classname) auto *ix = dynamic_cast<classname *>(index)

void GpuParameterSpace::set_index_parameter (
    Index * index, const std::string & name, double val) const
{
    if (DC (IndexReplicas)) {
        for (int i = 0; i < ix->count(); i++)
            set_index_parameter (ix->at(i), name, val);
        return;
    }
    if (name == "nprobe") {
        if (DC (GpuIndexIVF)) {
            ix->setNumProbes (int (val));
            return;
        }
    }
    if (name == "use_precomputed_table") {
        if (DC (GpuIndexIVFPQ)) {
            ix->setPrecomputedCodes(bool (val));
            return;
        }
    }

    // maybe normal index parameters apply?
    ParameterSpace::set_index_parameter (index, name, val);
}
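// Usage sketch for GpuParameterSpace (illustrative only; `gpu_index` is an
// assumed GPU-resident IVF index or a set of replicas of one):
//
//     faiss::gpu::GpuParameterSpace params;
//     params.initialize(gpu_index);  // populates the "nprobe" range above
//     params.set_index_parameter(gpu_index, "nprobe", 32);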
} } // namespace