Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuAutoTune.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 #include "GpuAutoTune.h"
11 
12 #include "GpuIndex.h"
13 #include "../FaissAssert.h"
14 #include "../index_io.h"
15 #include "../IndexFlat.h"
16 #include "../IndexIVF.h"
17 #include "../IndexIVFPQ.h"
18 #include "../VectorTransform.h"
19 #include "../MetaIndexes.h"
20 #include "GpuIndexFlat.h"
21 #include "GpuIndexIVFFlat.h"
22 #include "GpuIndexIVFPQ.h"
23 #include "IndexProxy.h"
24 
25 namespace faiss { namespace gpu {
26 
27 /**********************************************************
28  * Cloning from/to GPU
29  **********************************************************/
30 
31 
33 
34  void merge_index(Index *dst, Index *src, bool successive_ids) {
35  if (auto ifl = dynamic_cast<IndexFlat *>(dst)) {
36  auto ifl2 = dynamic_cast<const IndexFlat *>(src);
37  FAISS_ASSERT(ifl2);
38  FAISS_ASSERT(successive_ids);
39  ifl->add(ifl2->ntotal, ifl2->xb.data());
40  } else if(auto ifl = dynamic_cast<IndexIVFFlat *>(dst)) {
41  auto ifl2 = dynamic_cast<IndexIVFFlat *>(src);
42  FAISS_ASSERT(ifl2);
43  ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
44  } else if(auto ifl = dynamic_cast<IndexIVFPQ *>(dst)) {
45  auto ifl2 = dynamic_cast<IndexIVFPQ *>(src);
46  FAISS_ASSERT(ifl2);
47  ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
48  } else {
49  FAISS_ASSERT(!"merging not implemented for this type of class");
50  }
51  }
52 
53 
54  Index *clone_Index(const Index *index) override {
55  if(auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
56  IndexFlat *res = new IndexFlat();
57  ifl->copyTo(res);
58  return res;
59  } else if(auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
60  IndexIVFFlat *res = new IndexIVFFlat();
61  ifl->copyTo(res);
62  return res;
63  } else if(auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
64  IndexIVFPQ *res = new IndexIVFPQ();
65  ipq->copyTo(res);
66  return res;
67 
68  // for IndexShards and IndexProxy we assume that the
69  // objective is to make a single component out of them
70  // (inverse op of ToGpuClonerMultiple)
71 
72  } else if(auto ish = dynamic_cast<const IndexShards *>(index)) {
73  int nshard = ish->shard_indexes.size();
74  FAISS_ASSERT(nshard > 0);
75  Index *res = clone_Index(ish->shard_indexes[0]);
76  for(int i = 1; i < ish->shard_indexes.size(); i++) {
77  Index *res_i = clone_Index(ish->shard_indexes[i]);
78  merge_index(res, res_i, ish->successive_ids);
79  delete res_i;
80  }
81  return res;
82  } else if(auto ipr = dynamic_cast<const IndexProxy *>(index)) {
83  // just clone one of the replicas
84  FAISS_ASSERT(ipr->count() > 0);
85  return clone_Index(ipr->at(0));
86  } else {
87  return Cloner::clone_Index(index);
88  }
89  }
90 };
91 
92 faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
93 {
94  ToCPUCloner cl;
95  return cl.clone_Index(gpu_index);
96 }
97 
98 
99 
// Default options for copying a CPU index to the GPU.
GpuClonerOptions::GpuClonerOptions():
    indicesOptions(INDICES_64_BIT),   // store user indices as 64-bit ints
    useFloat16CoarseQuantizer(false), // coarse quantizer kept in float32
    useFloat16(false),                // index data kept in float32
    usePrecomputed(true),             // use precomputed tables (IVFPQ)
    reserveVecs(0),                   // no inverted-list space reserved up front
    storeTransposed(false),           // GpuIndexFlat: non-transposed layout
    verbose(0)                        // quiet by default
{}
109 
110 
GpuResources *resources;  // GPU resources used by the cloned indexes
int device;               // GPU device on which the index is resident

// Build a cloner that copies CPU indexes to `device`, using the given
// resources and the copy options in `options`.
ToGpuCloner(GpuResources *resources, int device,
            const GpuClonerOptions &options):
    GpuClonerOptions(options), resources(resources), device(device)
{}
119 
// Clone a CPU index to a single GPU, dispatching on the concrete type
// (IndexFlat, IndexIVFFlat, IndexIVFPQ); anything else falls back to
// Cloner::clone_Index.
// NOTE(review): this listing comes from an extracted dump and several
// source lines are missing (skipped line numbers); the gaps are flagged
// below and should be restored from the upstream source.
Index *clone_Index(const Index *index) override {
    if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
        GpuIndexFlatConfig config;
        config.device = device;
        config.useFloat16 = useFloat16;
        // NOTE(review): one line missing here — presumably
        // config.storeTransposed = storeTransposed; confirm upstream

        return new GpuIndexFlat(resources, ifl, config);
    } else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
        GpuIndexIVFFlatConfig config;
        config.device = device;
        // NOTE(review): several config-setting lines missing here
        // (indicesOptions / float16 options) — confirm upstream

        GpuIndexIVFFlat *res =
            new GpuIndexIVFFlat(resources,
                                ifl->d,
                                ifl->nlist,
                                ifl->metric_type,
                                config);
        if(reserveVecs > 0 && ifl->ntotal == 0) {
            // NOTE(review): body lost in extraction — presumably
            // res->reserveMemory(reserveVecs); (see reserveMemory:
            // "Reserve GPU memory in our inverted lists")
        }

        res->copyFrom(ifl);
        return res;
    } else if(auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
        if(verbose)
            printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ "
                   "indicesOptions=%d "
                   "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
                   ipq->ntotal, indicesOptions, usePrecomputed,
                   // NOTE(review): trailing printf arguments (useFloat16,
                   // reserveVecs) lost in extraction — statement is
                   // incomplete as shown
        GpuIndexIVFPQConfig config;
        config.device = device;
        // NOTE(review): several config-setting lines missing here

        GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config);

        if(reserveVecs > 0 && ipq->ntotal == 0) {
            // NOTE(review): body lost in extraction — presumably
            // res->reserveMemory(reserveVecs); confirm upstream
        }

        return res;
    } else {
        return Cloner::clone_Index(index);
    }
}
174 
175 };
176 
177 
178 faiss::Index * index_cpu_to_gpu(
179  GpuResources* resources, int device,
180  const faiss::Index *index,
181  const GpuClonerOptions *options)
182 {
183  GpuClonerOptions defaults;
184  ToGpuCloner cl(resources, device, options ? *options : defaults);
185  return cl.clone_Index(index);
186 }
187 
// Default multi-GPU options: replicate across GPUs (shard = false)
// rather than splitting the dataset into shards.
GpuMultipleClonerOptions::GpuMultipleClonerOptions(): shard(false)
{}
190 
192  std::vector<ToGpuCloner> sub_cloners;
193 
194  ToGpuClonerMultiple(std::vector<GpuResources *> & resources,
195  std::vector<int>& devices,
196  const GpuMultipleClonerOptions &options):
197  GpuMultipleClonerOptions(options)
198  {
199  FAISS_ASSERT(resources.size() == devices.size());
200  for(int i = 0; i < resources.size(); i++) {
201  sub_cloners.push_back(ToGpuCloner(
202  resources[i], devices[i], options));
203  }
204  }
205 
206 
// Build from an explicit set of already-constructed sub-cloners; used
// below when recursively splitting work across a subset of the GPUs
// (see the MultiIndexQuantizer branch of clone_Index).
ToGpuClonerMultiple(const std::vector<ToGpuCloner> & sub_cloners,
                    const GpuMultipleClonerOptions &options):
    GpuMultipleClonerOptions(options),
    sub_cloners(sub_cloners)
{}
212 
213 
214  Index *clone_Index(const Index *index) override {
215  long n = sub_cloners.size();
216 
217  if (n == 1)
218  return sub_cloners[0].clone_Index(index);
219 
220  if(dynamic_cast<const IndexFlat *>(index) ||
221  dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
222  dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
223  if(!shard) {
224  IndexProxy * res = new IndexProxy();
225  for(auto & sub_cloner: sub_cloners) {
226  res->addIndex(sub_cloner.clone_Index(index));
227  }
228  res->own_fields = true;
229  return res;
230  } else {
231  auto index_ivfpq =
232  dynamic_cast<const faiss::IndexIVFPQ *>(index);
233  auto index_ivfflat =
234  dynamic_cast<const faiss::IndexIVFFlat *>(index);
235  FAISS_ASSERT_MSG (index_ivfpq || index_ivfflat,
236  "IndexShards implemented only for "
237  "IndexIVFFlat or IndexIVFPQ");
238  std::vector<faiss::Index*> shards(n);
239 
240  for(long i = 0; i < n; i++) {
241  // make a shallow copy
242  long i0 = i * index->ntotal / n;
243  long i1 = (i + 1) * index->ntotal / n;
244  if(verbose)
245  printf("IndexShards shard %ld indices %ld:%ld\n",
246  i, i0, i1);
247 
248  if(reserveVecs)
249  sub_cloners[i].reserveVecs =
250  (reserveVecs + n - 1) / n;
251 
252  if (index_ivfpq) {
253  faiss::IndexIVFPQ idx2(
254  index_ivfpq->quantizer, index_ivfpq->d,
255  index_ivfpq->nlist, index_ivfpq->code_size,
256  index_ivfpq->pq.nbits);
257  idx2.metric_type = index_ivfpq->metric_type;
258  idx2.pq = index_ivfpq->pq;
259  idx2.nprobe = index_ivfpq->nprobe;
260  idx2.use_precomputed_table = 0;
261  idx2.is_trained = index->is_trained;
262  index_ivfpq->copy_subset_to(idx2, 0, i0, i1);
263  shards[i] = sub_cloners[i].clone_Index(&idx2);
264  } else if (index_ivfflat) {
265  faiss::IndexIVFFlat idx2(
266  index_ivfflat->quantizer, index->d,
267  index_ivfflat->nlist, index_ivfflat->metric_type);
268  idx2.nprobe = index_ivfflat->nprobe;
269  index_ivfflat->copy_subset_to(idx2, 0, i0, i1);
270  idx2.nprobe = index_ivfflat->nprobe;
271  shards[i] = sub_cloners[i].clone_Index(&idx2);
272  }
273  }
274  faiss::IndexShards *res =
275  new faiss::IndexShards(index->d, true, false);
276 
277  for (int i = 0; i < n; i++) {
278  res->add_shard(shards[i]);
279  }
280  res->own_fields = true;
281  FAISS_ASSERT(index->ntotal == res->ntotal);
282  return res;
283  }
284  } else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
285  if (verbose) {
286  printf("cloning MultiIndexQuantizer: "
287  "will be valid only for search k=1\n");
288  }
289  const ProductQuantizer & pq = miq->pq;
290  IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
291  splitv->own_fields = true;
292 
293  for (int m = 0; m < pq.M; m++) {
294  // which GPU(s) will be assigned to this sub-quantizer
295 
296  long i0 = m * n / pq.M;
297  long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
298  std::vector<ToGpuCloner> sub_cloners_2;
299  sub_cloners_2.insert(
300  sub_cloners_2.begin(), sub_cloners.begin() + i0,
301  sub_cloners.begin() + i1);
302  ToGpuClonerMultiple cm(sub_cloners_2, *this);
303  IndexFlatL2 idxc (pq.dsub);
304  idxc.add (pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub);
305  Index *idx2 = cm.clone_Index(&idxc);
306  splitv->add_sub_index(idx2);
307  }
308  return splitv;
309  } else {
310  return Cloner::clone_Index(index);
311  }
312  }
313 
314 
315 };
316 
317 
318 
319 faiss::Index * index_cpu_to_gpu_multiple(
320  std::vector<GpuResources*> & resources,
321  std::vector<int> &devices,
322  const faiss::Index *index,
323  const GpuMultipleClonerOptions *options)
324 {
325  GpuMultipleClonerOptions defaults;
326  ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
327  return cl.clone_Index(index);
328 }
329 
330 
331 
332 /**********************************************************
333  * Parameters to auto-tune on GpuIndex'es
334  **********************************************************/
335 
336 #define DC(classname) auto ix = dynamic_cast<const classname *>(index)
337 
338 
// NOTE(review): the signature line is missing from this extracted
// listing; per the trailing Doxygen docs this is
//   void GpuParameterSpace::initialize(const faiss::Index *index) override
// ("initialize with reasonable parameters for the index").
{
    // unwrap pre-transforms / replicas / shards to reach a leaf index
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (faiss::IndexShards)) {
        if (ix->shard_indexes.size() == 0) return;
        index = ix->shard_indexes[0];
    }
    if (DC (GpuIndexIVF)) {
        // auto-tune nprobe over powers of two, capped at 1024 and at
        // the number of inverted lists
        ParameterRange & pr = add_range("nprobe");
        for (int i = 0; i < 12; i++) {
            size_t nprobe = 1 << i;
            if (nprobe >= ix->getNumLists() ||
                nprobe > 1024) break;
            pr.values.push_back (nprobe);
        }
    }
    // not sure we should call the parent initializer
}
363 
364 
365 
366 #undef DC
367 // non-const version
368 #define DC(classname) auto *ix = dynamic_cast<classname *>(index)
369 
370 
371 
// NOTE(review): the first line of the signature is missing from this
// extracted listing; per the trailing Doxygen docs this is
//   void GpuParameterSpace::set_index_parameter(
// ("set a combination of parameters on an index"). Supported names:
// "nprobe" (GpuIndexIVF) and "use_precomputed_table" (GpuIndexIVFPQ);
// asserts on anything else.
    Index * index, const std::string & name, double val) const
{
    // recurse through wrappers so the parameter reaches every leaf index
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        for (int i = 0; i < ix->count(); i++)
            set_index_parameter (ix->at(i), name, val);
        return;
    }
    if (DC (faiss::IndexShards)) {
        for (auto sub_index : ix->shard_indexes)
            set_index_parameter (sub_index, name, val);
        return;
    }
    if (name == "nprobe") {
        DC (GpuIndexIVF);
        FAISS_ASSERT(ix);
        ix->setNumProbes (int (val));
        return;
    }
    if (name == "use_precomputed_table") {
        DC (GpuIndexIVFPQ);
        FAISS_ASSERT(ix);
        ix->setPrecomputedCodes(bool (val));
        return;
    }

    FAISS_ASSERT_MSG (false, "unknown parameter");
}
403 
404 
405 
406 
407 } } // namespace
bool storeTransposed
For GpuIndexFlat, store data in transposed layout?
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:47
void initialize(const faiss::Index *index) override
initialize with reasonable parameters for the index
set some options on how to copy to GPU
void copyFrom(const faiss::IndexIVFFlat *index)
size_t dsub
dimensionality of each subvector
bool usePrecomputed
use precomputed tables?
ParameterRange & add_range(const char *name)
add a new parameter
Definition: AutoTune.cpp:322
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:35
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:27
int d
vector dimension
Definition: Index.h:64
GpuIndexFlatConfig flatConfig
Configuration for the coarse quantizer object.
Definition: GpuIndexIVF.h:34
void set_index_parameter(faiss::Index *index, const std::string &name, double val) const override
set a combination of parameters on an index
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
size_t ksub
number of centroids for each subquantizer
bool useFloat16CoarseQuantizer
is the coarse quantizer in float16?
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
void add(idx_t n, const float *x) override
Definition: IndexFlat.cpp:30
void addIndex(faiss::Index *index)
Definition: IndexProxy.cpp:32
IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:40
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:33
size_t M
number of subquantizers
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
possible values of a parameter, sorted from least to most expensive/accurate
Definition: AutoTune.h:125
size_t d
size of the input vectors
IndicesOptions indicesOptions
Index storage options for the GPU.
Definition: GpuIndexIVF.h:31
bool verbose
Set verbose options on the index.
long reserveVecs
reserve vectors in the inverted files?
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:31
std::vector< float > centroids
Centroid table, size M * ksub * dsub.