GpuAutoTune.cpp
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuAutoTune.h"
#include <typeinfo>

#include "GpuIndex.h"
#include "../FaissAssert.h"
#include "../index_io.h"
#include "../IndexFlat.h"
#include "../IndexIVF.h"
#include "../IndexIVFPQ.h"
#include "../VectorTransform.h"
#include "../MetaIndexes.h"
#include "GpuIndexFlat.h"
#include "GpuIndexIVFFlat.h"
#include "GpuIndexIVFPQ.h"
#include "IndexProxy.h"

namespace faiss { namespace gpu {

/**********************************************************
 * Cloning from/to GPU
 **********************************************************/

struct ToCPUCloner: faiss::Cloner {

    void merge_index(Index *dst, Index *src, bool successive_ids) {
        if (auto ifl = dynamic_cast<IndexFlat *>(dst)) {
            auto ifl2 = dynamic_cast<const IndexFlat *>(src);
            FAISS_ASSERT(ifl2);
            FAISS_ASSERT(successive_ids);
            ifl->add(ifl2->ntotal, ifl2->xb.data());
        } else if(auto ifl = dynamic_cast<IndexIVFFlat *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFFlat *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else if(auto ifl = dynamic_cast<IndexIVFPQ *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFPQ *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else {
            FAISS_ASSERT(!"merging not implemented for this type of class");
        }
    }

    Index *clone_Index(const Index *index) override {
        if(auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
            IndexFlat *res = new IndexFlat();
            ifl->copyTo(res);
            return res;
        } else if(auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
            IndexIVFFlat *res = new IndexIVFFlat();
            ifl->copyTo(res);
            return res;
        } else if(auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
            IndexIVFPQ *res = new IndexIVFPQ();
            ipq->copyTo(res);
            return res;

        // for IndexShards and IndexProxy we assume that the
        // objective is to make a single component out of them
        // (inverse op of ToGpuClonerMultiple)

        } else if(auto ish = dynamic_cast<const IndexShards *>(index)) {
            int nshard = ish->shard_indexes.size();
            FAISS_ASSERT(nshard > 0);
            Index *res = clone_Index(ish->shard_indexes[0]);
            for(int i = 1; i < ish->shard_indexes.size(); i++) {
                Index *res_i = clone_Index(ish->shard_indexes[i]);
                merge_index(res, res_i, ish->successive_ids);
                delete res_i;
            }
            return res;
        } else if(auto ipr = dynamic_cast<const IndexProxy *>(index)) {
            // just clone one of the replicas
            FAISS_ASSERT(ipr->count() > 0);
            return clone_Index(ipr->at(0));
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
{
    ToCPUCloner cl;
    return cl.clone_Index(gpu_index);
}
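
// A minimal usage sketch (not part of the original file): converting a
// GPU index back to a CPU index, e.g. to persist it with the index_io
// functions included above. `gpu_idx` is an illustrative name for a
// populated GpuIndex; the caller owns the returned CPU index.
//
//   faiss::Index *cpu_idx = faiss::gpu::index_gpu_to_cpu(&gpu_idx);
//   faiss::write_index(cpu_idx, "flat.index");
//   delete cpu_idx;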


struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
    GpuResources *resources;
    int device;

    ToGpuCloner(GpuResources *resources, int device,
                const GpuClonerOptions &options):
        GpuClonerOptions(options), resources(resources), device(device)
    {}

    Index *clone_Index(const Index *index) override {
        if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
            GpuIndexFlatConfig config;
            config.device = device;
            config.useFloat16 = useFloat16;
            config.storeTransposed = storeTransposed;

            return new GpuIndexFlat(resources, ifl, config);
        } else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
            GpuIndexIVFFlatConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;
            config.useFloat16IVFStorage = useFloat16;

            GpuIndexIVFFlat *res =
                new GpuIndexIVFFlat(resources,
                                    ifl->d,
                                    ifl->nlist,
                                    ifl->metric_type,
                                    config);
            if(reserveVecs > 0 && ifl->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }

            res->copyFrom(ifl);
            return res;
        } else if(auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if(verbose)
                printf("  IndexIVFPQ size %ld -> GpuIndexIVFPQ "
                       "indicesOptions=%d "
                       "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
                       ipq->ntotal, indicesOptions, usePrecomputed,
                       useFloat16, reserveVecs);
            GpuIndexIVFPQConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;
            config.useFloat16LookupTables = useFloat16;
            config.usePrecomputedTables = usePrecomputed;

            GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config);

            if(reserveVecs > 0 && ipq->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }

            return res;
        } else {
            return Cloner::clone_Index(index);
        }
    }

};

faiss::Index * index_cpu_to_gpu(
    GpuResources* resources, int device,
    const faiss::Index *index,
    const GpuClonerOptions *options)
{
    GpuClonerOptions defaults;
    ToGpuCloner cl(resources, device, options ? *options : defaults);
    return cl.clone_Index(index);
}
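
// A minimal usage sketch (not part of the original file): moving a CPU
// index onto GPU 0. `cpu_index` is an illustrative name for a trained
// faiss::Index; StandardGpuResources and GpuClonerOptions are real types.
//
//   faiss::gpu::StandardGpuResources res;
//   faiss::gpu::GpuClonerOptions opts;
//   opts.useFloat16 = true;  // e.g. trade some accuracy for GPU memory
//   faiss::Index *gpu_index =
//       faiss::gpu::index_cpu_to_gpu(&res, 0 /* device */, &cpu_index, &opts);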

struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
    std::vector<ToGpuCloner> sub_cloners;

    ToGpuClonerMultiple(std::vector<GpuResources *> & resources,
                        std::vector<int>& devices,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options)
    {
        FAISS_ASSERT(resources.size() == devices.size());
        for(int i = 0; i < resources.size(); i++) {
            sub_cloners.push_back(ToGpuCloner(
                resources[i], devices[i], options));
        }
    }

    ToGpuClonerMultiple(const std::vector<ToGpuCloner> & sub_cloners,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options),
        sub_cloners(sub_cloners)
    {}

    void copy_ivf_shard (const IndexIVF *index_ivf, IndexIVF *idx2,
                         long n, long i) {
        if (shard_type == 2) {
            long i0 = i * index_ivf->ntotal / n;
            long i1 = (i + 1) * index_ivf->ntotal / n;

            if(verbose)
                printf("IndexShards shard %ld indices %ld:%ld\n",
                       i, i0, i1);
            index_ivf->copy_subset_to(*idx2, 2, i0, i1);
            FAISS_ASSERT(idx2->ntotal == i1 - i0);
        } else if (shard_type == 1) {
            if(verbose)
                printf("IndexShards shard %ld select modulo %ld = %ld\n",
                       i, n, i);
            index_ivf->copy_subset_to(*idx2, 1, n, i);
        } else {
            FAISS_THROW_FMT ("shard_type %d not implemented", shard_type);
        }
    }

    Index *clone_Index(const Index *index) override {
        long n = sub_cloners.size();
        if (n == 1)
            return sub_cloners[0].clone_Index(index);

        if(dynamic_cast<const IndexFlat *>(index) ||
           dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
           dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if(!shard) {
                IndexProxy * res = new IndexProxy();
                for(auto & sub_cloner: sub_cloners) {
                    res->addIndex(sub_cloner.clone_Index(index));
                }
                res->own_fields = true;
                return res;
            } else {
                auto index_ivfpq =
                    dynamic_cast<const faiss::IndexIVFPQ *>(index);
                auto index_ivfflat =
                    dynamic_cast<const faiss::IndexIVFFlat *>(index);
                FAISS_THROW_IF_NOT_MSG (index_ivfpq || index_ivfflat,
                    "IndexShards implemented only for "
                    "IndexIVFFlat or IndexIVFPQ");
                std::vector<faiss::Index*> shards(n);

                for(long i = 0; i < n; i++) {
                    // make a shallow copy
                    if(reserveVecs)
                        sub_cloners[i].reserveVecs =
                            (reserveVecs + n - 1) / n;

                    if (index_ivfpq) {
                        faiss::IndexIVFPQ idx2(
                            index_ivfpq->quantizer, index_ivfpq->d,
                            index_ivfpq->nlist, index_ivfpq->code_size,
                            index_ivfpq->pq.nbits);
                        idx2.metric_type = index_ivfpq->metric_type;
                        idx2.pq = index_ivfpq->pq;
                        idx2.nprobe = index_ivfpq->nprobe;
                        idx2.use_precomputed_table = 0;
                        idx2.is_trained = index->is_trained;
                        copy_ivf_shard (index_ivfpq, &idx2, n, i);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    } else if (index_ivfflat) {
                        faiss::IndexIVFFlat idx2(
                            index_ivfflat->quantizer, index->d,
                            index_ivfflat->nlist, index_ivfflat->metric_type);
                        idx2.nprobe = index_ivfflat->nprobe;
                        copy_ivf_shard (index_ivfflat, &idx2, n, i);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    }
                }
                faiss::IndexShards *res =
                    new faiss::IndexShards(index->d, true, false);

                for (int i = 0; i < n; i++) {
                    res->add_shard(shards[i]);
                }
                res->own_fields = true;
                FAISS_ASSERT(index->ntotal == res->ntotal);
                return res;
            }
        } else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
            if (verbose) {
                printf("cloning MultiIndexQuantizer: "
                       "will be valid only for search k=1\n");
            }
            const ProductQuantizer & pq = miq->pq;
            IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
            splitv->own_fields = true;

            for (int m = 0; m < pq.M; m++) {
                // which GPU(s) will be assigned to this sub-quantizer

                long i0 = m * n / pq.M;
                long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
                std::vector<ToGpuCloner> sub_cloners_2;
                sub_cloners_2.insert(
                    sub_cloners_2.begin(), sub_cloners.begin() + i0,
                    sub_cloners.begin() + i1);
                ToGpuClonerMultiple cm(sub_cloners_2, *this);
                IndexFlatL2 idxc (pq.dsub);
                // centroid table is laid out as M * ksub * dsub
                idxc.add (pq.ksub, pq.centroids.data() + m * pq.dsub * pq.ksub);
                Index *idx2 = cm.clone_Index(&idxc);
                splitv->add_sub_index(idx2);
            }
            return splitv;
        } else {
            return Cloner::clone_Index(index);
        }
    }

};



faiss::Index * index_cpu_to_gpu_multiple(
    std::vector<GpuResources*> & resources,
    std::vector<int> &devices,
    const faiss::Index *index,
    const GpuMultipleClonerOptions *options)
{
    GpuMultipleClonerOptions defaults;
    ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
    return cl.clone_Index(index);
}
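
// A minimal usage sketch (not part of the original file): spreading a CPU
// index over two GPUs. Resource and index names are illustrative; with
// `shard = true` the dataset is split into an IndexShards instead of
// being replicated into an IndexProxy.
//
//   faiss::gpu::StandardGpuResources res0, res1;
//   std::vector<faiss::gpu::GpuResources*> resources {&res0, &res1};
//   std::vector<int> devices {0, 1};
//   faiss::gpu::GpuMultipleClonerOptions opts;
//   opts.shard = true;
//   faiss::Index *multi =
//       faiss::gpu::index_cpu_to_gpu_multiple(resources, devices,
//                                             &cpu_index, &opts);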


/**********************************************************
 * Parameters to auto-tune on GpuIndex'es
 **********************************************************/

#define DC(classname) auto ix = dynamic_cast<const classname *>(index)

void GpuParameterSpace::initialize (const Index * index)
{
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (faiss::IndexShards)) {
        if (ix->shard_indexes.size() == 0) return;
        index = ix->shard_indexes[0];
    }
    if (DC (GpuIndexIVF)) {
        ParameterRange & pr = add_range("nprobe");
        for (int i = 0; i < 12; i++) {
            size_t nprobe = 1 << i;
            if (nprobe >= ix->getNumLists() ||
                nprobe > 1024) break;
            pr.values.push_back (nprobe);
        }
    }
    // not sure we should call the parent initializer
}


#undef DC
// non-const version
#define DC(classname) auto *ix = dynamic_cast<classname *>(index)

void GpuParameterSpace::set_index_parameter (
        Index * index, const std::string & name, double val) const
{
    if (DC (IndexProxy)) {
        for (int i = 0; i < ix->count(); i++)
            set_index_parameter (ix->at(i), name, val);
        return;
    }
    if (DC (GpuIndexIVF)) {
        if (name == "nprobe") {
            ix->setNumProbes (int (val));
            return;
        }
    }
    if(DC (GpuIndexIVFPQ)) {
        if (name == "use_precomputed_table") {
            ix->setPrecomputedCodes(bool (val));
            return;
        }
    }

    // maybe normal index parameters apply?
    ParameterSpace::set_index_parameter (index, name, val);
}
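
// A minimal usage sketch (not part of the original file): tuning nprobe
// on a GPU index through GpuParameterSpace. `gpu_index` is an
// illustrative name for an index produced by index_cpu_to_gpu above.
//
//   faiss::gpu::GpuParameterSpace params;
//   params.initialize(gpu_index);  // fills the nprobe parameter range
//   params.set_index_parameter(gpu_index, "nprobe", 32);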


} } // namespace