GpuAutoTune.cpp
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuAutoTune.h"
#include <typeinfo>

#include "GpuIndex.h"
#include "../FaissAssert.h"
#include "../index_io.h"
#include "../IndexFlat.h"
#include "../IndexIVF.h"
#include "../IndexIVFFlat.h"
#include "../IndexIVFPQ.h"
#include "../VectorTransform.h"
#include "../MetaIndexes.h"
#include "GpuIndexFlat.h"
#include "GpuIndexIVFFlat.h"
#include "GpuIndexIVFPQ.h"
#include "IndexProxy.h"

namespace faiss { namespace gpu {

/**********************************************************
 * Cloning from/to GPU
 **********************************************************/

struct ToCPUCloner: Cloner {

    void merge_index(Index *dst, Index *src, bool successive_ids) {
        if (auto ifl = dynamic_cast<IndexFlat *>(dst)) {
            auto ifl2 = dynamic_cast<const IndexFlat *>(src);
            FAISS_ASSERT(ifl2);
            FAISS_ASSERT(successive_ids);
            ifl->add(ifl2->ntotal, ifl2->xb.data());
        } else if(auto ifl = dynamic_cast<IndexIVFFlat *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFFlat *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else if(auto ifl = dynamic_cast<IndexIVFPQ *>(dst)) {
            auto ifl2 = dynamic_cast<IndexIVFPQ *>(src);
            FAISS_ASSERT(ifl2);
            ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
        } else {
            FAISS_ASSERT(!"merging not implemented for this type of class");
        }
    }
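
    // Illustration (caller-side reasoning, not part of this file): with
    // successive_ids == true, merging two shards of 1000 vectors each
    // yields one index of 2000 vectors whose ids run 0..1999, i.e. the ids
    // of `src` are shifted up by dst->ntotal. With successive_ids == false
    // the ids are taken as-is, which is only meaningful for the IVF
    // variants (hence the assert in the IndexFlat branch above).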

    Index *clone_Index(const Index *index) override {
        if(auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
            IndexFlat *res = new IndexFlat();
            ifl->copyTo(res);
            return res;
        } else if(auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
            IndexIVFFlat *res = new IndexIVFFlat();
            ifl->copyTo(res);
            return res;
        } else if(auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
            IndexIVFPQ *res = new IndexIVFPQ();
            ipq->copyTo(res);
            return res;

        // for IndexShards and IndexProxy we assume that the
        // objective is to make a single component out of them
        // (inverse op of ToGpuClonerMultiple)

        } else if(auto ish = dynamic_cast<const IndexShards *>(index)) {
            int nshard = ish->shard_indexes.size();
            FAISS_ASSERT(nshard > 0);
            Index *res = clone_Index(ish->shard_indexes[0]);
            for(int i = 1; i < ish->shard_indexes.size(); i++) {
                Index *res_i = clone_Index(ish->shard_indexes[i]);
                merge_index(res, res_i, ish->successive_ids);
                delete res_i;
            }
            return res;
        } else if(auto ipr = dynamic_cast<const IndexProxy *>(index)) {
            // just clone one of the replicas
            FAISS_ASSERT(ipr->count() > 0);
            return clone_Index(ipr->at(0));
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
{
    ToCPUCloner cl;
    return cl.clone_Index(gpu_index);
}
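
// Usage sketch (caller-side code, not part of this file; the file name is
// an assumption). A GPU index cannot be serialized directly, so it is
// typically brought back to the CPU first; write_index comes from the
// index_io.h included above:
//
//   faiss::Index *cpu_index = faiss::gpu::index_gpu_to_cpu(gpu_index);
//   faiss::write_index(cpu_index, "index.faiss");
//   delete cpu_index;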

struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
    GpuResources *resources;
    int device;

    ToGpuCloner(GpuResources *resources, int device,
                const GpuClonerOptions &options):
        GpuClonerOptions(options), resources(resources), device(device)
    {}

    Index *clone_Index(const Index *index) override {
        if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
            GpuIndexFlatConfig config;
            config.device = device;
            config.useFloat16 = useFloat16;
            config.storeTransposed = storeTransposed;

            return new GpuIndexFlat(resources, ifl, config);
        } else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
            GpuIndexIVFFlatConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;
            config.useFloat16IVFStorage = useFloat16;

            GpuIndexIVFFlat *res =
                new GpuIndexIVFFlat(resources,
                                    ifl->d,
                                    ifl->nlist,
                                    ifl->metric_type,
                                    config);
            if(reserveVecs > 0 && ifl->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }

            res->copyFrom(ifl);
            return res;
        } else if(auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if(verbose)
                printf("  IndexIVFPQ size %ld -> GpuIndexIVFPQ "
                       "indicesOptions=%d "
                       "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
                       ipq->ntotal, indicesOptions, usePrecomputed,
                       useFloat16, reserveVecs);
            GpuIndexIVFPQConfig config;
            config.device = device;
            config.indicesOptions = indicesOptions;
            config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
            config.flatConfig.storeTransposed = storeTransposed;
            config.useFloat16LookupTables = useFloat16;
            config.usePrecomputedTables = usePrecomputed;

            GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config);

            if(reserveVecs > 0 && ipq->ntotal == 0) {
                res->reserveMemory(reserveVecs);
            }

            return res;
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_cpu_to_gpu(
    GpuResources* resources, int device,
    const faiss::Index *index,
    const GpuClonerOptions *options)
{
    GpuClonerOptions defaults;
    ToGpuCloner cl(resources, device, options ? *options : defaults);
    return cl.clone_Index(index);
}
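
// Usage sketch (caller-side code, not part of this file; assumes the caller
// also includes StandardGpuResources.h and has a trained `cpu_index`):
//
//   faiss::gpu::StandardGpuResources res;
//   faiss::gpu::GpuClonerOptions opts;
//   opts.useFloat16 = true;   // trade some accuracy for GPU memory
//   faiss::Index *gpu_index =
//       faiss::gpu::index_cpu_to_gpu(&res, /*device=*/0, &cpu_index, &opts);
//   // ... search as usual, then:
//   delete gpu_index;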

struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
    std::vector<ToGpuCloner> sub_cloners;

    ToGpuClonerMultiple(std::vector<GpuResources *> & resources,
                        std::vector<int>& devices,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options)
    {
        FAISS_ASSERT(resources.size() == devices.size());
        for(int i = 0; i < resources.size(); i++) {
            sub_cloners.push_back(ToGpuCloner(
                resources[i], devices[i], options));
        }
    }

    ToGpuClonerMultiple(const std::vector<ToGpuCloner> & sub_cloners,
                        const GpuMultipleClonerOptions &options):
        GpuMultipleClonerOptions(options),
        sub_cloners(sub_cloners)
    {}

    void copy_ivf_shard (const IndexIVF *index_ivf, IndexIVF *idx2,
                         long n, long i) {
        if (shard_type == 2) {
            long i0 = i * index_ivf->ntotal / n;
            long i1 = (i + 1) * index_ivf->ntotal / n;

            if(verbose)
                printf("IndexShards shard %ld indices %ld:%ld\n",
                       i, i0, i1);
            index_ivf->copy_subset_to(*idx2, 2, i0, i1);
            FAISS_ASSERT(idx2->ntotal == i1 - i0);
        } else if (shard_type == 1) {
            if(verbose)
                printf("IndexShards shard %ld select modulo %ld = %ld\n",
                       i, n, i);
            index_ivf->copy_subset_to(*idx2, 1, n, i);
        } else {
            FAISS_THROW_FMT ("shard_type %d not implemented", shard_type);
        }
    }
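
    // Worked example (illustrative numbers): for ntotal = 100 and n = 4,
    // shard_type 2 gives shard i the contiguous id range [25*i, 25*(i+1)),
    // so shard 1 holds ids 25..49; shard_type 1 instead gives shard i the
    // ids with id % 4 == i, which balances shards even when ids are not
    // sequential.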

    Index *clone_Index(const Index *index) override {
        long n = sub_cloners.size();
        if (n == 1)
            return sub_cloners[0].clone_Index(index);

        if(dynamic_cast<const IndexFlat *>(index) ||
           dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
           dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
            if(!shard) {
                IndexProxy * res = new IndexProxy();
                for(auto & sub_cloner: sub_cloners) {
                    res->addIndex(sub_cloner.clone_Index(index));
                }
                res->own_fields = true;
                return res;
            } else {
                auto index_ivfpq =
                    dynamic_cast<const faiss::IndexIVFPQ *>(index);
                auto index_ivfflat =
                    dynamic_cast<const faiss::IndexIVFFlat *>(index);
                FAISS_THROW_IF_NOT_MSG (index_ivfpq || index_ivfflat,
                    "IndexShards implemented only for "
                    "IndexIVFFlat or IndexIVFPQ");
                std::vector<faiss::Index*> shards(n);

                for(long i = 0; i < n; i++) {
                    // make a shallow copy
                    if(reserveVecs)
                        sub_cloners[i].reserveVecs =
                            (reserveVecs + n - 1) / n;

                    if (index_ivfpq) {
                        faiss::IndexIVFPQ idx2(
                            index_ivfpq->quantizer, index_ivfpq->d,
                            index_ivfpq->nlist, index_ivfpq->code_size,
                            index_ivfpq->pq.nbits);
                        idx2.metric_type = index_ivfpq->metric_type;
                        idx2.pq = index_ivfpq->pq;
                        idx2.nprobe = index_ivfpq->nprobe;
                        idx2.use_precomputed_table = 0;
                        idx2.is_trained = index->is_trained;
                        copy_ivf_shard (index_ivfpq, &idx2, n, i);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    } else if (index_ivfflat) {
                        faiss::IndexIVFFlat idx2(
                            index_ivfflat->quantizer, index->d,
                            index_ivfflat->nlist, index_ivfflat->metric_type);
                        idx2.nprobe = index_ivfflat->nprobe;
                        copy_ivf_shard (index_ivfflat, &idx2, n, i);
                        shards[i] = sub_cloners[i].clone_Index(&idx2);
                    }
                }
                faiss::IndexShards *res =
                    new faiss::IndexShards(index->d, true, false);

                for (int i = 0; i < n; i++) {
                    res->add_shard(shards[i]);
                }
                res->own_fields = true;
                FAISS_ASSERT(index->ntotal == res->ntotal);
                return res;
            }
        } else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
            if (verbose) {
                printf("cloning MultiIndexQuantizer: "
                       "will be valid only for search k=1\n");
            }
            const ProductQuantizer & pq = miq->pq;
            IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
            splitv->own_fields = true;

            for (int m = 0; m < pq.M; m++) {
                // which GPU(s) will be assigned to this sub-quantizer

                long i0 = m * n / pq.M;
                long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
                std::vector<ToGpuCloner> sub_cloners_2;
                sub_cloners_2.insert(
                    sub_cloners_2.begin(), sub_cloners.begin() + i0,
                    sub_cloners.begin() + i1);
                ToGpuClonerMultiple cm(sub_cloners_2, *this);
                IndexFlatL2 idxc (pq.dsub);
                // centroids of sub-quantizer m: block m of the
                // M * ksub * dsub centroid table
                idxc.add (pq.ksub, pq.centroids.data() + m * pq.dsub * pq.ksub);
                Index *idx2 = cm.clone_Index(&idxc);
                splitv->add_sub_index(idx2);
            }
            return splitv;
        } else {
            return Cloner::clone_Index(index);
        }
    }
};

faiss::Index * index_cpu_to_gpu_multiple(
    std::vector<GpuResources*> & resources,
    std::vector<int> &devices,
    const faiss::Index *index,
    const GpuMultipleClonerOptions *options)
{
    GpuMultipleClonerOptions defaults;
    ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
    return cl.clone_Index(index);
}
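
// Usage sketch (caller-side code, not part of this file; the device count
// and option values are assumptions). One GpuResources object is needed per
// target device:
//
//   std::vector<faiss::gpu::GpuResources*> res;
//   std::vector<int> devs;
//   for (int dev = 0; dev < 2; dev++) {
//       res.push_back(new faiss::gpu::StandardGpuResources());
//       devs.push_back(dev);
//   }
//   faiss::gpu::GpuMultipleClonerOptions opts;
//   opts.shard = true;   // split the data across GPUs instead of replicating
//   faiss::Index *gpu_index =
//       faiss::gpu::index_cpu_to_gpu_multiple(res, devs, &cpu_index, &opts);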

/**********************************************************
 * Parameters to auto-tune on GpuIndex'es
 **********************************************************/

#define DC(classname) auto ix = dynamic_cast<const classname *>(index)

void GpuParameterSpace::initialize (const Index * index)
{
    if (DC (IndexPreTransform)) {
        index = ix->index;
    }
    if (DC (IndexProxy)) {
        if (ix->count() == 0) return;
        index = ix->at(0);
    }
    if (DC (faiss::IndexShards)) {
        if (ix->shard_indexes.size() == 0) return;
        index = ix->shard_indexes[0];
    }
    if (DC (GpuIndexIVF)) {
        ParameterRange & pr = add_range("nprobe");
        for (int i = 0; i < 12; i++) {
            size_t nprobe = 1 << i;
            if (nprobe >= ix->getNumLists() ||
                nprobe > 1024) break;
            pr.values.push_back (nprobe);
        }
    }
    // not sure we should call the parent initializer
}
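
// Illustration (values follow from the loop above): an index with
// getNumLists() == 4096 gets the "nprobe" range 1, 2, 4, ..., 1024, while
// one with 256 lists stops at 128, since nprobe must stay strictly below
// the number of inverted lists.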

#undef DC
// non-const version
#define DC(classname) auto *ix = dynamic_cast<classname *>(index)

void GpuParameterSpace::set_index_parameter (
        Index * index, const std::string & name, double val) const
{
    if (DC (IndexProxy)) {
        for (int i = 0; i < ix->count(); i++)
            set_index_parameter (ix->at(i), name, val);
        return;
    }
    if (name == "nprobe") {
        if (DC (GpuIndexIVF)) {
            ix->setNumProbes (int (val));
            return;
        }
    }
    if (name == "use_precomputed_table") {
        if (DC (GpuIndexIVFPQ)) {
            ix->setPrecomputedCodes(bool (val));
            return;
        }
    }

    // maybe normal index parameters apply?
    ParameterSpace::set_index_parameter (index, name, val);
}
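
// Usage sketch (caller-side code, not part of this file): GpuParameterSpace
// accepts the same parameter names as the CPU-side ParameterSpace, so
// callers need not know the concrete index type; on an IndexProxy the value
// is propagated to every replica:
//
//   faiss::gpu::GpuParameterSpace params;
//   params.initialize(gpu_index);
//   params.set_index_parameter(gpu_index, "nprobe", 64);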

} } // namespace