Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuAutoTune.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 #include "GpuAutoTune.h"
11 
12 
13 #include "GpuIndex.h"
14 #include "../FaissAssert.h"
15 #include "../index_io.h"
16 #include "../IndexFlat.h"
17 #include "../IndexIVF.h"
18 #include "../IndexIVFPQ.h"
19 #include "../VectorTransform.h"
20 #include "../MetaIndexes.h"
21 #include "GpuIndexFlat.h"
22 #include "GpuIndexIVFFlat.h"
23 #include "GpuIndexIVFPQ.h"
24 #include "IndexProxy.h"
25 
26 namespace faiss { namespace gpu {
27 
28 /**********************************************************
29  * Cloning from/to GPU
30  **********************************************************/
31 
32 
34 
35  Index *clone_Index(const Index *index) override {
36  if(auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
37  IndexFlat *res = new IndexFlat();
38  ifl->copyTo(res);
39  return res;
40  } else if(auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
41  IndexIVFFlat *res = new IndexIVFFlat();
42  ifl->copyTo(res);
43  return res;
44  } else if(auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
45  IndexIVFPQ *res = new IndexIVFPQ();
46  ipq->copyTo(res);
47  return res;
48  } else {
49  return Cloner::clone_Index(index);
50  }
51  }
52 };
53 
54 faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
55 {
56  ToCPUCloner cl;
57  return cl.clone_Index(gpu_index);
58 }
59 
60 
61 
62 GpuClonerOptions::GpuClonerOptions():
63  indicesOptions(INDICES_64_BIT),
64  useFloat16CoarseQuantizer(false),
65  useFloat16(false),
66  usePrecomputed(true),
67  reserveVecs(0),
68  storeTransposed(false),
69  verbose(0)
70 {}
71 
72 
74  GpuResources *resources;
75  int device;
76 
77  ToGpuCloner(GpuResources *resources, int device, const GpuClonerOptions &options):
78  GpuClonerOptions(options), resources(resources), device(device)
79  {}
80 
81  Index *clone_Index(const Index *index) override {
82  if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
83  GpuIndexFlatConfig config;
84  config.device = device;
85  config.useFloat16 = useFloat16;
86  config.storeTransposed = storeTransposed;
87 
88  return new GpuIndexFlat(resources, ifl, config);
89  } else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
90  GpuIndexIVFFlat *res =
91  new GpuIndexIVFFlat(resources,
92  device,
94  useFloat16,
95  ifl->d,
96  ifl->nlist,
98  ifl->metric_type);
99  if(reserveVecs > 0 && ifl->ntotal == 0)
101  res->copyFrom(ifl);
102  return res;
103  } else if(auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
104  if(verbose)
105  printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ indicesOptions=%d "
106  "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
107  ipq->ntotal, indicesOptions, usePrecomputed,
109  GpuIndexIVFPQ *res = new GpuIndexIVFPQ(
110  resources, device, indicesOptions, useFloat16,
111  ipq);
113  if(reserveVecs > 0 && ipq->ntotal == 0)
115  return res;
116  } else {
117  return Cloner::clone_Index(index);
118  }
119  }
120 
121 };
122 
123 
124 faiss::Index * index_cpu_to_gpu(
125  GpuResources* resources, int device,
126  const faiss::Index *index,
127  const GpuClonerOptions *options)
128 {
129  GpuClonerOptions defaults;
130  ToGpuCloner cl(resources, device, options ? *options : defaults);
131  return cl.clone_Index(index);
132 }
133 
134 GpuMultipleClonerOptions::GpuMultipleClonerOptions(): shard(false)
135 {}
136 
138  std::vector<ToGpuCloner> sub_cloners;
139 
140  ToGpuClonerMultiple(std::vector<GpuResources *> & resources,
141  std::vector<int>& devices,
142  const GpuMultipleClonerOptions &options):
143  GpuMultipleClonerOptions(options)
144  {
145  FAISS_ASSERT(resources.size() == devices.size());
146  for(int i = 0; i < resources.size(); i++) {
147  sub_cloners.push_back(ToGpuCloner(
148  resources[i], devices[i], options));
149  }
150  }
151 
152 
153  ToGpuClonerMultiple(const std::vector<ToGpuCloner> & sub_cloners,
154  const GpuMultipleClonerOptions &options):
155  GpuMultipleClonerOptions(options),
156  sub_cloners(sub_cloners)
157  {}
158 
159 
160  Index *clone_Index(const Index *index) override {
161  long n = sub_cloners.size();
162 
163  if (n == 1)
164  return sub_cloners[0].clone_Index(index);
165 
166  if(dynamic_cast<const IndexFlat *>(index) ||
167  dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
168  dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
169  if(!shard) {
170  IndexProxy * res = new IndexProxy();
171  for(auto & sub_cloner: sub_cloners) {
172  res->addIndex(sub_cloner.clone_Index(index));
173  }
174  res->own_fields = true;
175  return res;
176  } else {
177  auto index_ivfpq =
178  dynamic_cast<const faiss::IndexIVFPQ *>(index);
179  auto index_ivfflat =
180  dynamic_cast<const faiss::IndexIVFFlat *>(index);
181  FAISS_ASSERT (index_ivfpq || index_ivfflat ||
182  !"IndexShards implemented only for "
183  "IndexIVFFlat or IndexIVFPQ");
184  std::vector<faiss::Index*> shards(n);
185 
186  for(long i = 0; i < n; i++) {
187  // make a shallow copy
188  long i0 = i * index->ntotal / n;
189  long i1 = (i + 1) * index->ntotal / n;
190  if(verbose)
191  printf("IndexShards shard %ld indices %ld:%ld\n",
192  i, i0, i1);
193 
194  if(reserveVecs)
195  sub_cloners[i].reserveVecs =
196  (reserveVecs + n - 1) / n;
197 
198  if (index_ivfpq) {
199  faiss::IndexIVFPQ idx2(
200  index_ivfpq->quantizer, index_ivfpq->d,
201  index_ivfpq->nlist, index_ivfpq->code_size,
202  index_ivfpq->pq.nbits);
203  idx2.pq = index_ivfpq->pq;
204  idx2.use_precomputed_table = 0;
205  idx2.is_trained = index->is_trained;
206  index_ivfpq->copy_subset_to(idx2, 0, i0, i1);
207  shards[i] = sub_cloners[i].clone_Index(&idx2);
208  } else if (index_ivfflat) {
209  faiss::IndexIVFFlat idx2(
210  index_ivfflat->quantizer, index->d,
211  index_ivfflat->nlist, index_ivfflat->metric_type);
212  index_ivfflat->copy_subset_to(idx2, 0, i0, i1);
213  shards[i] = sub_cloners[i].clone_Index(&idx2);
214  }
215  }
216  faiss::IndexShards *res =
217  new faiss::IndexShards(index->d, true, false);
218 
219  for (int i = 0; i < n; i++) {
220  res->add_shard(shards[i]);
221  }
222  res->own_fields = true;
223  assert(index->ntotal == res->ntotal);
224  return res;
225  }
226  } else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
227  if (verbose) {
228  printf("cloning MultiIndexQuantizer: "
229  "will be valid only for search k=1\n");
230  }
231  const ProductQuantizer & pq = miq->pq;
232  IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
233  splitv->own_fields = true;
234 
235  for (int m = 0; m < pq.M; m++) {
236  // which GPU(s) will be assigned to this sub-quantizer
237 
238  long i0 = m * n / pq.M;
239  long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
240  std::vector<ToGpuCloner> sub_cloners_2;
241  sub_cloners_2.insert(
242  sub_cloners_2.begin(), sub_cloners.begin() + i0,
243  sub_cloners.begin() + i1);
244  ToGpuClonerMultiple cm(sub_cloners_2, *this);
245  IndexFlatL2 idxc (pq.dsub);
246  idxc.add (pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub);
247  Index *idx2 = cm.clone_Index(&idxc);
248  splitv->add_sub_index(idx2);
249  }
250  return splitv;
251  } else {
252  return Cloner::clone_Index(index);
253  }
254  }
255 
256 
257 };
258 
259 
260 
261 faiss::Index * index_cpu_to_gpu_multiple(
262  std::vector<GpuResources*> & resources,
263  std::vector<int> &devices,
264  const faiss::Index *index,
265  const GpuMultipleClonerOptions *options)
266 {
267  GpuMultipleClonerOptions defaults;
268  ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
269  return cl.clone_Index(index);
270 }
271 
272 
273 
274 /**********************************************************
275  * Parameters to auto-tune on GpuIndex'es
276  **********************************************************/
277 
278 #define DC(classname) auto ix = dynamic_cast<const classname *>(index)
279 
280 
282 {
283  if (DC (IndexPreTransform)) {
284  index = ix->index;
285  }
286  if (DC (IndexProxy)) {
287  if (ix->count() == 0) return;
288  index = ix->at(0);
289  }
290  if (DC (faiss::IndexShards)) {
291  if (ix->shard_indexes.size() == 0) return;
292  index = ix->shard_indexes[0];
293  }
294  if (DC (GpuIndexIVF)) {
295  ParameterRange & pr = add_range("nprobe");
296  for (int i = 0; i < 12; i++) {
297  size_t nprobe = 1 << i;
298  if (nprobe >= ix->getNumLists() ||
299  nprobe > 1024) break;
300  pr.values.push_back (nprobe);
301  }
302  }
303  // not sure we should call the parent initializer
304 }
305 
306 
307 
308 #undef DC
309 // non-const version
310 #define DC(classname) auto *ix = dynamic_cast<classname *>(index)
311 
312 
313 
315  Index * index, const std::string & name, double val) const
316 {
317  if (DC (IndexPreTransform)) {
318  index = ix->index;
319  }
320  if (DC (IndexProxy)) {
321  for (int i = 0; i < ix->count(); i++)
322  set_index_parameter (ix->at(i), name, val);
323  return;
324  }
325  if (DC (faiss::IndexShards)) {
326  for (auto sub_index : ix->shard_indexes)
327  set_index_parameter (sub_index, name, val);
328  return;
329  }
330  if (name == "nprobe") {
331  DC (GpuIndexIVF);
332  FAISS_ASSERT(ix);
333  ix->setNumProbes (int (val));
334  return;
335  }
336  FAISS_ASSERT (!"unknown parameter");
337 }
338 
339 
340 
341 
342 } } // namespace
bool shard
shard rather than copying to each GPU
Definition: GpuAutoTune.h:57
bool storeTransposed
For GpuIndexFlat, store data in transposed layout?
Definition: GpuAutoTune.h:44
void initialize(const faiss::Index *index) override
initialize with reasonable parameters for the index
GpuClonerOptions: set some options on how to copy to GPU
Definition: GpuAutoTune.h:31
void copyFrom(const faiss::IndexIVFFlat *index)
size_t dsub
dimensionality of each subvector
bool usePrecomputed
use precomputed tables?
Definition: GpuAutoTune.h:40
ParameterRange & add_range(const char *name)
add a new parameter
Definition: AutoTune.cpp:321
int d
vector dimension
Definition: Index.h:66
void set_index_parameter(faiss::Index *index, const std::string &name, double val) const override
set a combination of parameters on an index
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
IndicesOptions indicesOptions
how should indices be stored on GpuIndexIVFPQ?
Definition: GpuAutoTune.h:33
size_t ksub
number of centroids for each subquantizer
bool useFloat16CoarseQuantizer
is the coarse quantizer in float16?
Definition: GpuAutoTune.h:35
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
virtual void add(idx_t n, const float *x) override
Definition: IndexFlat.cpp:41
void addIndex(faiss::Index *index)
Definition: IndexProxy.cpp:33
GpuIndexIVFPQ: IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:25
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:34
size_t M
number of subquantizers
void setPrecomputedCodes(bool enable)
Enable or disable pre-computed codes.
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
ParameterRange: possible values of a parameter, sorted from least to most expensive/accurate
Definition: AutoTune.h:126
size_t d
size of the input vectors
long reserveVecs
reserve vectors in the invfiles?
Definition: GpuAutoTune.h:42
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:32
std::vector< float > centroids
Centroid table, size M * ksub * dsub.