Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuAutoTune.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 #include "GpuAutoTune.h"
11 
12 
13 #include "GpuIndex.h"
14 #include "../FaissAssert.h"
15 #include "../index_io.h"
16 #include "../IndexFlat.h"
17 #include "../IndexIVF.h"
18 #include "../IndexIVFPQ.h"
19 #include "../VectorTransform.h"
20 #include "../MetaIndexes.h"
21 #include "GpuIndexFlat.h"
22 #include "GpuIndexIVFFlat.h"
23 #include "GpuIndexIVFPQ.h"
24 #include "IndexProxy.h"
25 
26 namespace faiss { namespace gpu {
27 
28 /**********************************************************
29  * Cloning from/to GPU
30  **********************************************************/
31 
32 
34 
35  Index *clone_Index(const Index *index) override {
36  if(auto ifl = dynamic_cast<const GpuIndexFlat *>(index)) {
37  IndexFlat *res = new IndexFlat();
38  ifl->copyTo(res);
39  return res;
40  } else if(auto ifl = dynamic_cast<const GpuIndexIVFFlat *>(index)) {
41  IndexIVFFlat *res = new IndexIVFFlat();
42  ifl->copyTo(res);
43  return res;
44  } else if(auto ipq = dynamic_cast<const GpuIndexIVFPQ *>(index)) {
45  IndexIVFPQ *res = new IndexIVFPQ();
46  ipq->copyTo(res);
47  return res;
48  } else {
49  return Cloner::clone_Index(index);
50  }
51  }
52 };
53 
54 faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index)
55 {
56  ToCPUCloner cl;
57  return cl.clone_Index(gpu_index);
58 }
59 
60 
61 
62 GpuClonerOptions::GpuClonerOptions():
63  indicesOptions(INDICES_64_BIT),
64  useFloat16CoarseQuantizer(false),
65  useFloat16(false),
66  usePrecomputed(true),
67  reserveVecs(0),
68  verbose(0)
69 {}
70 
71 
73  GpuResources *resources;
74  int device;
75 
76  ToGpuCloner(GpuResources *resources, int device, const GpuClonerOptions &options):
77  GpuClonerOptions(options), resources(resources), device(device)
78  {}
79 
80  Index *clone_Index(const Index *index) override {
81  if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
82  return new GpuIndexFlat(resources, device, useFloat16, ifl);
83  } else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
84  GpuIndexIVFFlat *res =
85  new GpuIndexIVFFlat(resources,
86  device,
88  useFloat16,
89  ifl->d,
90  ifl->nlist,
92  ifl->metric_type);
93  if(reserveVecs > 0 && ifl->ntotal == 0)
95  res->copyFrom(ifl);
96  return res;
97  } else if(auto ipq = dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
98  if(verbose)
99  printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ indicesOptions=%d "
100  "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
101  ipq->ntotal, indicesOptions, usePrecomputed,
103  GpuIndexIVFPQ *res = new GpuIndexIVFPQ(
104  resources, device, indicesOptions, useFloat16,
105  ipq);
107  if(reserveVecs > 0 && ipq->ntotal == 0)
109  return res;
110  } else {
111  return Cloner::clone_Index(index);
112  }
113  }
114 
115 };
116 
117 
118 faiss::Index * index_cpu_to_gpu(
119  GpuResources* resources, int device,
120  const faiss::Index *index,
121  const GpuClonerOptions *options)
122 {
123  GpuClonerOptions defaults;
124  ToGpuCloner cl(resources, device, options ? *options : defaults);
125  return cl.clone_Index(index);
126 }
127 
128 GpuMultipleClonerOptions::GpuMultipleClonerOptions(): shard(false)
129 {}
130 
132  std::vector<ToGpuCloner> sub_cloners;
133 
134  ToGpuClonerMultiple(std::vector<GpuResources *> & resources,
135  std::vector<int>& devices,
136  const GpuMultipleClonerOptions &options):
137  GpuMultipleClonerOptions(options)
138  {
139  FAISS_ASSERT(resources.size() == devices.size());
140  for(int i = 0; i < resources.size(); i++) {
141  sub_cloners.push_back(ToGpuCloner(
142  resources[i], devices[i], options));
143  }
144  }
145 
146 
147  ToGpuClonerMultiple(const std::vector<ToGpuCloner> & sub_cloners,
148  const GpuMultipleClonerOptions &options):
149  GpuMultipleClonerOptions(options),
150  sub_cloners(sub_cloners)
151  {}
152 
153 
154  Index *clone_Index(const Index *index) override {
155  long n = sub_cloners.size();
156 
157  if (n == 1)
158  return sub_cloners[0].clone_Index(index);
159 
160  if(dynamic_cast<const IndexFlat *>(index) ||
161  dynamic_cast<const faiss::IndexIVFFlat *>(index) ||
162  dynamic_cast<const faiss::IndexIVFPQ *>(index)) {
163  if(!shard) {
164  IndexProxy * res = new IndexProxy();
165  for(auto & sub_cloner: sub_cloners) {
166  res->addIndex(sub_cloner.clone_Index(index));
167  }
168  res->own_fields = true;
169  return res;
170  } else {
171  auto index_ivfpq =
172  dynamic_cast<const faiss::IndexIVFPQ *>(index);
173  auto index_ivfflat =
174  dynamic_cast<const faiss::IndexIVFFlat *>(index);
175  FAISS_ASSERT (index_ivfpq || index_ivfflat ||
176  !"IndexShards implemented only for "
177  "IndexIVFFlat or IndexIVFPQ");
178  std::vector<faiss::Index*> shards(n);
179 
180  for(long i = 0; i < n; i++) {
181  // make a shallow copy
182  long i0 = i * index->ntotal / n;
183  long i1 = (i + 1) * index->ntotal / n;
184  if(verbose)
185  printf("IndexShards shard %ld indices %ld:%ld\n",
186  i, i0, i1);
187 
188  if(reserveVecs)
189  sub_cloners[i].reserveVecs =
190  (reserveVecs + n - 1) / n;
191 
192  if (index_ivfpq) {
193  faiss::IndexIVFPQ idx2(
194  index_ivfpq->quantizer, index_ivfpq->d,
195  index_ivfpq->nlist, index_ivfpq->code_size,
196  index_ivfpq->pq.nbits);
197  idx2.pq = index_ivfpq->pq;
198  idx2.use_precomputed_table = 0;
199  idx2.is_trained = index->is_trained;
200  index_ivfpq->copy_subset_to(idx2, 0, i0, i1);
201  shards[i] = sub_cloners[i].clone_Index(&idx2);
202  } else if (index_ivfflat) {
203  faiss::IndexIVFFlat idx2(
204  index_ivfflat->quantizer, index->d,
205  index_ivfflat->nlist, index_ivfflat->metric_type);
206  index_ivfflat->copy_subset_to(idx2, 0, i0, i1);
207  shards[i] = sub_cloners[i].clone_Index(&idx2);
208  }
209  }
210  faiss::IndexShards *res =
211  new faiss::IndexShards(index->d, true, false);
212 
213  for (int i = 0; i < n; i++) {
214  res->add_shard(shards[i]);
215  }
216  res->own_fields = true;
217  assert(index->ntotal == res->ntotal);
218  return res;
219  }
220  } else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
221  if (verbose) {
222  printf("cloning MultiIndexQuantizer: "
223  "will be valid only for search k=1\n");
224  }
225  const ProductQuantizer & pq = miq->pq;
226  IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true);
227  splitv->own_fields = true;
228 
229  for (int m = 0; m < pq.M; m++) {
230  // which GPU(s) will be assigned to this sub-quantizer
231 
232  long i0 = m * n / pq.M;
233  long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
234  std::vector<ToGpuCloner> sub_cloners_2;
235  sub_cloners_2.insert(
236  sub_cloners_2.begin(), sub_cloners.begin() + i0,
237  sub_cloners.begin() + i1);
238  ToGpuClonerMultiple cm(sub_cloners_2, *this);
239  IndexFlatL2 idxc (pq.dsub);
240  idxc.add (pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub);
241  Index *idx2 = cm.clone_Index(&idxc);
242  splitv->add_sub_index(idx2);
243  }
244  return splitv;
245  } else {
246  return Cloner::clone_Index(index);
247  }
248  }
249 
250 
251 };
252 
253 
254 
255 faiss::Index * index_cpu_to_gpu_multiple(
256  std::vector<GpuResources*> & resources,
257  std::vector<int> &devices,
258  const faiss::Index *index,
259  const GpuMultipleClonerOptions *options)
260 {
261  GpuMultipleClonerOptions defaults;
262  ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults);
263  return cl.clone_Index(index);
264 }
265 
266 
267 
268 /**********************************************************
269  * Parameters to auto-tune on GpuIndex'es
270  **********************************************************/
271 
272 #define DC(classname) auto ix = dynamic_cast<const classname *>(index)
273 
274 
276 {
277  if (DC (IndexPreTransform)) {
278  index = ix->index;
279  }
280  if (DC (IndexProxy)) {
281  if (ix->count() == 0) return;
282  index = ix->at(0);
283  }
284  if (DC (faiss::IndexShards)) {
285  if (ix->shard_indexes.size() == 0) return;
286  index = ix->shard_indexes[0];
287  }
288  if (DC (GpuIndexIVF)) {
289  ParameterRange & pr = add_range("nprobe");
290  for (int i = 0; i < 12; i++) {
291  size_t nprobe = 1 << i;
292  if (nprobe >= ix->getNumLists() ||
293  nprobe > 1024) break;
294  pr.values.push_back (nprobe);
295  }
296  }
297  // not sure we should call the parent initializer
298 }
299 
300 
301 
302 #undef DC
303 // non-const version
304 #define DC(classname) auto *ix = dynamic_cast<classname *>(index)
305 
306 
307 
309  Index * index, const std::string & name, double val) const
310 {
311  if (DC (IndexPreTransform)) {
312  index = ix->index;
313  }
314  if (DC (IndexProxy)) {
315  for (int i = 0; i < ix->count(); i++)
316  set_index_parameter (ix->at(i), name, val);
317  return;
318  }
319  if (DC (faiss::IndexShards)) {
320  for (auto sub_index : ix->shard_indexes)
321  set_index_parameter (sub_index, name, val);
322  return;
323  }
324  if (name == "nprobe") {
325  DC (GpuIndexIVF);
326  FAISS_ASSERT(ix);
327  ix->setNumProbes (int (val));
328  return;
329  }
330  FAISS_ASSERT (!"unknown parameter");
331 }
332 
333 
334 
335 
336 } } // namespace
bool shard
shard the index across the GPUs rather than copying the whole index to each GPU
Definition: GpuAutoTune.h:55
void initialize(const faiss::Index *index) override
initialize with reasonable parameters for the index
set some options on how to copy to GPU
Definition: GpuAutoTune.h:31
void copyFrom(const faiss::IndexIVFFlat *index)
size_t dsub
dimensionality of each subvector
bool usePrecomputed
use precomputed tables?
Definition: GpuAutoTune.h:40
ParameterRange & add_range(const char *name)
add a new parameter
Definition: AutoTune.cpp:321
int d
vector dimension
Definition: Index.h:66
void set_index_parameter(faiss::Index *index, const std::string &name, double val) const override
set a combination of parameters on an index
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
IndicesOptions indicesOptions
how should indices be stored on GpuIndexIVFPQ?
Definition: GpuAutoTune.h:33
size_t ksub
number of centroids for each subquantizer
bool useFloat16CoarseQuantizer
is the coarse quantizer in float16?
Definition: GpuAutoTune.h:35
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
virtual void add(idx_t n, const float *x) override
Definition: IndexFlat.cpp:41
void addIndex(faiss::Index *index)
Definition: IndexProxy.cpp:33
IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:25
ProductQuantizer pq
produces the codes
Definition: IndexIVFPQ.h:34
size_t M
number of subquantizers
void setPrecomputedCodes(bool enable)
Enable or disable pre-computed codes.
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
possible values of a parameter, sorted from least to most expensive/accurate
Definition: AutoTune.h:126
size_t d
size of the input vectors
long reserveVecs
reserve space for this many vectors in the inverted files?
Definition: GpuAutoTune.h:42
int use_precomputed_table
if by_residual, build precompute tables
Definition: IndexIVFPQ.h:32
std::vector< float > centroids
Centroid table, size M * ksub * dsub.