// TestGpuIndexIVFFlat.cpp (Faiss GPU test suite)
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */
8 
9 
#include "../../IndexFlat.h"
#include "../../IndexIVFFlat.h"
#include "../GpuIndexIVFFlat.h"
#include "../StandardGpuResources.h"
#include "../utils/DeviceUtils.h"
#include "../test/TestUtils.h"
#include <gtest/gtest.h>
#include <glog/logging.h>
#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
#include <vector>
21 
// FIXME: figure out a better way to test fp16
// Maximum relative error tolerated when comparing GPU results against the
// CPU reference: loose bound when any fp16 storage/math is involved,
// tight bound for pure fp32.
constexpr float kF16MaxRelErr = 0.3f;
constexpr float kF32MaxRelErr = 0.03f;
26 
// Randomized test parameters shared by all tests in this file.
// NOTE(review): the randVal/randSelect calls presumably draw from the shared
// test RNG seeded in main(); keep their order fixed so seeded runs stay
// reproducible.
struct Options {
  Options() {
    // number of vectors added to the index under test
    numAdd = faiss::gpu::randVal(4000, 20000);
    dim = faiss::gpu::randVal(64, 200);

    // ~sqrt(numAdd) IVF lists, with 40x that many training points
    numCentroids = std::sqrt((float) numAdd);
    numTrain = numCentroids * 40;
    nprobe = faiss::gpu::randVal(10, numCentroids);
    numQuery = faiss::gpu::randVal(32, 100);
    // cap k relative to the dataset size (numAdd >= 4000, so k stays >= 10)
    k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
    indicesOpt = faiss::gpu::randSelect({
        faiss::gpu::INDICES_CPU,
        faiss::gpu::INDICES_32_BIT,
        faiss::gpu::INDICES_64_BIT});

    // run on a random device to exercise multi-GPU machines
    device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
  }

  // Human-readable description of this parameter set; used to label
  // comparison failures.
  std::string toString() const {
    std::stringstream str;
    str << "IVFFlat device " << device
        << " numVecs " << numAdd
        << " dim " << dim
        << " numCentroids " << numCentroids
        << " nprobe " << nprobe
        << " numQuery " << numQuery
        << " k " << k
        << " indicesOpt " << indicesOpt;

    return str.str();
  }

  int numAdd;        // number of database vectors
  int dim;           // vector dimensionality
  int numCentroids;  // number of IVF lists
  int numTrain;      // number of training vectors
  int nprobe;        // IVF lists probed per query
  int numQuery;      // number of query vectors
  int k;             // nearest neighbors requested per query
  int device;        // GPU device ordinal
  faiss::gpu::IndicesOptions indicesOpt;  // how the GPU stores user indices
};
69 
70 void queryTest(faiss::MetricType metricType,
71  bool useFloat16CoarseQuantizer,
72  bool useFloat16,
73  int dimOverride = -1) {
74  for (int tries = 0; tries < 3; ++tries) {
75  Options opt;
76  opt.dim = dimOverride != -1 ? dimOverride : opt.dim;
77 
78  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
79  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
80 
81  faiss::IndexFlatL2 quantizerL2(opt.dim);
82  faiss::IndexFlatIP quantizerIP(opt.dim);
83  faiss::Index* quantizer =
84  metricType == faiss::METRIC_L2 ?
85  (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;
86 
87  faiss::IndexIVFFlat cpuIndex(quantizer,
88  opt.dim, opt.numCentroids, metricType);
89  cpuIndex.train(opt.numTrain, trainVecs.data());
90  cpuIndex.add(opt.numAdd, addVecs.data());
91  cpuIndex.nprobe = opt.nprobe;
92 
94  res.noTempMemory();
95 
97  config.device = opt.device;
98  config.indicesOptions = opt.indicesOpt;
99  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
100  config.useFloat16IVFStorage = useFloat16;
101 
102  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
103  cpuIndex.d,
104  cpuIndex.nlist,
105  cpuIndex.metric_type,
106  config);
107  gpuIndex.copyFrom(&cpuIndex);
108  gpuIndex.setNumProbes(opt.nprobe);
109 
110  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
111  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
112  opt.numQuery, opt.dim, opt.k, opt.toString(),
113  compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
114  // FIXME: the fp16 bounds are
115  // useless when math (the accumulator) is
116  // in fp16. Figure out another way to test
117  compFloat16 ? 0.70f : 0.1f,
118  compFloat16 ? 0.65f : 0.015f);
119  }
120 }
121 
122 void addTest(faiss::MetricType metricType,
123  bool useFloat16CoarseQuantizer,
124  bool useFloat16) {
125  for (int tries = 0; tries < 5; ++tries) {
126  Options opt;
127 
128  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
129  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
130 
131  faiss::IndexFlatL2 quantizerL2(opt.dim);
132  faiss::IndexFlatIP quantizerIP(opt.dim);
133  faiss::Index* quantizer =
134  metricType == faiss::METRIC_L2 ?
135  (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;
136 
137  faiss::IndexIVFFlat cpuIndex(quantizer,
138  opt.dim,
139  opt.numCentroids,
140  metricType);
141  cpuIndex.train(opt.numTrain, trainVecs.data());
142  cpuIndex.nprobe = opt.nprobe;
143 
145  res.noTempMemory();
146 
148  config.device = opt.device;
149  config.indicesOptions = opt.indicesOpt;
150  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
151  config.useFloat16IVFStorage = useFloat16;
152 
153  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
154  cpuIndex.d,
155  cpuIndex.nlist,
156  cpuIndex.metric_type,
157  config);
158  gpuIndex.copyFrom(&cpuIndex);
159  gpuIndex.setNumProbes(opt.nprobe);
160 
161  cpuIndex.add(opt.numAdd, addVecs.data());
162  gpuIndex.add(opt.numAdd, addVecs.data());
163 
164  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
165  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
166  opt.numQuery, opt.dim, opt.k, opt.toString(),
167  compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
168  compFloat16 ? 0.70f : 0.1f,
169  compFloat16 ? 0.30f : 0.015f);
170  }
171 }
172 
173 void copyToTest(bool useFloat16CoarseQuantizer,
174  bool useFloat16) {
175  Options opt;
176  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
177  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
178 
180  res.noTempMemory();
181 
183  config.device = opt.device;
184  config.indicesOptions = opt.indicesOpt;
185  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
186  config.useFloat16IVFStorage = useFloat16;
187 
188  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
189  opt.dim,
190  opt.numCentroids,
191  faiss::METRIC_L2,
192  config);
193  gpuIndex.train(opt.numTrain, trainVecs.data());
194  gpuIndex.add(opt.numAdd, addVecs.data());
195  gpuIndex.setNumProbes(opt.nprobe);
196 
197  // use garbage values to see if we overwrite then
198  faiss::IndexFlatL2 cpuQuantizer(1);
199  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer, 1, 1, faiss::METRIC_L2);
200  cpuIndex.nprobe = 1;
201 
202  gpuIndex.copyTo(&cpuIndex);
203 
204  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
205  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
206 
207  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
208  EXPECT_EQ(cpuIndex.d, opt.dim);
209  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
210  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
211 
212  // Query both objects; results should be equivalent
213  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
214  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
215  opt.numQuery, opt.dim, opt.k, opt.toString(),
216  compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
217  compFloat16 ? 0.70f : 0.1f,
218  compFloat16 ? 0.30f : 0.015f);
219 }
220 
221 void copyFromTest(bool useFloat16CoarseQuantizer,
222  bool useFloat16) {
223  Options opt;
224  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
225  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
226 
227  faiss::IndexFlatL2 cpuQuantizer(opt.dim);
228  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer,
229  opt.dim,
230  opt.numCentroids,
231  faiss::METRIC_L2);
232  cpuIndex.nprobe = opt.nprobe;
233  cpuIndex.train(opt.numTrain, trainVecs.data());
234  cpuIndex.add(opt.numAdd, addVecs.data());
235 
236  // use garbage values to see if we overwrite then
238  res.noTempMemory();
239 
241  config.device = opt.device;
242  config.indicesOptions = opt.indicesOpt;
243  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
244  config.useFloat16IVFStorage = useFloat16;
245 
246  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
247  1,
248  1,
249  faiss::METRIC_L2,
250  config);
251  gpuIndex.setNumProbes(1);
252 
253  gpuIndex.copyFrom(&cpuIndex);
254 
255  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
256  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
257 
258  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
259  EXPECT_EQ(cpuIndex.d, opt.dim);
260  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
261  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
262 
263  // Query both objects; results should be equivalent
264  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
265  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
266  opt.numQuery, opt.dim, opt.k, opt.toString(),
267  compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
268  compFloat16 ? 0.70f : 0.1f,
269  compFloat16 ? 0.30f : 0.015f);
270 }
271 
//
// Add tests: exercise the GPU add path for each combination of
// (coarse quantizer precision) x (IVF storage precision) x (metric).
// Test names read Float<quantizer bits>_<storage bits>_Add_<metric>.
//

TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) {
  addTest(faiss::METRIC_L2, false, false);
}

TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) {
  addTest(faiss::METRIC_INNER_PRODUCT, false, false);
}

TEST(TestGpuIndexIVFFlat, Float32_16_Add_L2) {
  addTest(faiss::METRIC_L2, false, true);
}

TEST(TestGpuIndexIVFFlat, Float32_16_Add_IP) {
  addTest(faiss::METRIC_INNER_PRODUCT, false, true);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) {
  addTest(faiss::METRIC_L2, true, false);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
  addTest(faiss::METRIC_INNER_PRODUCT, true, false);
}
295 
//
// General query tests
//

TEST(TestGpuIndexIVFFlat, Float32_Query_L2) {
  queryTest(faiss::METRIC_L2, false, false);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_IP) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, false);
}

// fp16 IVF list storage
TEST(TestGpuIndexIVFFlat, Float16_Query_L2) {
  queryTest(faiss::METRIC_L2, false, true);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true);
}

// float16 coarse quantizer

TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) {
  queryTest(faiss::METRIC_L2, true, false);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
  queryTest(faiss::METRIC_INNER_PRODUCT, true, false);
}
325 
//
// There are IVF list scanning specializations for 64-d and 128-d that we
// make sure we explicitly test here (queryTest's dimOverride pins the dim).
//

TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) {
  queryTest(faiss::METRIC_L2, false, false, 64);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 64);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2_64) {
  queryTest(faiss::METRIC_L2, false, true, 64);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP_64) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 64);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) {
  queryTest(faiss::METRIC_L2, false, false, 128);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 128);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2_128) {
  queryTest(faiss::METRIC_L2, false, true, 128);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP_128) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 128);
}

// For 256-d, only float16 is specialized

TEST(TestGpuIndexIVFFlat, Float16_Query_L2_256) {
  queryTest(faiss::METRIC_L2, false, true, 256);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP_256) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 256);
}
372 
//
// Copy tests (GPU -> CPU round trip via copyTo)
//

TEST(TestGpuIndexIVFFlat, Float32_16_CopyTo) {
  copyToTest(false, true);
}

TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) {
  copyToTest(false, false);
}
384 
385 TEST(TestGpuIndexIVFFlat, Float32_negative) {
386  Options opt;
387 
388  auto trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
389  auto addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
390 
391  // Put all vecs on negative side
392  for (auto& f : trainVecs) {
393  f = std::abs(f) * -1.0f;
394  }
395 
396  for (auto& f : addVecs) {
397  f *= std::abs(f) * -1.0f;
398  }
399 
400  faiss::IndexFlatIP quantizerIP(opt.dim);
401  faiss::Index* quantizer = (faiss::Index*) &quantizerIP;
402 
403  faiss::IndexIVFFlat cpuIndex(quantizer,
404  opt.dim, opt.numCentroids,
405  faiss::METRIC_INNER_PRODUCT);
406  cpuIndex.train(opt.numTrain, trainVecs.data());
407  cpuIndex.add(opt.numAdd, addVecs.data());
408  cpuIndex.nprobe = opt.nprobe;
409 
411  res.noTempMemory();
412 
414  config.device = opt.device;
415  config.indicesOptions = opt.indicesOpt;
416 
417  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
418  cpuIndex.d,
419  cpuIndex.nlist,
420  cpuIndex.metric_type,
421  config);
422  gpuIndex.copyFrom(&cpuIndex);
423  gpuIndex.setNumProbes(opt.nprobe);
424 
425  // Construct a positive test set
426  auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
427 
428  // Put all vecs on positive size
429  for (auto& f : queryVecs) {
430  f = std::abs(f);
431  }
432 
433  bool compFloat16 = false;
434  faiss::gpu::compareIndices(queryVecs,
435  cpuIndex, gpuIndex,
436  opt.numQuery, opt.dim, opt.k, opt.toString(),
437  compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
438  // FIXME: the fp16 bounds are
439  // useless when math (the accumulator) is
440  // in fp16. Figure out another way to test
441  compFloat16 ? 0.99f : 0.1f,
442  compFloat16 ? 0.65f : 0.015f);
443 }
444 
445 //
446 // NaN tests
447 //
448 
449 TEST(TestGpuIndexIVFFlat, QueryNaN) {
450  Options opt;
451 
452  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
453  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
454 
456  res.noTempMemory();
457 
459  config.device = opt.device;
460  config.indicesOptions = opt.indicesOpt;
461  config.flatConfig.useFloat16 = faiss::gpu::randBool();
462  config.useFloat16IVFStorage = faiss::gpu::randBool();
463 
464  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
465  opt.dim,
466  opt.numCentroids,
467  faiss::METRIC_L2,
468  config);
469  gpuIndex.setNumProbes(opt.nprobe);
470 
471  gpuIndex.train(opt.numTrain, trainVecs.data());
472  gpuIndex.add(opt.numAdd, addVecs.data());
473 
474  int numQuery = 10;
475  std::vector<float> nans(numQuery * opt.dim,
476  std::numeric_limits<float>::quiet_NaN());
477 
478  std::vector<float> distances(numQuery * opt.k, 0);
479  std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
480 
481  gpuIndex.search(numQuery,
482  nans.data(),
483  opt.k,
484  distances.data(),
485  indices.data());
486 
487  for (int q = 0; q < numQuery; ++q) {
488  for (int k = 0; k < opt.k; ++k) {
489  EXPECT_EQ(indices[q * opt.k + k], -1);
490  EXPECT_EQ(distances[q * opt.k + k], std::numeric_limits<float>::max());
491  }
492  }
493 }
494 
495 TEST(TestGpuIndexIVFFlat, AddNaN) {
496  Options opt;
497 
499  res.noTempMemory();
500 
502  config.device = opt.device;
503  config.indicesOptions = opt.indicesOpt;
504  config.flatConfig.useFloat16 = faiss::gpu::randBool();
505  config.useFloat16IVFStorage = faiss::gpu::randBool();
506 
507  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
508  opt.dim,
509  opt.numCentroids,
510  faiss::METRIC_L2,
511  config);
512  gpuIndex.setNumProbes(opt.nprobe);
513 
514  int numNans = 10;
515  std::vector<float> nans(numNans * opt.dim,
516  std::numeric_limits<float>::quiet_NaN());
517 
518  // Make one vector valid, which should actually add
519  for (int i = 0; i < opt.dim; ++i) {
520  nans[i] = 0.0f;
521  }
522 
523  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
524  gpuIndex.train(opt.numTrain, trainVecs.data());
525 
526  // should not crash
527  EXPECT_EQ(gpuIndex.ntotal, 0);
528  gpuIndex.add(numNans, nans.data());
529 
530  // Only the single valid vector should have added
531  EXPECT_EQ(gpuIndex.ntotal, 1);
532 
533  std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
534  std::vector<float> distance(opt.numQuery * opt.k, 0);
535  std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
536 
537  // should not crash
538  gpuIndex.search(opt.numQuery, queryVecs.data(), opt.k,
539  distance.data(), indices.data());
540 
541 }
542 
543 TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
544  // Construct on a random device to test multi-device, if we have
545  // multiple devices
546  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
547 
548  if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
549  return;
550  }
551 
552  int dim = 256;
553 
554  int numCentroids = 1024;
555  // 24 GB of vecs should be enough to test unified memory in
556  // oversubscription mode
557  size_t numAdd =
558  (size_t) 1024 * 1024 * 1024 * 24 / ((size_t) dim * sizeof(float));
559  size_t numTrain = numCentroids * 40;
560  int numQuery = 10;
561  int k = 10;
562  int nprobe = 8;
563 
564  LOG(INFO) << "generating vecs";
565  std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
566  std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);
567 
568  LOG(INFO) << "train CPU";
569  faiss::IndexFlatL2 quantizer(dim);
570  faiss::IndexIVFFlat cpuIndex(&quantizer, dim, numCentroids, faiss::METRIC_L2);
571  LOG(INFO) << "train CPU";
572  cpuIndex.train(numTrain, trainVecs.data());
573  LOG(INFO) << "add CPU";
574  cpuIndex.add(numAdd, addVecs.data());
575  cpuIndex.nprobe = nprobe;
576 
578  res.noTempMemory();
579 
581  config.device = device;
582  config.memorySpace = faiss::gpu::MemorySpace::Unified;
583 
584  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
585  dim,
586  numCentroids,
587  faiss::METRIC_L2,
588  config);
589  LOG(INFO) << "copy from CPU";
590  gpuIndex.copyFrom(&cpuIndex);
591  gpuIndex.setNumProbes(nprobe);
592 
593  LOG(INFO) << "compare";
594 
595  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
596  numQuery, dim, k, "Unified Memory",
597  kF32MaxRelErr,
598  0.1f,
599  0.015f);
600 }
601 
// Test entry point: fixed RNG seed keeps the randomized Options reproducible.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);

  // just run with a fixed test seed
  faiss::gpu::setTestSeed(100);

  return RUN_ALL_TESTS();
}
// Doxygen cross-reference notes (extraction residue, preserved as comments):
// - GpuIndexFlatConfig::useFloat16 (GpuIndexFlat.h:34): whether or not data is
//   stored as float16.
// - GpuIndexConfig::device (GpuIndex.h:26): GPU device on which the index is
//   resident.
// - GpuIndexIVFConfig::flatConfig (GpuIndexIVF.h:33): configuration for the
//   coarse quantizer object.
// - GpuIndexConfig::memorySpace (GpuIndex.h:31).
// - GpuIndexIVFConfig::indicesOptions (GpuIndexIVF.h:30): index storage
//   options for the GPU.
// - faiss::MetricType (Index.h:45): some algorithms support both an inner
//   product version and an L2 search version.