Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
TestGpuIndexIVFFlat.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "../../IndexFlat.h"
12 #include "../../IndexIVF.h"
13 #include "../GpuIndexIVFFlat.h"
14 #include "../StandardGpuResources.h"
15 #include "../utils/DeviceUtils.h"
16 #include "../test/TestUtils.h"
17 #include <gtest/gtest.h>
18 #include <glog/logging.h>
19 #include <sstream>
20 #include <vector>
21 
22 // FIXME: figure out a better way to test fp16
23 constexpr float kF16MaxRelErr = 0.3f;
24 constexpr float kF32MaxRelErr = 0.03f;
25 
26 
27 struct Options {
28  Options() {
29  numAdd = faiss::gpu::randVal(4000, 20000);
30  dim = faiss::gpu::randVal(64, 200);
31 
32  numCentroids = std::sqrt((float) numAdd);
33  numTrain = numCentroids * 40;
34  nprobe = faiss::gpu::randVal(10, numCentroids);
35  numQuery = faiss::gpu::randVal(32, 100);
36  k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
37  indicesOpt = faiss::gpu::randSelect({
38  faiss::gpu::INDICES_CPU,
39  faiss::gpu::INDICES_32_BIT,
40  faiss::gpu::INDICES_64_BIT});
41 
42  device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
43  }
44 
45  std::string toString() const {
46  std::stringstream str;
47  str << "IVFFlat device " << device
48  << " numVecs " << numAdd
49  << " dim " << dim
50  << " numCentroids " << numCentroids
51  << " nprobe " << nprobe
52  << " numQuery " << numQuery
53  << " k " << k
54  << " indicesOpt " << indicesOpt;
55 
56  return str.str();
57  }
58 
59  int numAdd;
60  int dim;
61  int numCentroids;
62  int numTrain;
63  int nprobe;
64  int numQuery;
65  int k;
66  int device;
67  faiss::gpu::IndicesOptions indicesOpt;
68 };
69 
70 void queryTest(faiss::MetricType metricType,
71  bool useFloat16CoarseQuantizer,
72  bool useFloat16,
73  int dimOverride = -1) {
74  for (int tries = 0; tries < 3; ++tries) {
75  faiss::gpu::newTestSeed();
76 
77  Options opt;
78  opt.dim = dimOverride != -1 ? dimOverride : opt.dim;
79 
80  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
81  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
82 
83  faiss::IndexFlatL2 quantizerL2(opt.dim);
84  faiss::IndexFlatIP quantizerIP(opt.dim);
85  faiss::Index* quantizer =
86  metricType == faiss::METRIC_L2 ?
87  (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;
88 
89  faiss::IndexIVFFlat cpuIndex(quantizer,
90  opt.dim, opt.numCentroids, metricType);
91  cpuIndex.train(opt.numTrain, trainVecs.data());
92  cpuIndex.add(opt.numAdd, addVecs.data());
93  cpuIndex.nprobe = opt.nprobe;
94 
96  res.noTempMemory();
97 
99  config.device = opt.device;
100  config.indicesOptions = opt.indicesOpt;
101  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
102  config.useFloat16IVFStorage = useFloat16;
103 
104  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
105  cpuIndex.d,
106  cpuIndex.nlist,
107  cpuIndex.metric_type,
108  config);
109  gpuIndex.copyFrom(&cpuIndex);
110  gpuIndex.setNumProbes(opt.nprobe);
111 
112  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
113  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
114  opt.numQuery, opt.dim, opt.k, opt.toString(),
115  compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
116  // FIXME: the fp16 bounds are
117  // useless when math (the accumulator) is
118  // in fp16. Figure out another way to test
119  compFloat16 ? 0.99f : 0.1f,
120  compFloat16 ? 0.65f : 0.015f);
121  }
122 }
123 
124 void addTest(faiss::MetricType metricType,
125  bool useFloat16CoarseQuantizer,
126  bool useFloat16) {
127  for (int tries = 0; tries < 5; ++tries) {
128  faiss::gpu::newTestSeed();
129 
130  Options opt;
131 
132  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
133  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
134 
135  faiss::IndexFlatL2 quantizerL2(opt.dim);
136  faiss::IndexFlatIP quantizerIP(opt.dim);
137  faiss::Index* quantizer =
138  metricType == faiss::METRIC_L2 ?
139  (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;
140 
141  faiss::IndexIVFFlat cpuIndex(quantizer,
142  opt.dim,
143  opt.numCentroids,
144  metricType);
145  cpuIndex.train(opt.numTrain, trainVecs.data());
146  cpuIndex.nprobe = opt.nprobe;
147 
149  res.noTempMemory();
150 
152  config.device = opt.device;
153  config.indicesOptions = opt.indicesOpt;
154  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
155  config.useFloat16IVFStorage = useFloat16;
156 
157  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
158  cpuIndex.d,
159  cpuIndex.nlist,
160  cpuIndex.metric_type,
161  config);
162  gpuIndex.copyFrom(&cpuIndex);
163  gpuIndex.setNumProbes(opt.nprobe);
164 
165  cpuIndex.add(opt.numAdd, addVecs.data());
166  gpuIndex.add(opt.numAdd, addVecs.data());
167 
168  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
169  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
170  opt.numQuery, opt.dim, opt.k, opt.toString(),
171  compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
172  compFloat16 ? 0.70f : 0.1f,
173  compFloat16 ? 0.30f : 0.015f);
174  }
175 }
176 
177 void copyToTest(bool useFloat16CoarseQuantizer,
178  bool useFloat16) {
179  faiss::gpu::newTestSeed();
180 
181  Options opt;
182  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
183  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
184 
186  res.noTempMemory();
187 
189  config.device = opt.device;
190  config.indicesOptions = opt.indicesOpt;
191  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
192  config.useFloat16IVFStorage = useFloat16;
193 
194  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
195  opt.dim,
196  opt.numCentroids,
197  faiss::METRIC_L2,
198  config);
199  gpuIndex.train(opt.numTrain, trainVecs.data());
200  gpuIndex.add(opt.numAdd, addVecs.data());
201  gpuIndex.setNumProbes(opt.nprobe);
202 
203  // use garbage values to see if we overwrite then
204  faiss::IndexFlatL2 cpuQuantizer(1);
205  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer, 1, 1, faiss::METRIC_L2);
206  cpuIndex.nprobe = 1;
207 
208  gpuIndex.copyTo(&cpuIndex);
209 
210  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
211  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
212 
213  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
214  EXPECT_EQ(cpuIndex.d, opt.dim);
215  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
216  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
217 
218  // Query both objects; results should be equivalent
219  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
220  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
221  opt.numQuery, opt.dim, opt.k, opt.toString(),
222  compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
223  compFloat16 ? 0.70f : 0.1f,
224  compFloat16 ? 0.30f : 0.015f);
225 }
226 
227 void copyFromTest(bool useFloat16CoarseQuantizer,
228  bool useFloat16) {
229  faiss::gpu::newTestSeed();
230 
231  Options opt;
232  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
233  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
234 
235  faiss::IndexFlatL2 cpuQuantizer(opt.dim);
236  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer,
237  opt.dim,
238  opt.numCentroids,
239  faiss::METRIC_L2);
240  cpuIndex.nprobe = opt.nprobe;
241  cpuIndex.train(opt.numTrain, trainVecs.data());
242  cpuIndex.add(opt.numAdd, addVecs.data());
243 
244  // use garbage values to see if we overwrite then
246  res.noTempMemory();
247 
249  config.device = opt.device;
250  config.indicesOptions = opt.indicesOpt;
251  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
252  config.useFloat16IVFStorage = useFloat16;
253 
254  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
255  1,
256  1,
257  faiss::METRIC_L2,
258  config);
259  gpuIndex.setNumProbes(1);
260 
261  gpuIndex.copyFrom(&cpuIndex);
262 
263  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
264  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
265 
266  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
267  EXPECT_EQ(cpuIndex.d, opt.dim);
268  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
269  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
270 
271  // Query both objects; results should be equivalent
272  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
273  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
274  opt.numQuery, opt.dim, opt.k, opt.toString(),
275  compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
276  compFloat16 ? 0.70f : 0.1f,
277  compFloat16 ? 0.30f : 0.015f);
278 }
279 
280 TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) {
281  addTest(faiss::METRIC_L2, false, false);
282 }
283 
284 TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) {
285  addTest(faiss::METRIC_INNER_PRODUCT, false, false);
286 }
287 
288 TEST(TestGpuIndexIVFFlat, Float32_16_Add_L2) {
289  addTest(faiss::METRIC_L2, false, true);
290 }
291 
292 TEST(TestGpuIndexIVFFlat, Float32_16_Add_IP) {
293  addTest(faiss::METRIC_INNER_PRODUCT, false, true);
294 }
295 
296 TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) {
297  addTest(faiss::METRIC_L2, true, false);
298 }
299 
300 TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
301  addTest(faiss::METRIC_INNER_PRODUCT, true, false);
302 }
303 
304 //
305 // General query tests
306 //
307 
308 TEST(TestGpuIndexIVFFlat, Float32_Query_L2) {
309  queryTest(faiss::METRIC_L2, false, false);
310 }
311 
312 TEST(TestGpuIndexIVFFlat, Float32_Query_IP) {
313  queryTest(faiss::METRIC_INNER_PRODUCT, false, false);
314 }
315 
316 TEST(TestGpuIndexIVFFlat, Float16_Query_L2) {
317  queryTest(faiss::METRIC_L2, false, true);
318 }
319 
320 TEST(TestGpuIndexIVFFlat, Float16_Query_IP) {
321  queryTest(faiss::METRIC_INNER_PRODUCT, false, true);
322 }
323 
324 // float16 coarse quantizer
325 
326 TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) {
327  queryTest(faiss::METRIC_L2, true, false);
328 }
329 
330 TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
331  queryTest(faiss::METRIC_INNER_PRODUCT, true, false);
332 }
333 
334 //
335 // There are IVF list scanning specializations for 64-d and 128-d that we
336 // make sure we explicitly test here
337 //
338 
339 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) {
340  queryTest(faiss::METRIC_L2, false, false, 64);
341 }
342 
343 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) {
344  queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 64);
345 }
346 
347 TEST(TestGpuIndexIVFFlat, Float16_Query_L2_64) {
348  queryTest(faiss::METRIC_L2, false, true, 64);
349 }
350 
351 TEST(TestGpuIndexIVFFlat, Float16_Query_IP_64) {
352  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 64);
353 }
354 
355 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) {
356  queryTest(faiss::METRIC_L2, false, false, 128);
357 }
358 
359 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
360  queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 128);
361 }
362 
363 TEST(TestGpuIndexIVFFlat, Float16_Query_L2_128) {
364  queryTest(faiss::METRIC_L2, false, true, 128);
365 }
366 
367 TEST(TestGpuIndexIVFFlat, Float16_Query_IP_128) {
368  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 128);
369 }
370 
371 // For 256-d, only float16 is specialized
372 
373 TEST(TestGpuIndexIVFFlat, Float16_Query_L2_256) {
374  queryTest(faiss::METRIC_L2, false, true, 256);
375 }
376 
377 TEST(TestGpuIndexIVFFlat, Float16_Query_IP_256) {
378  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 256);
379 }
380 
381 //
382 // Copy tests
383 //
384 
385 TEST(TestGpuIndexIVFFlat, Float32_16_CopyTo) {
386  copyToTest(false, true);
387 }
388 
389 TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) {
390  copyToTest(false, false);
391 }
392 
393 //
394 // NaN tests
395 //
396 
397 TEST(TestGpuIndexIVFFlat, QueryNaN) {
398  faiss::gpu::newTestSeed();
399 
400  Options opt;
401 
402  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
403  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
404 
406  res.noTempMemory();
407 
409  config.device = opt.device;
410  config.indicesOptions = opt.indicesOpt;
411  config.flatConfig.useFloat16 = faiss::gpu::randBool();
412  config.useFloat16IVFStorage = faiss::gpu::randBool();
413 
414  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
415  opt.dim,
416  opt.numCentroids,
417  faiss::METRIC_L2,
418  config);
419  gpuIndex.setNumProbes(opt.nprobe);
420 
421  gpuIndex.train(opt.numTrain, trainVecs.data());
422  gpuIndex.add(opt.numAdd, addVecs.data());
423 
424  int numQuery = 10;
425  std::vector<float> nans(numQuery * opt.dim,
426  std::numeric_limits<float>::quiet_NaN());
427 
428  std::vector<float> distances(numQuery * opt.k, 0);
429  std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
430 
431  gpuIndex.search(numQuery,
432  nans.data(),
433  opt.k,
434  distances.data(),
435  indices.data());
436 
437  for (int q = 0; q < numQuery; ++q) {
438  for (int k = 0; k < opt.k; ++k) {
439  EXPECT_EQ(indices[q * opt.k + k], -1);
440  EXPECT_EQ(distances[q * opt.k + k], std::numeric_limits<float>::max());
441  }
442  }
443 }
444 
445 TEST(TestGpuIndexIVFFlat, AddNaN) {
446  faiss::gpu::newTestSeed();
447 
448  Options opt;
449 
451  res.noTempMemory();
452 
454  config.device = opt.device;
455  config.indicesOptions = opt.indicesOpt;
456  config.flatConfig.useFloat16 = faiss::gpu::randBool();
457  config.useFloat16IVFStorage = faiss::gpu::randBool();
458 
459  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
460  opt.dim,
461  opt.numCentroids,
462  faiss::METRIC_L2,
463  config);
464  gpuIndex.setNumProbes(opt.nprobe);
465 
466  int numNans = 10;
467  std::vector<float> nans(numNans * opt.dim,
468  std::numeric_limits<float>::quiet_NaN());
469 
470  // Make one vector valid, which should actually add
471  for (int i = 0; i < opt.dim; ++i) {
472  nans[i] = 0.0f;
473  }
474 
475  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
476  gpuIndex.train(opt.numTrain, trainVecs.data());
477 
478  // should not crash
479  EXPECT_EQ(gpuIndex.ntotal, 0);
480  gpuIndex.add(numNans, nans.data());
481 
482  // Only the single valid vector should have added
483  EXPECT_EQ(gpuIndex.ntotal, 1);
484 
485  std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
486  std::vector<float> distance(opt.numQuery * opt.k, 0);
487  std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
488 
489  // should not crash
490  gpuIndex.search(opt.numQuery, queryVecs.data(), opt.k,
491  distance.data(), indices.data());
492 
493 }
494 
495 TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
496  // Construct on a random device to test multi-device, if we have
497  // multiple devices
498  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
499 
500  if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
501  return;
502  }
503 
504  int dim = 256;
505 
506  int numCentroids = 1024;
507  // 24 GB of vecs should be enough to test unified memory in
508  // oversubscription mode
509  size_t numAdd =
510  (size_t) 1024 * 1024 * 1024 * 24 / ((size_t) dim * sizeof(float));
511  size_t numTrain = numCentroids * 40;
512  int numQuery = 10;
513  int k = 10;
514  int nprobe = 8;
515 
516  LOG(INFO) << "generating vecs";
517  std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
518  std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);
519 
520  LOG(INFO) << "train CPU";
521  faiss::IndexFlatL2 quantizer(dim);
522  faiss::IndexIVFFlat cpuIndex(&quantizer, dim, numCentroids, faiss::METRIC_L2);
523  LOG(INFO) << "train CPU";
524  cpuIndex.train(numTrain, trainVecs.data());
525  LOG(INFO) << "add CPU";
526  cpuIndex.add(numAdd, addVecs.data());
527  cpuIndex.nprobe = nprobe;
528 
530  res.noTempMemory();
531 
533  config.device = device;
534  config.memorySpace = faiss::gpu::MemorySpace::Unified;
535 
536  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
537  dim,
538  numCentroids,
539  faiss::METRIC_L2,
540  config);
541  LOG(INFO) << "copy from CPU";
542  gpuIndex.copyFrom(&cpuIndex);
543  gpuIndex.setNumProbes(nprobe);
544 
545  LOG(INFO) << "compare";
546 
547  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
548  numQuery, dim, k, "Unified Memory",
549  kF32MaxRelErr,
550  0.1f,
551  0.015f);
552 }
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:35
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:27
GpuIndexFlatConfig flatConfig
Configuration for the coarse quantizer object.
Definition: GpuIndexIVF.h:34
MemorySpace memorySpace
Definition: GpuIndex.h:32
IndicesOptions indicesOptions
Index storage options for the GPU.
Definition: GpuIndexIVF.h:31
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:43