Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
TestGpuIndexFlat.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "../../IndexFlat.h"
12 #include "../GpuIndexFlat.h"
13 #include "../StandardGpuResources.h"
14 #include "../utils/DeviceUtils.h"
15 #include "../test/TestUtils.h"
16 #include <gtest/gtest.h>
17 #include <sstream>
18 #include <vector>
19 
20 // FIXME: figure out a better way to test fp16
21 constexpr float kF16MaxRelErr = 0.07f;
22 constexpr float kF32MaxRelErr = 6e-3f;
23 
// Bundle of knobs controlling a single testFlat() run. The defaults produce
// a randomized L2, float32, non-transposed comparison; each field can be
// overridden per-test. (Struct header reconstructed; it was lost in the
// doxygen extraction of this file.)
struct TestFlatOptions {
  TestFlatOptions()
      : useL2(true),
        useFloat16(false),
        useTransposed(false),
        numVecsOverride(-1),
        numQueriesOverride(-1),
        kOverride(-1) {
  }

  bool useL2;             // true => L2 metric, false => inner product
  bool useFloat16;        // store data as float16 on the GPU
  bool useTransposed;     // use the transposed vector storage layout
  int numVecsOverride;    // > 0 forces the database size; <= 0 => random
  int numQueriesOverride; // > 0 forces the query count; <= 0 => random
  int kOverride;          // > 0 forces k; <= 0 => random
};
41 
42 void testFlat(const TestFlatOptions& opt) {
43  int numVecs = opt.numVecsOverride > 0 ?
44  opt.numVecsOverride : faiss::gpu::randVal(1000, 20000);
45  int dim = faiss::gpu::randVal(50, 800);
46  int numQuery = opt.numQueriesOverride > 0 ?
47  opt.numQueriesOverride : faiss::gpu::randVal(1, 512);
48 
49  // Due to loss of precision in a float16 accumulator, for large k,
50  // the number of differences is pretty huge. Restrict ourselves to a
51  // fairly small `k` for float16
52  int k = opt.useFloat16 ?
53  std::min(faiss::gpu::randVal(1, 50), numVecs) :
54  std::min(faiss::gpu::randVal(1, 1024), numVecs);
55  if (opt.kOverride > 0) {
56  k = opt.kOverride;
57  }
58 
59  faiss::IndexFlatIP cpuIndexIP(dim);
60  faiss::IndexFlatL2 cpuIndexL2(dim);
61 
62  faiss::IndexFlat* cpuIndex =
63  opt.useL2 ? (faiss::IndexFlat*) &cpuIndexL2 :
64  (faiss::IndexFlat*) &cpuIndexIP;
65 
66  // Construct on a random device to test multi-device, if we have
67  // multiple devices
68  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
69 
71  res.noTempMemory();
72 
73 
75  config.device = device;
76  config.useFloat16 = opt.useFloat16;
77  config.storeTransposed = opt.useTransposed;
78 
79  faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
80  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
81 
82  faiss::gpu::GpuIndexFlat* gpuIndex =
83  opt.useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
84  (faiss::gpu::GpuIndexFlat*) &gpuIndexIP;
85 
86  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
87  cpuIndex->add(numVecs, vecs.data());
88  gpuIndex->add(numVecs, vecs.data());
89 
90  std::stringstream str;
91  str << (opt.useL2 ? "L2" : "IP") << " numVecs " << numVecs
92  << " dim " << dim
93  << " useFloat16 " << opt.useFloat16
94  << " transposed " << opt.useTransposed
95  << " numQuery " << numQuery
96  << " k " << k;
97 
98  // To some extent, we depend upon the relative error for the test
99  // for float16
100  faiss::gpu::compareIndices(*cpuIndex, *gpuIndex, numQuery, dim, k, str.str(),
101  opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
102  // FIXME: the fp16 bounds are
103  // useless when math (the accumulator) is
104  // in fp16. Figure out another way to test
105  opt.useFloat16 ? 0.99f : 0.1f,
106  opt.useFloat16 ? 0.65f : 0.015f);
107 }
108 
109 TEST(TestGpuIndexFlat, IP_Float32) {
110  for (int tries = 0; tries < 5; ++tries) {
111  faiss::gpu::newTestSeed();
112 
113  TestFlatOptions opt;
114  opt.useL2 = false;
115  opt.useFloat16 = false;
116  opt.useTransposed = false;
117 
118  testFlat(opt);
119 
120  opt.useTransposed = true;
121  testFlat(opt);
122  }
123 }
124 
125 TEST(TestGpuIndexFlat, L2_Float32) {
126  for (int tries = 0; tries < 5; ++tries) {
127  faiss::gpu::newTestSeed();
128 
129  TestFlatOptions opt;
130  opt.useL2 = true;
131  opt.useFloat16 = false;
132  opt.useTransposed = false;
133 
134  testFlat(opt);
135 
136  opt.useTransposed = true;
137  testFlat(opt);
138  }
139 }
140 
141 // test specialized k == 1 codepath
142 TEST(TestGpuIndexFlat, L2_Float32_K1) {
143  for (int tries = 0; tries < 5; ++tries) {
144  faiss::gpu::newTestSeed();
145 
146  TestFlatOptions opt;
147  opt.useL2 = true;
148  opt.useFloat16 = false;
149  opt.useTransposed = false;
150  opt.kOverride = 1;
151 
152  testFlat(opt);
153  }
154 }
155 
156 TEST(TestGpuIndexFlat, IP_Float16) {
157  for (int tries = 0; tries < 5; ++tries) {
158  faiss::gpu::newTestSeed();
159 
160  TestFlatOptions opt;
161  opt.useL2 = false;
162  opt.useFloat16 = true;
163  opt.useTransposed = false;
164 
165  testFlat(opt);
166 
167  opt.useTransposed = true;
168  testFlat(opt);
169  }
170 }
171 
172 TEST(TestGpuIndexFlat, L2_Float16) {
173  for (int tries = 0; tries < 5; ++tries) {
174  faiss::gpu::newTestSeed();
175 
176  TestFlatOptions opt;
177  opt.useL2 = true;
178  opt.useFloat16 = true;
179  opt.useTransposed = false;
180 
181  testFlat(opt);
182 
183  opt.useTransposed = true;
184  testFlat(opt);
185  }
186 }
187 
188 // test specialized k == 1 codepath
189 TEST(TestGpuIndexFlat, L2_Float16_K1) {
190  for (int tries = 0; tries < 5; ++tries) {
191  faiss::gpu::newTestSeed();
192 
193  TestFlatOptions opt;
194  opt.useL2 = true;
195  opt.useFloat16 = true;
196  opt.useTransposed = false;
197  opt.kOverride = 1;
198 
199  testFlat(opt);
200  }
201 }
202 
203 // test tiling along a huge vector set
204 TEST(TestGpuIndexFlat, L2_Tiling) {
205  for (int tries = 0; tries < 3; ++tries) {
206  faiss::gpu::newTestSeed();
207 
208  TestFlatOptions opt;
209  opt.useL2 = true;
210  opt.useFloat16 = false;
211  opt.useTransposed = false;
212  opt.numVecsOverride = 1000000;
213  opt.numQueriesOverride = 8;
214 
215  testFlat(opt);
216 
217  opt.useTransposed = true;
218  testFlat(opt);
219  }
220 }
221 
222 TEST(TestGpuIndexFlat, QueryEmpty) {
224  res.noTempMemory();
225 
227  config.device = 0;
228  config.useFloat16 = false;
229  config.storeTransposed = false;
230 
231  int dim = 128;
232  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
233 
234  // Querying an empty index should not blow up, and just return
235  // (FLT_MAX, -1)
236  int numQuery = 10;
237  int k = 50;
238  std::vector<float> queries(numQuery * dim, 1.0f);
239 
240  std::vector<float> dist(numQuery * k, 0);
241  std::vector<faiss::Index::idx_t> ind(numQuery * k);
242 
243  gpuIndex.search(numQuery, queries.data(), k, dist.data(), ind.data());
244 
245  for (auto d : dist) {
246  EXPECT_EQ(d, std::numeric_limits<float>::max());
247  }
248 
249  for (auto i : ind) {
250  EXPECT_EQ(i, -1);
251  }
252 }
253 
254 TEST(TestGpuIndexFlat, CopyFrom) {
255  faiss::gpu::newTestSeed();
256 
257  int numVecs = faiss::gpu::randVal(100, 200);
258  int dim = faiss::gpu::randVal(1, 1000);
259 
260  faiss::IndexFlatL2 cpuIndex(dim);
261 
262  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
263  cpuIndex.add(numVecs, vecs.data());
264 
266  res.noTempMemory();
267 
268  // Fill with garbage values
269  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
270 
272  config.device = 0;
273  config.useFloat16 = false;
274  config.storeTransposed = false;
275 
276  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
277  gpuIndex.copyFrom(&cpuIndex);
278 
279  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
280  EXPECT_EQ(gpuIndex.ntotal, numVecs);
281 
282  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
283  EXPECT_EQ(cpuIndex.d, dim);
284 
285  int idx = faiss::gpu::randVal(0, numVecs - 1);
286 
287  std::vector<float> gpuVals(dim);
288  gpuIndex.reconstruct(idx, gpuVals.data());
289 
290  std::vector<float> cpuVals(dim);
291  cpuIndex.reconstruct(idx, cpuVals.data());
292 
293  EXPECT_EQ(gpuVals, cpuVals);
294 }
295 
296 TEST(TestGpuIndexFlat, CopyTo) {
297  faiss::gpu::newTestSeed();
298 
300  res.noTempMemory();
301 
302  int numVecs = faiss::gpu::randVal(100, 200);
303  int dim = faiss::gpu::randVal(1, 1000);
304 
305  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
306 
308  config.device = device;
309  config.useFloat16 = false;
310  config.storeTransposed = false;
311 
312  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
313 
314  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
315  gpuIndex.add(numVecs, vecs.data());
316 
317  // Fill with garbage values
318  faiss::IndexFlatL2 cpuIndex(2000);
319  gpuIndex.copyTo(&cpuIndex);
320 
321  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
322  EXPECT_EQ(gpuIndex.ntotal, numVecs);
323 
324  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
325  EXPECT_EQ(cpuIndex.d, dim);
326 
327  int idx = faiss::gpu::randVal(0, numVecs - 1);
328 
329  std::vector<float> gpuVals(dim);
330  gpuIndex.reconstruct(idx, gpuVals.data());
331 
332  std::vector<float> cpuVals(dim);
333  cpuIndex.reconstruct(idx, cpuVals.data());
334 
335  EXPECT_EQ(gpuVals, cpuVals);
336 }
337 
338 TEST(TestGpuIndexFlat, UnifiedMemory) {
339  // Construct on a random device to test multi-device, if we have
340  // multiple devices
341  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
342 
343  if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
344  return;
345  }
346 
347  int dim = 256;
348 
349  // FIXME: GpuIndexFlat doesn't support > 2^31 (vecs * dims) due to
350  // kernel indexing, so we can't test unified memory for memory
351  // oversubscription.
352  size_t numVecs = 50000;
353  int numQuery = 10;
354  int k = 10;
355 
356  faiss::IndexFlatL2 cpuIndexL2(dim);
357 
359  res.noTempMemory();
360 
362  config.device = device;
363  config.memorySpace = faiss::gpu::MemorySpace::Unified;
364 
365  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
366 
367  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
368  cpuIndexL2.add(numVecs, vecs.data());
369  gpuIndexL2.add(numVecs, vecs.data());
370 
371  // To some extent, we depend upon the relative error for the test
372  // for float16
373  faiss::gpu::compareIndices(cpuIndexL2, gpuIndexL2,
374  numQuery, dim, k, "Unified Memory",
375  kF32MaxRelErr,
376  0.1f,
377  0.015f);
378 }
void copyTo(faiss::IndexFlat *index) const
void reconstruct(idx_t key, float *recons) const override
Definition: IndexFlat.cpp:118
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:35
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:27
int d
vector dimension
Definition: Index.h:64
MemorySpace memorySpace
Definition: GpuIndex.h:32
void reconstruct(faiss::Index::idx_t key, float *out) const override
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
void add(idx_t n, const float *x) override
Definition: IndexFlat.cpp:30
void copyFrom(const faiss::IndexFlat *index)
Definition: GpuIndexFlat.cu:87
void add(faiss::Index::idx_t, const float *x) override
Overrides to avoid excessive copies.
void search(faiss::Index::idx_t n, const float *x, faiss::Index::idx_t k, float *distances, faiss::Index::idx_t *labels) const override