TestGpuIndexFlat.cpp
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */


#include "../../IndexFlat.h"
#include "../GpuIndexFlat.h"
#include "../StandardGpuResources.h"
#include "../utils/DeviceUtils.h"
#include "../test/TestUtils.h"
#include <gtest/gtest.h>
#include <limits>
#include <sstream>
#include <vector>

// FIXME: figure out a better way to test fp16
constexpr float kF16MaxRelErr = 0.07f;
constexpr float kF32MaxRelErr = 6e-3f;

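// Options for a single randomized CPU vs. GPU comparison; the *Override
// fields are chosen at random when left at -1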
struct TestFlatOptions {
  TestFlatOptions()
      : useL2(true),
        useFloat16(false),
        useTransposed(false),
        numVecsOverride(-1),
        numQueriesOverride(-1),
        kOverride(-1) {
  }

  bool useL2;
  bool useFloat16;
  bool useTransposed;
  int numVecsOverride;
  int numQueriesOverride;
  int kOverride;
};

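// Builds a CPU and a GPU flat index over the same random vectors and
// checks that their top-k search results agree within the tolerances above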
void testFlat(const TestFlatOptions& opt) {
  int numVecs = opt.numVecsOverride > 0 ?
    opt.numVecsOverride : faiss::gpu::randVal(1000, 20000);
  int dim = faiss::gpu::randVal(50, 800);
  int numQuery = opt.numQueriesOverride > 0 ?
    opt.numQueriesOverride : faiss::gpu::randVal(1, 512);

  // Due to loss of precision in a float16 accumulator, for large k,
  // the number of differences is pretty huge. Restrict ourselves to a
  // fairly small `k` for float16
  int k = opt.useFloat16 ?
    std::min(faiss::gpu::randVal(1, 50), numVecs) :
    std::min(faiss::gpu::randVal(1, 1024), numVecs);
  if (opt.kOverride > 0) {
    k = opt.kOverride;
  }

  faiss::IndexFlatIP cpuIndexIP(dim);
  faiss::IndexFlatL2 cpuIndexL2(dim);

  faiss::IndexFlat* cpuIndex =
    opt.useL2 ? (faiss::IndexFlat*) &cpuIndexL2 :
    (faiss::IndexFlat*) &cpuIndexIP;

  // Construct on a random device to test multi-device, if we have
  // multiple devices
  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = device;
  config.useFloat16 = opt.useFloat16;
  config.storeTransposed = opt.useTransposed;

  faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);

  faiss::gpu::GpuIndexFlat* gpuIndex =
    opt.useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
    (faiss::gpu::GpuIndexFlat*) &gpuIndexIP;

  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
  cpuIndex->add(numVecs, vecs.data());
  gpuIndex->add(numVecs, vecs.data());

  std::stringstream str;
  str << (opt.useL2 ? "L2" : "IP") << " numVecs " << numVecs
      << " dim " << dim
      << " useFloat16 " << opt.useFloat16
      << " transposed " << opt.useTransposed
      << " numQuery " << numQuery
      << " k " << k;

  // To some extent, we depend upon the relative error for the test
  // for float16
  faiss::gpu::compareIndices(*cpuIndex, *gpuIndex, numQuery, dim, k, str.str(),
                             opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                             // FIXME: the fp16 bounds are
                             // useless when math (the accumulator) is
                             // in fp16. Figure out another way to test
                             opt.useFloat16 ? 0.99f : 0.1f,
                             opt.useFloat16 ? 0.65f : 0.015f);
}

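// randomized float32 comparisons, with and without the transposed layout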
TEST(TestGpuIndexFlat, IP_Float32) {
  for (int tries = 0; tries < 5; ++tries) {
    TestFlatOptions opt;
    opt.useL2 = false;
    opt.useFloat16 = false;
    opt.useTransposed = false;

    testFlat(opt);

    opt.useTransposed = true;
    testFlat(opt);
  }
}

TEST(TestGpuIndexFlat, L2_Float32) {
  for (int tries = 0; tries < 5; ++tries) {
    TestFlatOptions opt;
    opt.useL2 = true;
    opt.useFloat16 = false;
    opt.useTransposed = false;

    testFlat(opt);

    opt.useTransposed = true;
    testFlat(opt);
  }
}

// test specialized k == 1 codepath
TEST(TestGpuIndexFlat, L2_Float32_K1) {
  for (int tries = 0; tries < 5; ++tries) {
    TestFlatOptions opt;
    opt.useL2 = true;
    opt.useFloat16 = false;
    opt.useTransposed = false;
    opt.kOverride = 1;

    testFlat(opt);
  }
}

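// the float16 variants use the looser kF16MaxRelErr bound, since
// accumulation also happens in half precision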
TEST(TestGpuIndexFlat, IP_Float16) {
  for (int tries = 0; tries < 5; ++tries) {
    TestFlatOptions opt;
    opt.useL2 = false;
    opt.useFloat16 = true;
    opt.useTransposed = false;

    testFlat(opt);

    opt.useTransposed = true;
    testFlat(opt);
  }
}

TEST(TestGpuIndexFlat, L2_Float16) {
  for (int tries = 0; tries < 5; ++tries) {
    TestFlatOptions opt;
    opt.useL2 = true;
    opt.useFloat16 = true;
    opt.useTransposed = false;

    testFlat(opt);

    opt.useTransposed = true;
    testFlat(opt);
  }
}

// test specialized k == 1 codepath
TEST(TestGpuIndexFlat, L2_Float16_K1) {
  for (int tries = 0; tries < 5; ++tries) {
    TestFlatOptions opt;
    opt.useL2 = true;
    opt.useFloat16 = true;
    opt.useTransposed = false;
    opt.kOverride = 1;

    testFlat(opt);
  }
}

// test tiling along a huge vector set
TEST(TestGpuIndexFlat, L2_Tiling) {
  for (int tries = 0; tries < 2; ++tries) {
    TestFlatOptions opt;
    opt.useL2 = true;
    opt.useFloat16 = false;
    opt.useTransposed = false;
    opt.numVecsOverride = 1000000;
    opt.numQueriesOverride = 4;

    testFlat(opt);

    opt.useTransposed = true;
    testFlat(opt);
  }
}

TEST(TestGpuIndexFlat, QueryEmpty) {
  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = 0;
  config.useFloat16 = false;
  config.storeTransposed = false;

  int dim = 128;
  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);

  // Querying an empty index should not blow up, and just return
  // (FLT_MAX, -1)
  int numQuery = 10;
  int k = 50;
  std::vector<float> queries(numQuery * dim, 1.0f);

  std::vector<float> dist(numQuery * k, 0);
  std::vector<faiss::Index::idx_t> ind(numQuery * k);

  gpuIndex.search(numQuery, queries.data(), k, dist.data(), ind.data());

  for (auto d : dist) {
    EXPECT_EQ(d, std::numeric_limits<float>::max());
  }

  for (auto i : ind) {
    EXPECT_EQ(i, -1);
  }
}

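// copyFrom() should replace the GPU index's dimension, size and contents
// with those of the CPU index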
TEST(TestGpuIndexFlat, CopyFrom) {
  int numVecs = faiss::gpu::randVal(100, 200);
  int dim = faiss::gpu::randVal(1, 1000);

  faiss::IndexFlatL2 cpuIndex(dim);

  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
  cpuIndex.add(numVecs, vecs.data());

  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = 0;
  config.useFloat16 = false;
  config.storeTransposed = false;

  // Fill with garbage values
  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
  gpuIndex.copyFrom(&cpuIndex);

  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
  EXPECT_EQ(gpuIndex.ntotal, numVecs);

  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
  EXPECT_EQ(cpuIndex.d, dim);

  int idx = faiss::gpu::randVal(0, numVecs - 1);

  std::vector<float> gpuVals(dim);
  gpuIndex.reconstruct(idx, gpuVals.data());

  std::vector<float> cpuVals(dim);
  cpuIndex.reconstruct(idx, cpuVals.data());

  EXPECT_EQ(gpuVals, cpuVals);
}

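// copyTo() is the reverse direction: the CPU index is overwritten with the
// GPU index's dimension, size and contents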
TEST(TestGpuIndexFlat, CopyTo) {
  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  int numVecs = faiss::gpu::randVal(100, 200);
  int dim = faiss::gpu::randVal(1, 1000);

  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = device;
  config.useFloat16 = false;
  config.storeTransposed = false;

  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);

  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
  gpuIndex.add(numVecs, vecs.data());

  // Fill with garbage values
  faiss::IndexFlatL2 cpuIndex(2000);
  gpuIndex.copyTo(&cpuIndex);

  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
  EXPECT_EQ(gpuIndex.ntotal, numVecs);

  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
  EXPECT_EQ(cpuIndex.d, dim);

  int idx = faiss::gpu::randVal(0, numVecs - 1);

  std::vector<float> gpuVals(dim);
  gpuIndex.reconstruct(idx, gpuVals.data());

  std::vector<float> cpuVals(dim);
  cpuIndex.reconstruct(idx, cpuVals.data());

  EXPECT_EQ(gpuVals, cpuVals);
}

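// run the same CPU vs. GPU comparison with index storage in unified memory,
// skipping devices without full unified memory support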
TEST(TestGpuIndexFlat, UnifiedMemory) {
  // Construct on a random device to test multi-device, if we have
  // multiple devices
  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
    return;
  }

  int dim = 256;

  // FIXME: GpuIndexFlat doesn't support > 2^31 (vecs * dims) due to
  // kernel indexing, so we can't test unified memory for memory
  // oversubscription.
  size_t numVecs = 50000;
  int numQuery = 10;
  int k = 10;

  faiss::IndexFlatL2 cpuIndexL2(dim);

  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = device;
  config.memorySpace = faiss::gpu::MemorySpace::Unified;

  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);

  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
  cpuIndexL2.add(numVecs, vecs.data());
  gpuIndexL2.add(numVecs, vecs.data());

  // To some extent, we depend upon the relative error for the test
  faiss::gpu::compareIndices(cpuIndexL2, gpuIndexL2,
                             numQuery, dim, k, "Unified Memory",
                             kF32MaxRelErr,
                             0.1f,
                             0.015f);
}

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);

  // just run with a fixed test seed
  faiss::gpu::setTestSeed(100);

  return RUN_ALL_TESTS();
}