#include "../../IndexFlat.h"
#include "../../IndexIVF.h"
#include "../GpuIndexIVFFlat.h"
#include "../StandardGpuResources.h"
#include "../utils/DeviceUtils.h"
#include "../test/TestUtils.h"
#include <gtest/gtest.h>
#include <glog/logging.h>
#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
#include <vector>
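
// Unit tests for faiss::gpu::GpuIndexIVFFlat. Each test builds a CPU
// faiss::IndexIVFFlat reference over randomly generated vectors and checks
// that the GPU index returns comparable results within the float16/float32
// error bounds defined below.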

constexpr float kF16MaxRelErr = 0.3f;
constexpr float kF32MaxRelErr = 0.03f;

struct Options {
  Options() {
    numAdd = faiss::gpu::randVal(4000, 20000);
    dim = faiss::gpu::randVal(64, 200);

    numCentroids = std::sqrt((float) numAdd);
    numTrain = numCentroids * 40;
    nprobe = faiss::gpu::randVal(10, numCentroids);
    numQuery = faiss::gpu::randVal(32, 100);
    k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
    indicesOpt = faiss::gpu::randSelect({faiss::gpu::INDICES_CPU,
                                         faiss::gpu::INDICES_32_BIT,
                                         faiss::gpu::INDICES_64_BIT});

    device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
  }

  std::string toString() const {
    std::stringstream str;
    str << "IVFFlat device " << device
        << " numVecs " << numAdd
        << " dim " << dim
        << " numCentroids " << numCentroids
        << " nprobe " << nprobe
        << " numQuery " << numQuery
        << " k " << k
        << " indicesOpt " << indicesOpt;

    return str.str();
  }

  int numAdd;
  int dim;
  int numCentroids;
  int numTrain;
  int nprobe;
  int numQuery;
  int k;
  int device;
  faiss::gpu::IndicesOptions indicesOpt;
};
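
// queryTest: build and fully populate a CPU IndexIVFFlat, copy it to the
// GPU, and compare query results between the two over several randomized
// trials. dimOverride pins the dimension for the fixed-dimension tests.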
void queryTest(faiss::MetricType metricType,
               bool useFloat16CoarseQuantizer,
               bool useFloat16,
               int dimOverride = -1) {
  for (int tries = 0; tries < 3; ++tries) {
    faiss::gpu::newTestSeed();

    Options opt;
    opt.dim = dimOverride != -1 ? dimOverride : opt.dim;

    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

    faiss::IndexFlatL2 quantizerL2(opt.dim);
    faiss::IndexFlatIP quantizerIP(opt.dim);
    faiss::Index* quantizer = metricType == faiss::METRIC_L2 ?
      (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;

    faiss::IndexIVFFlat cpuIndex(quantizer,
                                 opt.dim, opt.numCentroids, metricType);
    cpuIndex.train(opt.numTrain, trainVecs.data());
    cpuIndex.add(opt.numAdd, addVecs.data());
    cpuIndex.nprobe = opt.nprobe;

    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexIVFFlatConfig config;
    config.device = opt.device;
    config.indicesOptions = opt.indicesOpt;
    config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
    config.useFloat16IVFStorage = useFloat16;

    faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, cpuIndex.d, cpuIndex.nlist,
                                         cpuIndex.metric_type, config);
    gpuIndex.copyFrom(&cpuIndex);
    gpuIndex.setNumProbes(opt.nprobe);

    bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                               opt.numQuery, opt.dim, opt.k, opt.toString(),
                               compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                               compFloat16 ? 0.99f : 0.1f,
                               compFloat16 ? 0.65f : 0.015f);
  }
}
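
// addTest: train a CPU index (leaving it empty), copy it to the GPU, then
// add the same vectors to both indexes and compare query results.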
void addTest(faiss::MetricType metricType,
             bool useFloat16CoarseQuantizer,
             bool useFloat16) {
  for (int tries = 0; tries < 5; ++tries) {
    faiss::gpu::newTestSeed();

    Options opt;
    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

    faiss::IndexFlatL2 quantizerL2(opt.dim);
    faiss::IndexFlatIP quantizerIP(opt.dim);
    faiss::Index* quantizer = metricType == faiss::METRIC_L2 ?
      (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;

    faiss::IndexIVFFlat cpuIndex(quantizer,
                                 opt.dim, opt.numCentroids, metricType);
    cpuIndex.train(opt.numTrain, trainVecs.data());
    cpuIndex.nprobe = opt.nprobe;

    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexIVFFlatConfig config;
    config.device = opt.device;
    config.indicesOptions = opt.indicesOpt;
    config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
    config.useFloat16IVFStorage = useFloat16;

    faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, cpuIndex.d, cpuIndex.nlist,
                                         cpuIndex.metric_type, config);
    gpuIndex.copyFrom(&cpuIndex);
    gpuIndex.setNumProbes(opt.nprobe);

    cpuIndex.add(opt.numAdd, addVecs.data());
    gpuIndex.add(opt.numAdd, addVecs.data());

    bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                               opt.numQuery, opt.dim, opt.k, opt.toString(),
                               compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                               compFloat16 ? 0.70f : 0.1f,
                               compFloat16 ? 0.30f : 0.015f);
  }
}
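
// copyToTest: train and populate an index on the GPU, copy it back to a CPU
// index with copyTo(), and verify that parameters and query results match.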
void copyToTest(bool useFloat16CoarseQuantizer,
                bool useFloat16) {
  faiss::gpu::newTestSeed();

  Options opt;
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = opt.device;
  config.indicesOptions = opt.indicesOpt;
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
  config.useFloat16IVFStorage = useFloat16;

  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, opt.dim, opt.numCentroids,
                                       faiss::METRIC_L2, config);
  gpuIndex.train(opt.numTrain, trainVecs.data());
  gpuIndex.add(opt.numAdd, addVecs.data());
  gpuIndex.setNumProbes(opt.nprobe);

  faiss::IndexFlatL2 cpuQuantizer(opt.dim);
  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer, opt.dim, opt.numCentroids,
                               faiss::METRIC_L2);
  gpuIndex.copyTo(&cpuIndex);

  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);

  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
  EXPECT_EQ(cpuIndex.d, opt.dim);
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());

  // Both indexes should now give equivalent query results.
  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                             opt.numQuery, opt.dim, opt.k, opt.toString(),
                             compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                             compFloat16 ? 0.70f : 0.1f,
                             compFloat16 ? 0.30f : 0.015f);
}
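
// copyFromTest: build and populate an index on the CPU, copy it onto the GPU
// with copyFrom(), and verify that parameters and query results match.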
void copyFromTest(bool useFloat16CoarseQuantizer,
                  bool useFloat16) {
  faiss::gpu::newTestSeed();

  Options opt;
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

  faiss::IndexFlatL2 cpuQuantizer(opt.dim);
  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer, opt.dim, opt.numCentroids,
                               faiss::METRIC_L2);
  cpuIndex.nprobe = opt.nprobe;
  cpuIndex.train(opt.numTrain, trainVecs.data());
  cpuIndex.add(opt.numAdd, addVecs.data());

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = opt.device;
  config.indicesOptions = opt.indicesOpt;
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
  config.useFloat16IVFStorage = useFloat16;

  // Construct with placeholder sizes; copyFrom() should overwrite them.
  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, 1, 1, faiss::METRIC_L2, config);
  gpuIndex.setNumProbes(1);

  gpuIndex.copyFrom(&cpuIndex);

  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);

  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
  EXPECT_EQ(cpuIndex.d, opt.dim);
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());

  // Both indexes should now give equivalent query results.
  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                             opt.numQuery, opt.dim, opt.k, opt.toString(),
                             compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                             compFloat16 ? 0.70f : 0.1f,
                             compFloat16 ? 0.30f : 0.015f);
}

TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) {
  addTest(faiss::METRIC_L2, false, false);
}

TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) {
  addTest(faiss::METRIC_INNER_PRODUCT, false, false);
}

TEST(TestGpuIndexIVFFlat, Float32_16_Add_L2) {
  addTest(faiss::METRIC_L2, false, true);
}

TEST(TestGpuIndexIVFFlat, Float32_16_Add_IP) {
  addTest(faiss::METRIC_INNER_PRODUCT, false, true);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) {
  addTest(faiss::METRIC_L2, true, false);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
  addTest(faiss::METRIC_INNER_PRODUCT, true, false);
}
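
// Query tests: the index is fully populated on the CPU before being copied
// to the GPU.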
TEST(TestGpuIndexIVFFlat, Float32_Query_L2) {
  queryTest(faiss::METRIC_L2, false, false);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_IP) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, false);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2) {
  queryTest(faiss::METRIC_L2, false, true);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) {
  queryTest(faiss::METRIC_L2, true, false);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
  queryTest(faiss::METRIC_INNER_PRODUCT, true, false);
}
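
// Query tests pinned to specific dimensions (64, 128, 256).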
TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) {
  queryTest(faiss::METRIC_L2, false, false, 64);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 64);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2_64) {
  queryTest(faiss::METRIC_L2, false, true, 64);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP_64) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 64);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) {
  queryTest(faiss::METRIC_L2, false, false, 128);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 128);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2_128) {
  queryTest(faiss::METRIC_L2, false, true, 128);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP_128) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 128);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2_256) {
  queryTest(faiss::METRIC_L2, false, true, 256);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP_256) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 256);
}
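
// copyTo round-trip tests: build on the GPU, copy back to the CPU.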
TEST(TestGpuIndexIVFFlat, Float32_16_CopyTo) {
  copyToTest(false, true);
}

TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) {
  copyToTest(false, false);
}
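
// Degenerate input handling: NaN queries must return no results, and NaN
// vectors must not be added to the index.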
TEST(TestGpuIndexIVFFlat, QueryNaN) {
  faiss::gpu::newTestSeed();

  Options opt;
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = opt.device;

  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, opt.dim, opt.numCentroids,
                                       faiss::METRIC_L2, config);
  gpuIndex.setNumProbes(opt.nprobe);

  gpuIndex.train(opt.numTrain, trainVecs.data());
  gpuIndex.add(opt.numAdd, addVecs.data());

  // Queries consisting entirely of NaNs should return no valid results.
  int numQuery = opt.numQuery;
  std::vector<float> nans(numQuery * opt.dim,
                          std::numeric_limits<float>::quiet_NaN());

  std::vector<float> distances(numQuery * opt.k, 0);
  std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);

  gpuIndex.search(numQuery, nans.data(), opt.k,
                  distances.data(), indices.data());

  for (int q = 0; q < numQuery; ++q) {
    for (int k = 0; k < opt.k; ++k) {
      EXPECT_EQ(indices[q * opt.k + k], -1);
      EXPECT_EQ(distances[q * opt.k + k], std::numeric_limits<float>::max());
    }
  }
}

TEST(TestGpuIndexIVFFlat, AddNaN) {
  faiss::gpu::newTestSeed();

  Options opt;
  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = opt.device;

  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, opt.dim, opt.numCentroids,
                                       faiss::METRIC_L2, config);
  gpuIndex.setNumProbes(opt.nprobe);

  int numNans = 10;
  std::vector<float> nans(numNans * opt.dim,
                          std::numeric_limits<float>::quiet_NaN());

  // Make the first vector valid; it is the only one that should be added.
  for (int i = 0; i < opt.dim; ++i) {
    nans[i] = i;
  }

  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
  gpuIndex.train(opt.numTrain, trainVecs.data());

  EXPECT_EQ(gpuIndex.ntotal, 0);
  gpuIndex.add(numNans, nans.data());

  // Only the single valid vector should have been added.
  EXPECT_EQ(gpuIndex.ntotal, 1);

  std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
  std::vector<float> distance(opt.numQuery * opt.k, 0);
  std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);

  // Searching after the NaN add should not crash.
  gpuIndex.search(opt.numQuery, queryVecs.data(), opt.k,
                  distance.data(), indices.data());
}

TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
  // Construct on a random device to exercise multi-GPU machines.
  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
    // This test requires full unified memory support.
    return;
  }

  // Sizes are chosen so that roughly 24 GB of vector data oversubscribes
  // GPU memory; the dim/numQuery/k/nprobe values here are assumed.
  int dim = 256;
  int numCentroids = 1024;
  size_t numAdd =
    (size_t) 1024 * 1024 * 1024 * 24 / ((size_t) dim * sizeof(float));
  size_t numTrain = numCentroids * 40;
  int numQuery = 10;
  int k = 10;
  int nprobe = 8;

  LOG(INFO) << "generating vecs";
  std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
  std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);

  faiss::IndexFlatL2 quantizer(dim);
  faiss::IndexIVFFlat cpuIndex(&quantizer, dim, numCentroids, faiss::METRIC_L2);
  LOG(INFO) << "train CPU";
  cpuIndex.train(numTrain, trainVecs.data());
  LOG(INFO) << "add CPU";
  cpuIndex.add(numAdd, addVecs.data());
  cpuIndex.nprobe = nprobe;

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = device;
  config.memorySpace = faiss::gpu::MemorySpace::Unified;

  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, dim, numCentroids,
                                       faiss::METRIC_L2, config);

  LOG(INFO) << "copy from CPU";
  gpuIndex.copyFrom(&cpuIndex);
  gpuIndex.setNumProbes(nprobe);

  LOG(INFO) << "compare";
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                             numQuery, dim, k, "Unified Memory",
                             kF32MaxRelErr, 0.1f, 0.015f);
}

// Configuration fields exercised by these tests:
//
//   bool useFloat16IVFStorage
//   bool useFloat16                  Whether or not data is stored as float16.
//   int device                       GPU device on which the index is resident.
//   GpuIndexFlatConfig flatConfig    Configuration for the coarse quantizer
//                                    object.
//   IndicesOptions indicesOptions    Index storage options for the GPU.
//
// MetricType: some algorithms support both an inner product version and an
// L2 search version.
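
// The sketch below ties these configuration fields together using the same
// construction pattern as the tests above (fill in a config, then build the
// GPU index). It is illustrative only and not part of the test suite; the
// function name and the dimension, list count, device, and nprobe values
// are arbitrary.
void exampleConfigUsage() {
  faiss::gpu::StandardGpuResources res;

  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = 0;                                 // GPU the index lives on
  config.indicesOptions = faiss::gpu::INDICES_64_BIT;
  config.useFloat16IVFStorage = true;                // float16 IVF list data
  config.flatConfig.useFloat16 = true;               // float16 coarse quantizer

  // 128-d vectors, 1024 IVF lists, L2 metric; train/add/search as usual.
  faiss::gpu::GpuIndexIVFFlat index(&res, 128, 1024,
                                    faiss::METRIC_L2, config);
  index.setNumProbes(32);
}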