11 #include "../../IndexFlat.h"
12 #include "../../IndexIVFFlat.h"
13 #include "../GpuIndexIVFFlat.h"
14 #include "../StandardGpuResources.h"
15 #include "../utils/DeviceUtils.h"
16 #include "../test/TestUtils.h"
18 #include <gtest/gtest.h>
19 #include <glog/logging.h>
// Relative-error bound handed to compareIndices when float16 is involved
// (call sites select it with `compFloat16 ? kF16MaxRelErr : kF32MaxRelErr`).
24 constexpr
float kF16MaxRelErr = 0.3f;
// Relative-error bound handed to compareIndices for the float32-only case.
25 constexpr
float kF32MaxRelErr = 0.03f;
// Randomized test parameters. These lines are the constructor body, the
// toString() helper and one member of the options type; the enclosing
// declaration is not visible in this excerpt.
// Number of vectors added to the index: randVal(4000, 20000).
30 numAdd = faiss::gpu::randVal(4000, 20000);
// Vector dimensionality: randVal(64, 200).
31 dim = faiss::gpu::randVal(64, 200);
// The number of centroids is the square root of numAdd.
33 numCentroids = std::sqrt((
float) numAdd);
// 40 training vectors per centroid.
34 numTrain = numCentroids * 40;
// nprobe is drawn via randVal(10, numCentroids).
35 nprobe = faiss::gpu::randVal(10, numCentroids);
36 numQuery = faiss::gpu::randVal(32, 100);
// k is randVal(10, 30), capped at numAdd / 40.
37 k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
// Pick one of the listed index-storage options at random.
38 indicesOpt = faiss::gpu::randSelect({
39 faiss::gpu::INDICES_CPU,
40 faiss::gpu::INDICES_32_BIT,
41 faiss::gpu::INDICES_64_BIT});
// Pick a device id via randVal(0, getNumDevices() - 1).
43 device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
// Streams the chosen parameters into a stringstream. Callers pass
// opt.toString() to compareIndices. The return statement is not visible in
// this excerpt.
46 std::string toString()
const {
47 std::stringstream str;
48 str <<
"IVFFlat device " << device
49 <<
" numVecs " << numAdd
51 <<
" numCentroids " << numCentroids
52 <<
" nprobe " << nprobe
53 <<
" numQuery " << numQuery
55 <<
" indicesOpt " << indicesOpt;
// Index-storage option chosen by randSelect above.
68 faiss::gpu::IndicesOptions indicesOpt;
// Query-comparison helper. Its opening signature line is not visible in this
// excerpt; the TEST cases call it as queryTest(metric, bool, bool[, dim]).
// It builds a CPU IVFFlat index, copies it to the GPU, and compares search
// results of the two. dimOverride == -1 keeps the randomly chosen dimension.
72 bool useFloat16CoarseQuantizer,
74 int dimOverride = -1) {
// Three independent rounds, each starting from a new test seed.
75 for (
int tries = 0; tries < 3; ++tries) {
76 faiss::gpu::newTestSeed();
79 opt.dim = dimOverride != -1 ? dimOverride : opt.dim;
81 std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
82 std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
87 metricType == faiss::METRIC_L2 ?
91 opt.dim, opt.numCentroids, metricType);
// The CPU index is trained and fully populated before the GPU copy is made.
92 cpuIndex.train(opt.numTrain, trainVecs.data());
93 cpuIndex.add(opt.numAdd, addVecs.data());
94 cpuIndex.nprobe = opt.nprobe;
100 config.
device = opt.device;
108 cpuIndex.metric_type,
110 gpuIndex.copyFrom(&cpuIndex);
111 gpuIndex.setNumProbes(opt.nprobe);
// Use the float16 tolerances if either float16 flag is set.
113 bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
114 faiss::gpu::compareIndices(cpuIndex, gpuIndex,
115 opt.numQuery, opt.dim, opt.k, opt.toString(),
116 compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
// NOTE(review): the meaning of the last two thresholds is defined by
// compareIndices, which is not visible here. They are looser for float16.
120 compFloat16 ? 0.99f : 0.1f,
121 compFloat16 ? 0.65f : 0.015f);
// Add-comparison helper. Its opening signature line is not visible in this
// excerpt; the TEST cases call it as addTest(metric, bool, bool). The CPU
// index is trained only, copied to the GPU, and then the same vectors are
// added to both indices before their search results are compared.
126 bool useFloat16CoarseQuantizer,
// Five independent rounds, each starting from a new test seed.
128 for (
int tries = 0; tries < 5; ++tries) {
129 faiss::gpu::newTestSeed();
133 std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
134 std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
139 metricType == faiss::METRIC_L2 ?
146 cpuIndex.train(opt.numTrain, trainVecs.data());
147 cpuIndex.nprobe = opt.nprobe;
153 config.
device = opt.device;
161 cpuIndex.metric_type,
// Copy the trained CPU index to the GPU before any vectors are added.
163 gpuIndex.copyFrom(&cpuIndex);
164 gpuIndex.setNumProbes(opt.nprobe);
// Add the same vectors to both indices.
166 cpuIndex.add(opt.numAdd, addVecs.data());
167 gpuIndex.add(opt.numAdd, addVecs.data());
// Use the float16 tolerances if either float16 flag is set.
169 bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
170 faiss::gpu::compareIndices(cpuIndex, gpuIndex,
171 opt.numQuery, opt.dim, opt.k, opt.toString(),
172 compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
173 compFloat16 ? 0.70f : 0.1f,
174 compFloat16 ? 0.30f : 0.015f);
// Trains and populates a GPU index, copies it to a CPU index with copyTo,
// then checks that the basic fields agree and that both return comparable
// search results. The rest of the parameter list is not visible in this
// excerpt.
178 void copyToTest(
bool useFloat16CoarseQuantizer,
180 faiss::gpu::newTestSeed();
183 std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
184 std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
190 config.
device = opt.device;
200 gpuIndex.train(opt.numTrain, trainVecs.data());
201 gpuIndex.add(opt.numAdd, addVecs.data());
202 gpuIndex.setNumProbes(opt.nprobe);
209 gpuIndex.copyTo(&cpuIndex);
// After the copy, size, dimension, list count and nprobe must match.
211 EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
212 EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
214 EXPECT_EQ(cpuIndex.d, gpuIndex.d);
215 EXPECT_EQ(cpuIndex.d, opt.dim);
216 EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
217 EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
// Use the float16 tolerances if either float16 flag is set.
220 bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
221 faiss::gpu::compareIndices(cpuIndex, gpuIndex,
222 opt.numQuery, opt.dim, opt.k, opt.toString(),
223 compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
224 compFloat16 ? 0.70f : 0.1f,
225 compFloat16 ? 0.30f : 0.015f);
// Trains and populates a CPU index, copies it into a GPU index with
// copyFrom, then checks that the basic fields agree and that both return
// comparable search results. The rest of the parameter list is not visible
// in this excerpt.
228 void copyFromTest(
bool useFloat16CoarseQuantizer,
230 faiss::gpu::newTestSeed();
233 std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
234 std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
241 cpuIndex.nprobe = opt.nprobe;
242 cpuIndex.train(opt.numTrain, trainVecs.data());
243 cpuIndex.add(opt.numAdd, addVecs.data());
250 config.
device = opt.device;
// Start the GPU index at nprobe 1. The nprobe check below then verifies that
// copyFrom carried over the CPU index's nprobe.
260 gpuIndex.setNumProbes(1);
262 gpuIndex.copyFrom(&cpuIndex);
264 EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
265 EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
267 EXPECT_EQ(cpuIndex.d, gpuIndex.d);
268 EXPECT_EQ(cpuIndex.d, opt.dim);
269 EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
270 EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
// Use the float16 tolerances if either float16 flag is set.
273 bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
274 faiss::gpu::compareIndices(cpuIndex, gpuIndex,
275 opt.numQuery, opt.dim, opt.k, opt.toString(),
276 compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
277 compFloat16 ? 0.70f : 0.1f,
278 compFloat16 ? 0.30f : 0.015f);
// Add tests: each case forwards a metric and two float16 flags to addTest.
// The first flag is useFloat16CoarseQuantizer; the second is presumably
// useFloat16 (that parameter line is not visible in this excerpt).
// Both flags off, L2 metric.
281 TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) {
282 addTest(faiss::METRIC_L2,
false,
false);
// Both flags off, inner-product metric.
285 TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) {
286 addTest(faiss::METRIC_INNER_PRODUCT,
false,
false);
// Only the second flag on, L2 metric.
289 TEST(TestGpuIndexIVFFlat, Float32_16_Add_L2) {
290 addTest(faiss::METRIC_L2,
false,
true);
// Only the second flag on, inner-product metric.
293 TEST(TestGpuIndexIVFFlat, Float32_16_Add_IP) {
294 addTest(faiss::METRIC_INNER_PRODUCT,
false,
true);
// Only the coarse-quantizer flag on, L2 metric.
297 TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) {
298 addTest(faiss::METRIC_L2,
true,
false);
// Only the coarse-quantizer flag on, inner-product metric.
301 TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
302 addTest(faiss::METRIC_INNER_PRODUCT,
true,
false);
// Query tests: each case forwards a metric, two float16 flags and an
// optional dimension override to queryTest. The first flag is
// useFloat16CoarseQuantizer; the second is presumably useFloat16 (that
// parameter line is not visible in this excerpt).
// Random dimension, both flags off.
309 TEST(TestGpuIndexIVFFlat, Float32_Query_L2) {
310 queryTest(faiss::METRIC_L2,
false,
false);
313 TEST(TestGpuIndexIVFFlat, Float32_Query_IP) {
314 queryTest(faiss::METRIC_INNER_PRODUCT,
false,
false);
// Random dimension, only the second flag on.
317 TEST(TestGpuIndexIVFFlat, Float16_Query_L2) {
318 queryTest(faiss::METRIC_L2,
false,
true);
321 TEST(TestGpuIndexIVFFlat, Float16_Query_IP) {
322 queryTest(faiss::METRIC_INNER_PRODUCT,
false,
true);
// Random dimension, only the coarse-quantizer flag on.
327 TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) {
328 queryTest(faiss::METRIC_L2,
true,
false);
331 TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
332 queryTest(faiss::METRIC_INNER_PRODUCT,
true,
false);
// Dimension fixed to 64.
340 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) {
341 queryTest(faiss::METRIC_L2,
false,
false, 64);
344 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) {
345 queryTest(faiss::METRIC_INNER_PRODUCT,
false,
false, 64);
348 TEST(TestGpuIndexIVFFlat, Float16_Query_L2_64) {
349 queryTest(faiss::METRIC_L2,
false,
true, 64);
352 TEST(TestGpuIndexIVFFlat, Float16_Query_IP_64) {
353 queryTest(faiss::METRIC_INNER_PRODUCT,
false,
true, 64);
// Dimension fixed to 128.
356 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) {
357 queryTest(faiss::METRIC_L2,
false,
false, 128);
360 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
361 queryTest(faiss::METRIC_INNER_PRODUCT,
false,
false, 128);
364 TEST(TestGpuIndexIVFFlat, Float16_Query_L2_128) {
365 queryTest(faiss::METRIC_L2,
false,
true, 128);
368 TEST(TestGpuIndexIVFFlat, Float16_Query_IP_128) {
369 queryTest(faiss::METRIC_INNER_PRODUCT,
false,
true, 128);
// Dimension fixed to 256; only cases with the second flag on are visible.
374 TEST(TestGpuIndexIVFFlat, Float16_Query_L2_256) {
375 queryTest(faiss::METRIC_L2,
false,
true, 256);
378 TEST(TestGpuIndexIVFFlat, Float16_Query_IP_256) {
379 queryTest(faiss::METRIC_INNER_PRODUCT,
false,
true, 256);
// CopyTo tests: each case forwards two float16 flags to copyToTest. The
// first is useFloat16CoarseQuantizer; the second is presumably useFloat16
// (that parameter line is not visible in this excerpt).
// Only the second flag on.
386 TEST(TestGpuIndexIVFFlat, Float32_16_CopyTo) {
387 copyToTest(
false,
true);
// Both flags off.
390 TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) {
391 copyToTest(
false,
false);
// Compares CPU and GPU inner-product search when the training and database
// vectors are all non-positive. Several declarations (options, indices,
// config) are not visible in this excerpt.
394 TEST(TestGpuIndexIVFFlat, Float32_negative) {
395 faiss::gpu::newTestSeed();
399 auto trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
400 auto addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
// Force every training component to be non-positive.
403 for (
auto& f : trainVecs) {
404 f = std::abs(f) * -1.0f;
// Force every database component to be non-positive, same as for trainVecs.
// The previous `f *= std::abs(f) * -1.0f` computed -f * |f|, which flips the
// sign of negative inputs and squares the magnitude.
407 for (
auto& f : addVecs) {
408 f = std::abs(f) * -1.0f;
415 opt.dim, opt.numCentroids,
416 faiss::METRIC_INNER_PRODUCT);
417 cpuIndex.train(opt.numTrain, trainVecs.data());
418 cpuIndex.add(opt.numAdd, addVecs.data());
419 cpuIndex.nprobe = opt.nprobe;
425 config.
device = opt.device;
431 cpuIndex.metric_type,
433 gpuIndex.copyFrom(&cpuIndex);
434 gpuIndex.setNumProbes(opt.nprobe);
437 auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
// NOTE(review): the body of this loop is not visible in this excerpt.
440 for (
auto& f : queryVecs) {
// This test is float32-only, so the float32 tolerances are always used.
444 bool compFloat16 =
false;
445 faiss::gpu::compareIndices(queryVecs,
447 opt.numQuery, opt.dim, opt.k, opt.toString(),
448 compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
452 compFloat16 ? 0.99f : 0.1f,
453 compFloat16 ? 0.65f : 0.015f);
// Searches a trained, populated GPU index with all-NaN query vectors and
// expects every result slot to hold index -1 and distance
// numeric_limits<float>::max(). The declarations of opt, the GPU index and
// numQuery are not visible in this excerpt.
460 TEST(TestGpuIndexIVFFlat, QueryNaN) {
461 faiss::gpu::newTestSeed();
465 std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
466 std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
472 config.
device = opt.device;
482 gpuIndex.setNumProbes(opt.nprobe);
484 gpuIndex.train(opt.numTrain, trainVecs.data());
485 gpuIndex.add(opt.numAdd, addVecs.data());
// Query buffer filled entirely with quiet NaNs.
488 std::vector<float> nans(numQuery * opt.dim,
489 std::numeric_limits<float>::quiet_NaN());
// Output buffers start at 0 so the sentinel values checked below must have
// been written by the search.
491 std::vector<float> distances(numQuery * opt.k, 0);
492 std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
494 gpuIndex.search(numQuery,
500 for (
int q = 0; q < numQuery; ++q) {
501 for (
int k = 0; k < opt.k; ++k) {
502 EXPECT_EQ(indices[q * opt.k + k], -1);
503 EXPECT_EQ(distances[q * opt.k + k], std::numeric_limits<float>::max());
// Adds a NaN-filled batch to a trained GPU index, checks the resulting
// ntotal, and then runs a search. The declarations of opt, the GPU index and
// numNans are not visible in this excerpt.
508 TEST(TestGpuIndexIVFFlat, AddNaN) {
509 faiss::gpu::newTestSeed();
517 config.
device = opt.device;
527 gpuIndex.setNumProbes(opt.nprobe);
// Batch of numNans vectors, initialised entirely to quiet NaN.
530 std::vector<float> nans(numNans * opt.dim,
531 std::numeric_limits<float>::quiet_NaN());
// NOTE(review): the body of this loop is not visible here. Since ntotal is
// expected to be 1 after the add, it presumably overwrites one vector with
// valid values. Confirm against the full file.
534 for (
int i = 0; i < opt.dim; ++i) {
538 std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
539 gpuIndex.train(opt.numTrain, trainVecs.data());
// The index is empty before the add; afterwards exactly one vector is counted.
542 EXPECT_EQ(gpuIndex.ntotal, 0);
543 gpuIndex.add(numNans, nans.data());
546 EXPECT_EQ(gpuIndex.ntotal, 1);
548 std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
549 std::vector<float> distance(opt.numQuery * opt.k, 0);
550 std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
// The search is run, but its outputs are not checked in the visible lines.
553 gpuIndex.search(opt.numQuery, queryVecs.data(), opt.k,
554 distance.data(), indices.data());
// Builds a large CPU index, copies it to a GPU index configured with
// MemorySpace::Unified, and compares search results. Several declarations
// (dim, numAdd, nprobe, numQuery, k, the indices) are not visible in this
// excerpt.
558 TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
561 int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
// Guard on full unified-memory support for the chosen device. The body of
// this branch is not visible in this excerpt.
563 if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
569 int numCentroids = 1024;
// Number of dim-sized float vectors that fit in 24 GiB (1024^3 * 24 bytes).
// The variable being initialised is not visible here; presumably numAdd.
573 (size_t) 1024 * 1024 * 1024 * 24 / ((
size_t) dim *
sizeof(float));
574 size_t numTrain = numCentroids * 40;
579 LOG(INFO) <<
"generating vecs";
580 std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
581 std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);
// NOTE(review): "train CPU" is logged twice. The lines between the two log
// statements are not visible here, so the first message may be mislabelled.
583 LOG(INFO) <<
"train CPU";
586 LOG(INFO) <<
"train CPU";
587 cpuIndex.train(numTrain, trainVecs.data());
588 LOG(INFO) <<
"add CPU";
589 cpuIndex.add(numAdd, addVecs.data());
590 cpuIndex.nprobe = nprobe;
// Request the unified memory space for the GPU index.
597 config.
memorySpace = faiss::gpu::MemorySpace::Unified;
604 LOG(INFO) <<
"copy from CPU";
605 gpuIndex.copyFrom(&cpuIndex);
606 gpuIndex.setNumProbes(nprobe);
608 LOG(INFO) <<
"compare";
610 faiss::gpu::compareIndices(cpuIndex, gpuIndex,
611 numQuery, dim, k,
"Unified Memory",
bool useFloat16IVFStorage
bool useFloat16
Whether or not data is stored as float16.
int device
GPU device on which the index is resident.
GpuIndexFlatConfig flatConfig
Configuration for the coarse quantizer object.
IndicesOptions indicesOptions
Index storage options for the GPU.
MetricType
Some algorithms support both an inner product version and a L2 search version.