#include "../../IndexFlat.h"
#include "../../IndexIVFFlat.h"
#include "../GpuIndexIVFFlat.h"
#include "../StandardGpuResources.h"
#include "../utils/DeviceUtils.h"
#include "../test/TestUtils.h"

#include <algorithm>
#include <cmath>
#include <gtest/gtest.h>
#include <glog/logging.h>
#include <limits>
#include <sstream>
#include <vector>
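
// Relative error bounds for comparing GPU results against the CPU reference;
// float16 computation is allowed a looser bound than full float32.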
constexpr float kF16MaxRelErr = 0.3f;
constexpr float kF32MaxRelErr = 0.03f;
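
// Randomized parameters for one test instance; toString() labels any failure
// with the values that were chosen.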
struct Options {
  Options() {
    numAdd = faiss::gpu::randVal(4000, 20000);
    dim = faiss::gpu::randVal(64, 200);

    numCentroids = std::sqrt((float) numAdd);
    numTrain = numCentroids * 40;
    nprobe = faiss::gpu::randVal(10, numCentroids);
    numQuery = faiss::gpu::randVal(32, 100);
    k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
    indicesOpt = faiss::gpu::randSelect({
        faiss::gpu::INDICES_CPU,
        faiss::gpu::INDICES_32_BIT,
        faiss::gpu::INDICES_64_BIT});

    device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
  }

  std::string toString() const {
    std::stringstream str;
    str << "IVFFlat device " << device
        << " numVecs " << numAdd
        << " dim " << dim
        << " numCentroids " << numCentroids
        << " nprobe " << nprobe
        << " numQuery " << numQuery
        << " k " << k
        << " indicesOpt " << indicesOpt;

    return str.str();
  }

  int numAdd;
  int dim;
  int numCentroids;
  int numTrain;
  int nprobe;
  int numQuery;
  int k;
  int device;
  faiss::gpu::IndicesOptions indicesOpt;
};
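
// Trains and fills a CPU IndexIVFFlat, copies it to the GPU with copyFrom(),
// and checks that GPU query results track the CPU results within the bounds
// above. dimOverride pins the dimension instead of using a random one.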
void queryTest(faiss::MetricType metricType,
               bool useFloat16CoarseQuantizer,
               bool useFloat16,
               int dimOverride = -1) {
  for (int tries = 0; tries < 3; ++tries) {
    Options opt;
    opt.dim = dimOverride != -1 ? dimOverride : opt.dim;

    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

    faiss::IndexFlatL2 quantizerL2(opt.dim);
    faiss::IndexFlatIP quantizerIP(opt.dim);
    faiss::Index* quantizer = metricType == faiss::METRIC_L2 ?
      (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;

    faiss::IndexIVFFlat cpuIndex(quantizer,
                                 opt.dim, opt.numCentroids, metricType);
    cpuIndex.train(opt.numTrain, trainVecs.data());
    cpuIndex.add(opt.numAdd, addVecs.data());
    cpuIndex.nprobe = opt.nprobe;

    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexIVFFlatConfig config;
    config.device = opt.device;
    config.indicesOptions = opt.indicesOpt;
    config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
    config.useFloat16IVFStorage = useFloat16;

    faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, cpuIndex.d, cpuIndex.nlist,
                                         cpuIndex.metric_type, config);
    gpuIndex.copyFrom(&cpuIndex);
    gpuIndex.setNumProbes(opt.nprobe);

    bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                               opt.numQuery, opt.dim, opt.k, opt.toString(),
                               compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                               compFloat16 ? 0.70f : 0.1f,
                               compFloat16 ? 0.65f : 0.015f);
  }
}
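
// Trains a CPU IndexIVFFlat, copies it to the GPU, then adds the same vectors
// to both indices independently and verifies that queries still agree.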
void addTest(faiss::MetricType metricType,
             bool useFloat16CoarseQuantizer,
             bool useFloat16) {
  for (int tries = 0; tries < 5; ++tries) {
    Options opt;
    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

    faiss::IndexFlatL2 quantizerL2(opt.dim);
    faiss::IndexFlatIP quantizerIP(opt.dim);
    faiss::Index* quantizer = metricType == faiss::METRIC_L2 ?
      (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;
    faiss::IndexIVFFlat cpuIndex(quantizer,
                                 opt.dim, opt.numCentroids, metricType);
    cpuIndex.train(opt.numTrain, trainVecs.data());
    cpuIndex.nprobe = opt.nprobe;

    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexIVFFlatConfig config;
    config.device = opt.device;
    config.indicesOptions = opt.indicesOpt;
    config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
    config.useFloat16IVFStorage = useFloat16;
    faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, cpuIndex.d, cpuIndex.nlist,
                                         cpuIndex.metric_type, config);
    gpuIndex.copyFrom(&cpuIndex);
    gpuIndex.setNumProbes(opt.nprobe);

    cpuIndex.add(opt.numAdd, addVecs.data());
    gpuIndex.add(opt.numAdd, addVecs.data());

    bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                               opt.numQuery, opt.dim, opt.k, opt.toString(),
                               compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                               compFloat16 ? 0.70f : 0.1f,
                               compFloat16 ? 0.30f : 0.015f);
  }
}
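
// Builds and fills a GPU index directly, copies it back to a CPU index with
// copyTo(), and checks that size, dimension, list count, nprobe and query
// results all carry over.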
void copyToTest(bool useFloat16CoarseQuantizer,
                bool useFloat16) {
  Options opt;
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = opt.device;
  config.indicesOptions = opt.indicesOpt;
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
  config.useFloat16IVFStorage = useFloat16;
  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, opt.dim, opt.numCentroids,
                                       faiss::METRIC_L2, config);
  gpuIndex.train(opt.numTrain, trainVecs.data());
  gpuIndex.add(opt.numAdd, addVecs.data());
  gpuIndex.setNumProbes(opt.nprobe);

  // copyTo() should completely overwrite this placeholder CPU index
  faiss::IndexFlatL2 cpuQuantizer(1);
  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer, 1, 1, faiss::METRIC_L2);
  gpuIndex.copyTo(&cpuIndex);

  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);

  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
  EXPECT_EQ(cpuIndex.d, opt.dim);
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());

  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                             opt.numQuery, opt.dim, opt.k, opt.toString(),
                             compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                             compFloat16 ? 0.70f : 0.1f,
                             compFloat16 ? 0.30f : 0.015f);
}
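
// Builds and fills a CPU index, copies it onto a deliberately mismatched GPU
// index with copyFrom(), and checks that the GPU index takes on the CPU
// index's size, dimension, list count, nprobe and query results.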
void copyFromTest(bool useFloat16CoarseQuantizer,
                  bool useFloat16) {
  Options opt;
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

  faiss::IndexFlatL2 cpuQuantizer(opt.dim);
  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer,
                               opt.dim, opt.numCentroids, faiss::METRIC_L2);
  cpuIndex.nprobe = opt.nprobe;
  cpuIndex.train(opt.numTrain, trainVecs.data());
  cpuIndex.add(opt.numAdd, addVecs.data());

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = opt.device;
  config.indicesOptions = opt.indicesOpt;
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
  config.useFloat16IVFStorage = useFloat16;

  // copyFrom() should completely overwrite this placeholder GPU index
  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, 1, 1, faiss::METRIC_L2, config);
  gpuIndex.setNumProbes(1);

  gpuIndex.copyFrom(&cpuIndex);

  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);

  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
  EXPECT_EQ(cpuIndex.d, opt.dim);
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());

  bool compFloat16 = useFloat16CoarseQuantizer || useFloat16;
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                             opt.numQuery, opt.dim, opt.k, opt.toString(),
                             compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                             compFloat16 ? 0.70f : 0.1f,
                             compFloat16 ? 0.30f : 0.015f);
}
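
// add() tests; the FloatA_B prefix names the coarse quantizer precision (A)
// and the IVF storage precision (B).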
TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) {
  addTest(faiss::METRIC_L2, false, false);
}

TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) {
  addTest(faiss::METRIC_INNER_PRODUCT, false, false);
}

TEST(TestGpuIndexIVFFlat, Float32_16_Add_L2) {
  addTest(faiss::METRIC_L2, false, true);
}

TEST(TestGpuIndexIVFFlat, Float32_16_Add_IP) {
  addTest(faiss::METRIC_INNER_PRODUCT, false, true);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) {
  addTest(faiss::METRIC_L2, true, false);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
  addTest(faiss::METRIC_INNER_PRODUCT, true, false);
}
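
// query tests at randomized dimensions; Float16 uses float16 IVF storage,
// Float16_32 a float16 coarse quantizer with float32 storage.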
TEST(TestGpuIndexIVFFlat, Float32_Query_L2) {
  queryTest(faiss::METRIC_L2, false, false);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_IP) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, false);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2) {
  queryTest(faiss::METRIC_L2, false, true);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) {
  queryTest(faiss::METRIC_L2, true, false);
}

TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
  queryTest(faiss::METRIC_INNER_PRODUCT, true, false);
}
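
// query tests pinned to specific dimensions (64, 128, 256) via dimOverride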
TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) {
  queryTest(faiss::METRIC_L2, false, false, 64);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 64);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2_64) {
  queryTest(faiss::METRIC_L2, false, true, 64);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP_64) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 64);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) {
  queryTest(faiss::METRIC_L2, false, false, 128);
}

TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 128);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2_128) {
  queryTest(faiss::METRIC_L2, false, true, 128);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP_128) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 128);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_L2_256) {
  queryTest(faiss::METRIC_L2, false, true, 256);
}

TEST(TestGpuIndexIVFFlat, Float16_Query_IP_256) {
  queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 256);
}
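
// copyTo() round trips with float16 and float32 IVF storage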
TEST(TestGpuIndexIVFFlat, Float32_16_CopyTo) {
  copyToTest(false, true);
}

TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) {
  copyToTest(false, false);
}
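
// All database vectors are pushed onto the negative side so that every inner
// product with a non-negative query is negative; GPU and CPU results should
// still agree.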
TEST(TestGpuIndexIVFFlat, Float32_negative) {
  Options opt;
  auto trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
  auto addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

  // Put all vecs on the negative side
  for (auto& f : trainVecs) {
    f = std::abs(f) * -1.0f;
  }

  for (auto& f : addVecs) {
    f *= std::abs(f) * -1.0f;
  }

  faiss::IndexFlatIP quantizerIP(opt.dim);
  faiss::IndexIVFFlat cpuIndex(&quantizerIP,
                               opt.dim, opt.numCentroids,
                               faiss::METRIC_INNER_PRODUCT);
  cpuIndex.train(opt.numTrain, trainVecs.data());
  cpuIndex.add(opt.numAdd, addVecs.data());
  cpuIndex.nprobe = opt.nprobe;

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = opt.device;
  config.indicesOptions = opt.indicesOpt;
  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, cpuIndex.d, cpuIndex.nlist,
                                       cpuIndex.metric_type, config);
  gpuIndex.copyFrom(&cpuIndex);
  gpuIndex.setNumProbes(opt.nprobe);

  // keep the queries non-negative so that every inner product is negative
  auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
  for (auto& f : queryVecs) {
    f = std::abs(f);
  }

  bool compFloat16 = false;
  faiss::gpu::compareIndices(queryVecs,
                             cpuIndex, gpuIndex,
                             opt.numQuery, opt.dim, opt.k, opt.toString(),
                             compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                             compFloat16 ? 0.99f : 0.1f,
                             compFloat16 ? 0.65f : 0.015f);
}
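
// Searching with all-NaN queries should return no results (-1 indices and
// max-float distances) rather than crashing.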
TEST(TestGpuIndexIVFFlat, QueryNaN) {
  Options opt;
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = opt.device;
  config.indicesOptions = opt.indicesOpt;
  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, opt.dim, opt.numCentroids,
                                       faiss::METRIC_L2, config);
  gpuIndex.setNumProbes(opt.nprobe);

  gpuIndex.train(opt.numTrain, trainVecs.data());
  gpuIndex.add(opt.numAdd, addVecs.data());

  int numQuery = opt.numQuery;
  std::vector<float> nans(numQuery * opt.dim,
                          std::numeric_limits<float>::quiet_NaN());

  std::vector<float> distances(numQuery * opt.k, 0);
  std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);

  gpuIndex.search(numQuery,
                  nans.data(), opt.k, distances.data(), indices.data());

  for (int q = 0; q < numQuery; ++q) {
    for (int k = 0; k < opt.k; ++k) {
      EXPECT_EQ(indices[q * opt.k + k], -1);
      EXPECT_EQ(distances[q * opt.k + k], std::numeric_limits<float>::max());
    }
  }
}
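
// Adding vectors that contain NaN components should not crash, and only the
// single valid vector below should end up in the index.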
TEST(TestGpuIndexIVFFlat, AddNaN) {
  Options opt;

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = opt.device;
  config.indicesOptions = opt.indicesOpt;
  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, opt.dim, opt.numCentroids,
                                       faiss::METRIC_L2, config);
  gpuIndex.setNumProbes(opt.nprobe);

  int numNans = 10;
  std::vector<float> nans(numNans * opt.dim,
                          std::numeric_limits<float>::quiet_NaN());

  // make the first vector valid; it is the only one that should be added
  for (int i = 0; i < opt.dim; ++i) {
    nans[i] = i;
  }

  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
  gpuIndex.train(opt.numTrain, trainVecs.data());

  EXPECT_EQ(gpuIndex.ntotal, 0);
  gpuIndex.add(numNans, nans.data());
  // only the single valid vector should have been added
  EXPECT_EQ(gpuIndex.ntotal, 1);

  std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
  std::vector<float> distance(opt.numQuery * opt.k, 0);
  std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);

  // should not crash
  gpuIndex.search(opt.numQuery, queryVecs.data(), opt.k,
                  distance.data(), indices.data());
}
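
// Places the index data in CUDA unified (managed) memory so it can exceed
// device memory; skipped when the device lacks full unified memory support.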
TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
    return;
  }

  int dim = 256;
  int numCentroids = 1024;
  size_t numAdd =
    (size_t) 1024 * 1024 * 1024 * 24 / ((size_t) dim * sizeof(float));
  size_t numTrain = numCentroids * 40;
  int nprobe = 8;
  int numQuery = 10;
  int k = 10;

  LOG(INFO) << "generating vecs";
  std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
  std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);

  faiss::IndexFlatL2 quantizer(dim);
  faiss::IndexIVFFlat cpuIndex(&quantizer, dim, numCentroids,
                               faiss::METRIC_L2);
  LOG(INFO) << "train CPU";
  cpuIndex.train(numTrain, trainVecs.data());
  LOG(INFO) << "add CPU";
  cpuIndex.add(numAdd, addVecs.data());
  cpuIndex.nprobe = nprobe;

  faiss::gpu::StandardGpuResources res;
  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = device;
  config.memorySpace = faiss::gpu::MemorySpace::Unified;
  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, dim, numCentroids,
                                       faiss::METRIC_L2, config);
  LOG(INFO) << "copy from CPU";
  gpuIndex.copyFrom(&cpuIndex);
  gpuIndex.setNumProbes(nprobe);

  LOG(INFO) << "compare";
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
                             numQuery, dim, k, "Unified Memory",
                             kF32MaxRelErr,
                             0.1f,
                             0.015f);
}

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);

  // fix the seed so the randomized parameters above are reproducible
  faiss::gpu::setTestSeed(100);

  return RUN_ALL_TESTS();
}

// Configuration fields referenced by the tests above:
//   bool useFloat16IVFStorage      whether the IVF list data is stored as float16
//   bool useFloat16 (flatConfig)   whether or not data is stored as float16
//   int device                     GPU device on which the index is resident
//   GpuIndexFlatConfig flatConfig  configuration for the coarse quantizer object
//   IndicesOptions indicesOptions  index storage options for the GPU
//   faiss::MetricType              some algorithms support both an inner product
//                                  version and an L2 search version
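
// A minimal usage sketch of the API exercised by these tests, shown here for
// reference only. The function name, device 0, and the sizes are illustrative
// assumptions rather than part of the test suite; the calls themselves
// (GpuIndexIVFFlatConfig, GpuIndexIVFFlat, train/add/search) mirror the tests
// above.
void exampleUsageSketch() {
  int dim = 64;
  int numCentroids = 256;
  int numVecs = numCentroids * 40;

  // random data standing in for real vectors
  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);

  faiss::gpu::StandardGpuResources res;

  faiss::gpu::GpuIndexIVFFlatConfig config;
  config.device = 0;                                  // GPU to use (assumed)
  config.indicesOptions = faiss::gpu::INDICES_64_BIT; // id storage on the GPU
  config.flatConfig.useFloat16 = false;               // float32 coarse quantizer
  config.useFloat16IVFStorage = false;                // float32 IVF lists

  faiss::gpu::GpuIndexIVFFlat index(&res, dim, numCentroids,
                                    faiss::METRIC_L2, config);
  index.train(numVecs, vecs.data());
  index.add(numVecs, vecs.data());
  index.setNumProbes(8);

  // query the first stored vector against the index
  int k = 5;
  std::vector<float> distances(k);
  std::vector<faiss::Index::idx_t> labels(k);
  index.search(1, vecs.data(), k, distances.data(), labels.data());
}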