/** * Copyright (c) Facebook, Inc. and its affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include #include #include #include #include #include void pickEncoding(int& codes, int& dim) { std::vector codeSizes{ 3, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 96 }; // Above 32 doesn't work with no precomputed codes std::vector dimSizes{4, 8, 10, 12, 16, 20, 24, 28, 32}; while (true) { codes = codeSizes[faiss::gpu::randVal(0, codeSizes.size() - 1)]; dim = codes * dimSizes[faiss::gpu::randVal(0, dimSizes.size() - 1)]; // for such a small test, super-low or high dim is more likely to // generate comparison errors if (dim < 256 && dim >= 64) { return; } } } struct Options { Options() { numAdd = faiss::gpu::randVal(2000, 5000); numCentroids = std::sqrt((float) numAdd); numTrain = numCentroids * 40; pickEncoding(codes, dim); // TODO: Change back to `faiss::gpu::randVal(3, 7)` when we officially // support non-multiple of 8 subcodes for IVFPQ. bitsPerCode = 8; nprobe = std::min(faiss::gpu::randVal(40, 1000), numCentroids); numQuery = faiss::gpu::randVal(1, 8); // Due to the approximate nature of the query and of floating point // differences between GPU and CPU, to stay within our error bounds, only // use a small k k = std::min(faiss::gpu::randVal(5, 20), numAdd / 40); usePrecomputed = faiss::gpu::randBool(); indicesOpt = faiss::gpu::randSelect({ faiss::gpu::INDICES_CPU, faiss::gpu::INDICES_32_BIT, faiss::gpu::INDICES_64_BIT}); if (codes > 48) { // large codes can only fit using float16 useFloat16 = true; } else { useFloat16 = faiss::gpu::randBool(); } device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); } std::string toString() const { std::stringstream str; str << "IVFPQ device " << device << " numVecs " << numAdd << " dim " << dim << " numCentroids " << numCentroids << " codes " << codes << " bitsPerCode " << bitsPerCode << " nprobe " << nprobe << " numQuery " << numQuery << " k " << k << " usePrecomputed " << usePrecomputed << " indicesOpt " << indicesOpt << " useFloat16 " << useFloat16; return str.str(); } float getCompareEpsilon() const { return 0.03f; } float getPctMaxDiff1() const { return useFloat16 ? 0.30f : 0.10f; } float getPctMaxDiffN() const { return useFloat16 ? 0.05f : 0.02f; } int numAdd; int numCentroids; int numTrain; int codes; int dim; int bitsPerCode; int nprobe; int numQuery; int k; bool usePrecomputed; faiss::gpu::IndicesOptions indicesOpt; bool useFloat16; int device; }; TEST(TestGpuIndexIVFPQ, Query_L2) { for (int tries = 0; tries < 2; ++tries) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); faiss::IndexFlatL2 coarseQuantizer(opt.dim); faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids, opt.codes, opt.bitsPerCode); cpuIndex.nprobe = opt.nprobe; cpuIndex.train(opt.numTrain, trainVecs.data()); cpuIndex.add(opt.numAdd, addVecs.data()); faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = opt.device; config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.setNumProbes(opt.nprobe); faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), opt.getCompareEpsilon(), opt.getPctMaxDiff1(), opt.getPctMaxDiffN()); } } TEST(TestGpuIndexIVFPQ, Query_IP) { for (int tries = 0; tries < 2; ++tries) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); faiss::IndexFlatIP coarseQuantizer(opt.dim); faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids, opt.codes, opt.bitsPerCode); cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT; cpuIndex.nprobe = opt.nprobe; cpuIndex.train(opt.numTrain, trainVecs.data()); cpuIndex.add(opt.numAdd, addVecs.data()); faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = opt.device; config.usePrecomputedTables = false; // not supported/required for IP config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.setNumProbes(opt.nprobe); faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), opt.getCompareEpsilon(), opt.getPctMaxDiff1(), opt.getPctMaxDiffN()); } } TEST(TestGpuIndexIVFPQ, Float16Coarse) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); faiss::IndexFlatL2 coarseQuantizer(opt.dim); faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids, opt.codes, opt.bitsPerCode); cpuIndex.nprobe = opt.nprobe; cpuIndex.train(opt.numTrain, trainVecs.data()); faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = opt.device; config.flatConfig.useFloat16 = true; config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.setNumProbes(opt.nprobe); gpuIndex.add(opt.numAdd, addVecs.data()); cpuIndex.add(opt.numAdd, addVecs.data()); faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), opt.getCompareEpsilon(), opt.getPctMaxDiff1(), opt.getPctMaxDiffN()); } TEST(TestGpuIndexIVFPQ, Add_L2) { for (int tries = 0; tries < 2; ++tries) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); faiss::IndexFlatL2 coarseQuantizer(opt.dim); faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids, opt.codes, opt.bitsPerCode); cpuIndex.nprobe = opt.nprobe; cpuIndex.train(opt.numTrain, trainVecs.data()); faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = opt.device; config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.setNumProbes(opt.nprobe); gpuIndex.add(opt.numAdd, addVecs.data()); cpuIndex.add(opt.numAdd, addVecs.data()); faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), opt.getCompareEpsilon(), opt.getPctMaxDiff1(), opt.getPctMaxDiffN()); } } TEST(TestGpuIndexIVFPQ, Add_IP) { for (int tries = 0; tries < 2; ++tries) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); faiss::IndexFlatIP coarseQuantizer(opt.dim); faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids, opt.codes, opt.bitsPerCode); cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT; cpuIndex.nprobe = opt.nprobe; cpuIndex.train(opt.numTrain, trainVecs.data()); faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = opt.device; config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.setNumProbes(opt.nprobe); gpuIndex.add(opt.numAdd, addVecs.data()); cpuIndex.add(opt.numAdd, addVecs.data()); faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), opt.getCompareEpsilon(), opt.getPctMaxDiff1(), opt.getPctMaxDiffN()); } } TEST(TestGpuIndexIVFPQ, CopyTo) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = opt.device; config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, opt.dim, opt.numCentroids, opt.codes, opt.bitsPerCode, faiss::METRIC_L2, config); gpuIndex.setNumProbes(opt.nprobe); gpuIndex.train(opt.numTrain, trainVecs.data()); gpuIndex.add(opt.numAdd, addVecs.data()); // Use garbage values to see if we overwrite them faiss::IndexFlatL2 cpuQuantizer(1); faiss::IndexIVFPQ cpuIndex(&cpuQuantizer, 1, 1, 1, 1); gpuIndex.copyTo(&cpuIndex); EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal); EXPECT_EQ(gpuIndex.ntotal, opt.numAdd); EXPECT_EQ(cpuIndex.d, gpuIndex.d); EXPECT_EQ(cpuIndex.d, opt.dim); EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists()); EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes()); EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers()); EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes); EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode()); EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode); // Query both objects; results should be equivalent faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), opt.getCompareEpsilon(), opt.getPctMaxDiff1(), opt.getPctMaxDiffN()); } TEST(TestGpuIndexIVFPQ, CopyFrom) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); faiss::IndexFlatL2 coarseQuantizer(opt.dim); faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids, opt.codes, opt.bitsPerCode); cpuIndex.nprobe = opt.nprobe; cpuIndex.train(opt.numTrain, trainVecs.data()); cpuIndex.add(opt.numAdd, addVecs.data()); // Use garbage values to see if we overwrite them faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = opt.device; config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, 1, 1, 1, 1, faiss::METRIC_L2, config); gpuIndex.setNumProbes(1); gpuIndex.copyFrom(&cpuIndex); // Make sure we are equivalent EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal); EXPECT_EQ(gpuIndex.ntotal, opt.numAdd); EXPECT_EQ(cpuIndex.d, gpuIndex.d); EXPECT_EQ(cpuIndex.d, opt.dim); EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists()); EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes()); EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers()); EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes); EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode()); EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode); // Query both objects; results should be equivalent faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), opt.getCompareEpsilon(), opt.getPctMaxDiff1(), opt.getPctMaxDiffN()); } TEST(TestGpuIndexIVFPQ, QueryNaN) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = opt.device; config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, opt.dim, opt.numCentroids, opt.codes, opt.bitsPerCode, faiss::METRIC_L2, config); gpuIndex.setNumProbes(opt.nprobe); gpuIndex.train(opt.numTrain, trainVecs.data()); gpuIndex.add(opt.numAdd, addVecs.data()); int numQuery = 5; std::vector nans(numQuery * opt.dim, std::numeric_limits::quiet_NaN()); std::vector distances(numQuery * opt.k, 0); std::vector indices(numQuery * opt.k, 0); gpuIndex.search(numQuery, nans.data(), opt.k, distances.data(), indices.data()); for (int q = 0; q < numQuery; ++q) { for (int k = 0; k < opt.k; ++k) { EXPECT_EQ(indices[q * opt.k + k], -1); EXPECT_EQ(distances[q * opt.k + k], std::numeric_limits::max()); } } } TEST(TestGpuIndexIVFPQ, AddNaN) { Options opt; faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = opt.device; config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, opt.dim, opt.numCentroids, opt.codes, opt.bitsPerCode, faiss::METRIC_L2, config); gpuIndex.setNumProbes(opt.nprobe); int numNans = 10; std::vector nans(numNans * opt.dim, std::numeric_limits::quiet_NaN()); // Make one vector valid, which should actually add for (int i = 0; i < opt.dim; ++i) { nans[i] = 0.0f; } std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); gpuIndex.train(opt.numTrain, trainVecs.data()); // should not crash EXPECT_EQ(gpuIndex.ntotal, 0); gpuIndex.add(numNans, nans.data()); std::vector queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim); std::vector distance(opt.numQuery * opt.k, 0); std::vector indices(opt.numQuery * opt.k, 0); // should not crash gpuIndex.search(opt.numQuery, queryVecs.data(), opt.k, distance.data(), indices.data()); } TEST(TestGpuIndexIVFPQ, UnifiedMemory) { // Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); if (!faiss::gpu::getFullUnifiedMemSupport(device)) { return; } int dim = 128; int numCentroids = 256; // Unfortunately it would take forever to add 24 GB in IVFPQ data, // so just perform a small test with data allocated in the unified // memory address space size_t numAdd = 10000; size_t numTrain = numCentroids * 40; int numQuery = 10; int k = 10; int nprobe = 8; int codes = 8; int bitsPerCode = 8; std::vector trainVecs = faiss::gpu::randVecs(numTrain, dim); std::vector addVecs = faiss::gpu::randVecs(numAdd, dim); faiss::IndexFlatL2 quantizer(dim); faiss::IndexIVFPQ cpuIndex(&quantizer, dim, numCentroids, codes, bitsPerCode); cpuIndex.train(numTrain, trainVecs.data()); cpuIndex.add(numAdd, addVecs.data()); cpuIndex.nprobe = nprobe; faiss::gpu::StandardGpuResources res; res.noTempMemory(); faiss::gpu::GpuIndexIVFPQConfig config; config.device = device; config.memorySpace = faiss::gpu::MemorySpace::Unified; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, dim, numCentroids, codes, bitsPerCode, faiss::METRIC_L2, config); gpuIndex.copyFrom(&cpuIndex); gpuIndex.setNumProbes(nprobe); faiss::gpu::compareIndices(cpuIndex, gpuIndex, numQuery, dim, k, "Unified Memory", 0.015f, 0.1f, 0.015f); } int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); // just run with a fixed test seed faiss::gpu::setTestSeed(100); return RUN_ALL_TESTS(); }