/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "../../IndexFlat.h"
#include "../GpuIndexFlat.h"
#include "../StandardGpuResources.h"
#include "../utils/DeviceUtils.h"
#include "../test/TestUtils.h"
#include <gtest/gtest.h>
#include <limits>
#include <sstream>
#include <vector>

// FIXME: figure out a better way to test fp16
constexpr float kF16MaxRelErr = 0.07f;
constexpr float kF32MaxRelErr = 6e-3f;
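
// Compares a GPU flat index against the equivalent CPU index on random data.
// kOverride > 0 forces a specific k (used below to hit the specialized k == 1 path).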
void testFlat(bool useL2,
              bool useFloat16,
              bool useTransposed,
              int kOverride = -1) {
  int numVecs = faiss::gpu::randVal(1000, 20000);
  int dim = faiss::gpu::randVal(50, 800);
  int numQuery = faiss::gpu::randVal(1, 512);

  // Due to loss of precision in a float16 accumulator, for large k,
  // the number of differences is pretty huge. Restrict ourselves to a
  // fairly small `k` for float16
  int k = useFloat16 ?
    std::min(faiss::gpu::randVal(1, 50), numVecs) :
    std::min(faiss::gpu::randVal(1, 1024), numVecs);
  if (kOverride > 0) {
    k = kOverride;
  }

  faiss::IndexFlatIP cpuIndexIP(dim);
  faiss::IndexFlatL2 cpuIndexL2(dim);

  faiss::IndexFlat* cpuIndex =
    useL2 ? (faiss::IndexFlat*) &cpuIndexL2 : (faiss::IndexFlat*) &cpuIndexIP;

  // Construct on a random device to test multi-device, if we have
  // multiple devices
  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = device;
  config.useFloat16 = useFloat16;
  config.storeTransposed = useTransposed;

  faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);

  faiss::gpu::GpuIndexFlat* gpuIndex =
    useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
            (faiss::gpu::GpuIndexFlat*) &gpuIndexIP;

  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
  cpuIndex->add(numVecs, vecs.data());
  gpuIndex->add(numVecs, vecs.data());

  std::stringstream str;
  str << (useL2 ? "L2" : "IP") << " numVecs " << numVecs
      << " dim " << dim
      << " useFloat16 " << useFloat16
      << " transposed " << useTransposed
      << " numQuery " << numQuery
      << " k " << k;

  // To some extent, we depend upon the relative error for the test
  // for float16
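  // The trailing float arguments are the comparison tolerances
  // (see compareIndices in TestUtils.h): the maximum relative distance error,
  // then the allowed fractions of mismatched results; fp16 gets much looser bounds.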
  faiss::gpu::compareIndices(*cpuIndex, *gpuIndex, numQuery, dim, k, str.str(),
                             useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                             // FIXME: the fp16 bounds are
                             // useless when math (the accumulator) is
                             // in fp16. Figure out another way to test
                             useFloat16 ? 0.99f : 0.1f,
                             useFloat16 ? 0.65f : 0.015f);
}
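
// Each test below reruns testFlat() over several fresh random seeds, covering
// both the default and the storeTransposed vector layouts.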
TEST(TestGpuIndexFlat, IP_Float32) {
  for (int tries = 0; tries < 5; ++tries) {
    faiss::gpu::newTestSeed();
    testFlat(false, false, false);
    testFlat(false, false, true);
  }
}

TEST(TestGpuIndexFlat, L2_Float32) {
  for (int tries = 0; tries < 5; ++tries) {
    faiss::gpu::newTestSeed();
    testFlat(true, false, false);
    testFlat(true, false, true);
  }
}

// test specialized k == 1 codepath
TEST(TestGpuIndexFlat, L2_Float32_K1) {
  for (int tries = 0; tries < 5; ++tries) {
    faiss::gpu::newTestSeed();
    testFlat(true, false, false, 1);
    testFlat(true, false, true, 1);
  }
}

TEST(TestGpuIndexFlat, IP_Float16) {
  for (int tries = 0; tries < 5; ++tries) {
    faiss::gpu::newTestSeed();
    testFlat(false, true, false);
    testFlat(false, true, true);
  }
}

TEST(TestGpuIndexFlat, L2_Float16) {
  for (int tries = 0; tries < 5; ++tries) {
    faiss::gpu::newTestSeed();
    testFlat(true, true, false);
    testFlat(true, true, true);
  }
}

// test specialized k == 1 codepath
TEST(TestGpuIndexFlat, L2_Float16_K1) {
  for (int tries = 0; tries < 5; ++tries) {
    faiss::gpu::newTestSeed();
    testFlat(true, true, false, 1);
    testFlat(true, true, true, 1);
  }
}

TEST(TestGpuIndexFlat, QueryEmpty) {
  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = 0;
  config.useFloat16 = false;
  config.storeTransposed = false;

  int dim = 128;
  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);

  // Querying an empty index should not blow up, and just return
  // (FLT_MAX, -1)
  int numQuery = 10;
  int k = 50;
  std::vector<float> queries(numQuery * dim, 1.0f);

  std::vector<float> dist(numQuery * k, 0);
  std::vector<faiss::Index::idx_t> ind(numQuery * k);

  gpuIndex.search(numQuery, queries.data(), k, dist.data(), ind.data());

  for (auto d : dist) {
    EXPECT_EQ(d, std::numeric_limits<float>::max());
  }

  for (auto i : ind) {
    EXPECT_EQ(i, -1);
  }
}

TEST(TestGpuIndexFlat, CopyFrom) {
  faiss::gpu::newTestSeed();

  int numVecs = faiss::gpu::randVal(100, 200);
  int dim = faiss::gpu::randVal(1, 1000);

  faiss::IndexFlatL2 cpuIndex(dim);

  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
  cpuIndex.add(numVecs, vecs.data());

  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = device;
  config.useFloat16 = false;
  config.storeTransposed = false;

  // Fill with garbage values
  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
  gpuIndex.copyFrom(&cpuIndex);

  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
  EXPECT_EQ(gpuIndex.ntotal, numVecs);

  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
  EXPECT_EQ(cpuIndex.d, dim);

  int idx = faiss::gpu::randVal(0, numVecs - 1);

  std::vector<float> gpuVals(dim);
  gpuIndex.reconstruct(idx, gpuVals.data());

  std::vector<float> cpuVals(dim);
  cpuIndex.reconstruct(idx, cpuVals.data());

  EXPECT_EQ(gpuVals, cpuVals);
}

TEST(TestGpuIndexFlat, CopyTo) {
  faiss::gpu::newTestSeed();

  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  int numVecs = faiss::gpu::randVal(100, 200);
  int dim = faiss::gpu::randVal(1, 1000);

  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = device;
  config.useFloat16 = false;
  config.storeTransposed = false;

  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);

  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
  gpuIndex.add(numVecs, vecs.data());

  // Fill with garbage values
  faiss::IndexFlatL2 cpuIndex(2000);
  gpuIndex.copyTo(&cpuIndex);

  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
  EXPECT_EQ(gpuIndex.ntotal, numVecs);

  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
  EXPECT_EQ(cpuIndex.d, dim);

  int idx = faiss::gpu::randVal(0, numVecs - 1);

  std::vector<float> gpuVals(dim);
  gpuIndex.reconstruct(idx, gpuVals.data());

  std::vector<float> cpuVals(dim);
  cpuIndex.reconstruct(idx, cpuVals.data());

  EXPECT_EQ(gpuVals, cpuVals);
}

TEST(TestGpuIndexFlat, UnifiedMemory) {
  // Construct on a random device to test multi-device, if we have
  // multiple devices
  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

  if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
    return;
  }

  int dim = 256;

  // FIXME: GpuIndexFlat doesn't support > 2^31 (vecs * dims) due to
  // kernel indexing, so we can't test unified memory for memory
  // oversubscription.
  size_t numVecs = 50000;
  int numQuery = 10;
  int k = 10;

  faiss::IndexFlatL2 cpuIndexL2(dim);

  faiss::gpu::StandardGpuResources res;
  res.noTempMemory();

  faiss::gpu::GpuIndexFlatConfig config;
  config.device = device;
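  // Place the index storage in CUDA unified (managed) memory rather than
  // plain device memory.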
  config.memorySpace = faiss::gpu::MemorySpace::Unified;

  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);

  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
  cpuIndexL2.add(numVecs, vecs.data());
  gpuIndexL2.add(numVecs, vecs.data());

  // To some extent, we depend upon the relative error for the test
  faiss::gpu::compareIndices(cpuIndexL2, gpuIndexL2,
                             numQuery, dim, k, "Unified Memory",
                             kF32MaxRelErr,
                             0.1f,
                             0.015f);
}