faiss/gpu/test/TestGpuSelect.cu

188 lines
5.5 KiB
Plaintext

/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the CC-by-NC license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "../utils/DeviceUtils.h"
#include "../utils/BlockSelectKernel.cuh"
#include "../utils/WarpSelectKernel.cuh"
#include "../utils/HostTensor.cuh"
#include "../utils/DeviceTensor.cuh"
#include "../test/TestUtils.h"
#include <algorithm>
#include <gtest/gtest.h>
#include <sstream>
#include <unordered_map>
#include <vector>
// Test binary entry point: hands the command line to Google Test, then runs
// every registered test and uses the result as the process exit code.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);

  const int exitCode = RUN_ALL_TESTS();
  return exitCode;
}
// Runs one k-selection on a random (rows x cols) float matrix on the GPU and
// checks the result against a CPU reference.
//
// rows, cols: shape of the randomly generated input matrix
// k:          number of entries selected per row; must satisfy 0 <= k <= cols
// dir:        selection direction. The CPU reference orders values
//             descending (largest first) when true and ascending when false
// warp:       true exercises runWarpSelect, false exercises runBlockSelect
//
// Failures are reported through non-fatal gtest expectations, so a single call
// can report several mismatches.
void testForSize(int rows, int cols, int k, bool dir, bool warp) {
  // The reference below reads the first k sorted entries of every row, so k
  // outside [0, cols] would index out of bounds; report it and bail out
  const bool validK = (k >= 0) && (k <= cols);
  EXPECT_EQ(validK, true)
    << "k " << k << " is not within [0, cols] for cols " << cols;
  if (!validK) {
    return;
  }

  std::vector<float> v = faiss::gpu::randVecs(rows, cols);
  faiss::gpu::HostTensor<float, 2, true> hostVal({rows, cols});

  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      hostVal[r][c] = v[r * cols + c];
    }
  }

  // Orders (column index, value) pairs by value in the requested direction
  auto ranksBefore =
    [dir](const std::pair<int, float>& a, const std::pair<int, float>& b) {
      return dir ? (a.second > b.second) : (a.second < b.second);
    };

  // CPU reference: for each row, the best k (column index, value) pairs in
  // sorted order. Only the first k entries are ever compared, so a partial
  // sort is enough and only those k entries are kept
  std::vector<std::vector<std::pair<int, float>>> hostOutValAndInd(rows);

  std::vector<std::pair<int, float>> closest;
  closest.reserve(cols);

  for (int r = 0; r < rows; ++r) {
    closest.clear();

    for (int c = 0; c < cols; ++c) {
      closest.emplace_back(c, (float) hostVal[r][c]);
    }

    std::partial_sort(closest.begin(), closest.begin() + k, closest.end(),
                      ranksBefore);
    hostOutValAndInd[r].assign(closest.begin(), closest.begin() + k);
  }

  // Select top-k on GPU
  // (the trailing 0 arguments are presumably the CUDA stream -- TODO confirm)
  faiss::gpu::DeviceTensor<float, 2, true> gpuVal(hostVal, 0);
  faiss::gpu::DeviceTensor<float, 2, true> gpuOutVal({rows, k});
  faiss::gpu::DeviceTensor<int, 2, true> gpuOutInd({rows, k});

  if (warp) {
    faiss::gpu::runWarpSelect(gpuVal, gpuOutVal, gpuOutInd, dir, k, 0);
  } else {
    faiss::gpu::runBlockSelect(gpuVal, gpuOutVal, gpuOutInd, dir, k, 0);
  }

  // Copy back to CPU
  faiss::gpu::HostTensor<float, 2, true> outVal(gpuOutVal, 0);
  faiss::gpu::HostTensor<int, 2, true> outInd(gpuOutInd, 0);

  for (int r = 0; r < rows; ++r) {
    // GPU-reported column index -> output position where it was first seen
    std::unordered_map<int, int> seenIndices;

    for (int i = 0; i < k; ++i) {
      // Values are compared for exact equality: they are expected to be
      // copies of input elements rather than computed quantities
      float gpuV = outVal[r][i];
      float cpuV = hostOutValAndInd[r][i].second;

      EXPECT_EQ(gpuV, cpuV) <<
        "rows " << rows << " cols " << cols << " k " << k << " dir " << dir
                << " row " << r << " ind " << i;

      // If there are identical elements in a row that should be
      // within the top-k, then it is possible that the index can
      // differ, because the order in which the GPU will see the
      // equivalent values is different than the CPU (and will remain
      // unspecified, since this is affected by the choice of
      // k-selection algorithm that we use)
      int gpuInd = outInd[r][i];
      int cpuInd = hostOutValAndInd[r][i].first;

      // A faulty selection could return an arbitrary index; report it as a
      // test failure instead of reading outside the host input row below
      const bool indexInRange = (gpuInd >= 0) && (gpuInd < cols);
      EXPECT_EQ(indexInRange, true) <<
        "rows " << rows << " cols " << cols << " k " << k << " dir " << dir
                << " row " << r << " ind " << i
                << " out-of-range source ind " << gpuInd;

      if (!indexInRange) {
        continue;
      }

      // We should never see duplicate indices, however
      auto itSeenIndex = seenIndices.find(gpuInd);

      // The streamed message (which dereferences the iterator) is only
      // evaluated when the expectation fails, i.e. when the index was found
      EXPECT_EQ(itSeenIndex, seenIndices.end()) <<
        "Row " << r << " user index " << gpuInd << " was seen at both " <<
        itSeenIndex->second << " and " << i;

      seenIndices[gpuInd] = i;

      if (gpuInd != cpuInd) {
        // Gather the values from the original data via index; the
        // values should be the same
        float gpuGatherV = hostVal[r][gpuInd];
        float cpuGatherV = hostVal[r][cpuInd];

        EXPECT_EQ(gpuGatherV, cpuGatherV) <<
          "rows " << rows << " cols " << cols << " k " << k << " dir " << dir
                  << " row " << r << " ind " << i << " source ind "
                  << gpuInd << " " << cpuInd;
      }
    }
  }
}
// General test of the block-select path (warp == false): random matrix shapes
// with a random k that is capped at the number of columns
TEST(TestGpuSelect, test) {
  constexpr int kNumTrials = 10;

  for (int trial = 0; trial < kNumTrials; ++trial) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 30000);

    // k cannot exceed the number of columns available to select from
    const int kCandidate = faiss::gpu::randVal(1, 1024);
    const int k = std::min(numCols, kCandidate);

    const bool dir = faiss::gpu::randBool();

    testForSize(numRows, numCols, k, dir, false);
  }
}
// Block-select path (warp == false) with k fixed at 1, i.e. only the single
// best entry of each row is selected
TEST(TestGpuSelect, test1) {
  constexpr int kNumTrials = 5;
  constexpr int kSelect = 1;

  for (int trial = 0; trial < kNumTrials; ++trial) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 30000);
    const bool dir = faiss::gpu::randBool();

    testForSize(numRows, numCols, kSelect, dir, false);
  }
}
// Block-select path (warp == false) where k equals the number of columns:
// every value of a row is returned, just in sorted order
TEST(TestGpuSelect, testExact) {
  constexpr int kNumTrials = 5;

  for (int trial = 0; trial < kNumTrials; ++trial) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 1024);
    const bool dir = faiss::gpu::randBool();

    // Select as many entries as there are columns
    const int k = numCols;
    testForSize(numRows, numCols, k, dir, false);
  }
}
// General test of the warp-select path (warp == true): random matrix shapes
// with a random k that is capped at the number of columns
TEST(TestGpuSelect, testWarp) {
  constexpr int kNumTrials = 10;

  for (int trial = 0; trial < kNumTrials; ++trial) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 30000);

    // k cannot exceed the number of columns available to select from
    const int kCandidate = faiss::gpu::randVal(1, 1024);
    const int k = std::min(numCols, kCandidate);

    const bool dir = faiss::gpu::randBool();

    testForSize(numRows, numCols, k, dir, true);
  }
}
// Warp-select path (warp == true) with k fixed at 1, i.e. only the single
// best entry of each row is selected
TEST(TestGpuSelect, test1Warp) {
  constexpr int kNumTrials = 5;
  constexpr int kSelect = 1;

  for (int trial = 0; trial < kNumTrials; ++trial) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 30000);
    const bool dir = faiss::gpu::randBool();

    testForSize(numRows, numCols, kSelect, dir, true);
  }
}
// Warp-select path (warp == true) where k equals the number of columns:
// every value of a row is returned, just in sorted order
TEST(TestGpuSelect, testExactWarp) {
  constexpr int kNumTrials = 5;

  for (int trial = 0; trial < kNumTrials; ++trial) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 1024);
    const bool dir = faiss::gpu::randBool();

    // Select as many entries as there are columns
    const int k = numCols;
    testForSize(numRows, numCols, k, dir, true);
  }
}