faiss/gpu/test/TestUtils.cpp

280 lines
8.3 KiB
C++

/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "../test/TestUtils.h"
#include "../../utils.h"
#include <cmath>
#include <gtest/gtest.h>
#include <set>
#include <sstream>
#include <time.h>
#include <unordered_map>
namespace faiss { namespace gpu {
// Symmetric relative error between `a` and `b`: |a - b| divided by the mean
// of |a| and |b|.
//
// Returns 0 whenever the two values compare equal. This matters for
// a == b == 0 (e.g. two exactly-zero distances), where the plain formula
// evaluates 0 / 0 and yields NaN; a NaN would make any `<=` tolerance check
// on the result fail even though the values agree perfectly.
inline float relativeError(float a, float b) {
  if (a == b) {
    return 0.0f;
  }

  return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
}
// Seed shared by the random helpers in this file: setTestSeed() stores the
// chosen seed here, and randVecs() hands it to faiss::float_rand and then
// bumps it. It is a plain global with no synchronization; that is fine as
// long as a test drives these helpers from a single thread.
long s_seed{1};
// Picks a fresh seed from the wall clock and installs it via setTestSeed().
// Only the nanosecond field of the CLOCK_REALTIME reading is used.
void newTestSeed() {
  timespec now;
  clock_gettime(CLOCK_REALTIME, &now);

  const long seedFromClock = now.tv_nsec;
  setTestSeed(seedFromClock);
}
// Installs `seed` as the seed for the test helpers: it is echoed to stdout
// (so a failing run can be reproduced), recorded in s_seed, and used to seed
// the drand48 family of generators.
void setTestSeed(long seed) {
  printf("testing with random seed %ld\n", seed);

  s_seed = seed;
  srand48(seed);
}
// Draws an integer from the inclusive range [a, b] using lrand48().
//
// The caller is expected to pass 0 <= a <= b. Both conditions are checked
// with gtest EXPECT_* expectations, which record a test failure but do not
// stop execution, so the draw below is still performed on bad input.
int randVal(int a, int b) {
  EXPECT_GE(a, 0);
  EXPECT_LE(a, b);

  // number of distinct values in [a, b]
  const int rangeSize = b + 1 - a;
  const long draw = lrand48();

  return a + (draw % rangeSize);
}
// Returns either `true` or `false`, as chosen by randSelect from the
// two-element list {true, false}.
// NOTE(review): randSelect is not defined in this file; presumably it picks
// one of the listed values at random -- confirm against its declaration.
bool randBool() {
return randSelect<bool>({true, false});
}
// Generates `num` vectors of dimension `dim`, returned as one flat buffer of
// num * dim floats filled by faiss::float_rand using the current s_seed.
//
// s_seed is incremented after every call so that successive calls made under
// the same test seed do not hand back the same values.
std::vector<float> randVecs(size_t num, size_t dim) {
  std::vector<float> v(num * dim);

  faiss::float_rand(v.data(), v.size(), s_seed);
  // unfortunately we generate separate sets of vectors, and don't
  // want the same values
  ++s_seed;

  return v;
}
// Runs the same batch of random queries against a reference index and an
// index under test, then hands both result sets to compareLists().
//
// refIndex / testIndex   indices to search; both receive identical queries
// numQuery, dim          number of random query vectors and their dimension
// k                      number of results requested per query
// configMsg              free-form description included in failure output
// maxRelativeError,
// pctMaxDiff1,
// pctMaxDiffN            tolerances forwarded unchanged to compareLists()
//
// compareLists() is always invoked with basic statistics printed, per-entry
// differences not printed, and assertions enabled.
void compareIndices(faiss::Index& refIndex,
                    faiss::Index& testIndex,
                    int numQuery, int dim, int k,
                    const std::string& configMsg,
                    float maxRelativeError,
                    float pctMaxDiff1,
                    float pctMaxDiffN) {
  const std::vector<float> queryVecs = faiss::gpu::randVecs(numQuery, dim);
  const int totalResults = numQuery * k;

  // Reference results
  std::vector<float> refDistance(totalResults, 0);
  std::vector<faiss::Index::idx_t> refIndices(totalResults, -1);
  refIndex.search(numQuery, queryVecs.data(),
                  k, refDistance.data(), refIndices.data());

  // Results from the index under test
  std::vector<float> testDistance(totalResults, 0);
  std::vector<faiss::Index::idx_t> testIndices(totalResults, -1);
  testIndex.search(numQuery, queryVecs.data(),
                   k, testDistance.data(), testIndices.data());

  const bool printBasicStats = true;
  const bool printDiffs = false;
  const bool assertOnErr = true;

  faiss::gpu::compareLists(refDistance.data(), refIndices.data(),
                           testDistance.data(), testIndices.data(),
                           numQuery, k,
                           configMsg,
                           printBasicStats, printDiffs, assertOnErr,
                           maxRelativeError, pctMaxDiff1, pctMaxDiffN);
}
// Reads element (i, j) of a row-major 2-d array stored contiguously at `p`,
// where each row holds `dim2` elements. The row count is accepted only to
// keep call sites symmetric; it is not needed to compute the offset.
template <typename T>
inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
  const int flatOffset = i * dim2 + j;
  return p[flatOffset];
}
// Compares two sets of search results, a reference and a test set, each laid
// out as a row-major (dim1 x dim2) array: dim1 queries with dim2 results per
// query.
//
// refDist / refInd      reference distances and indices
// testDist / testInd    distances and indices being checked
// dim1, dim2            number of queries, number of results per query
// configMsg             free-form text attached to assertion failures and,
//                       if non-empty, printed ahead of the statistics
// printBasicStats       print a summary of the discrepancies found
// printDiffs            print every slot whose index differs from the
//                       reference
// assertOnErr           raise (non-fatal) gtest expectations on violations
// maxRelativeError      per-slot bound on relativeError(refD, testD)
// pctMaxDiff1           bound, as a fraction of dim1 * dim2, on the number of
//                       test indices that are displaced at all or missing
// pctMaxDiffN           bound, as a fraction of dim1 * dim2, on the number of
//                       test indices displaced by more than one position
//
// For every test index the function looks up the position at which the
// reference reported that same index for the same query and records the
// absolute difference in position (-1 if the reference never reported it).
void compareLists(const float* refDist,
                  const faiss::Index::idx_t* refInd,
                  const float* testDist,
                  const faiss::Index::idx_t* testInd,
                  int dim1, int dim2,
                  const std::string& configMsg,
                  bool printBasicStats, bool printDiffs, bool assertOnErr,
                  float maxRelativeError,
                  float pctMaxDiff1,
                  float pctMaxDiffN) {
  const int numResults = dim1 * dim2;

  // Largest absolute distance discrepancy over all slots
  float maxAbsErr = 0.0f;
  for (int slot = 0; slot < numResults; ++slot) {
    const float absErr = std::abs(refDist[slot] - testDist[slot]);
    maxAbsErr = std::max(maxAbsErr, absErr);
  }

  // Per query: reference index -> position at which the reference reported
  // it. If the reference repeats an index, the last position wins.
  std::vector<std::unordered_map<faiss::Index::idx_t, int>> refPositions;
  for (int query = 0; query < dim1; ++query) {
    refPositions.emplace_back();
    auto& positionOf = refPositions.back();

    for (int result = 0; result < dim2; ++result) {
      positionOf[lookup(refInd, query, result, dim1, dim2)] = result;
    }
  }

  // Positional difference recorded for every (query, result) slot
  std::vector<std::vector<int>> indexDiffs;

  int diff1 = 0;            // index displaced by exactly 1
  int diffN = 0;            // index displaced by more than 1
  int diffInf = 0;          // index absent from the reference list
  int nonUniqueIndices = 0; // repeated index within one test query
  double avgDiff = 0.0;
  int maxDiff = 0;
  float maxRelErr = 0.0f;

  for (int query = 0; query < dim1; ++query) {
    const auto& positionOf = refPositions[query];
    std::set<faiss::Index::idx_t> seen;
    std::vector<int> queryDiffs;

    for (int result = 0; result < dim2; ++result) {
      const auto testI = lookup(testInd, query, result, dim1, dim2);

      // Every index reported for a query must be distinct; a repeat is a
      // serious error. insert() only succeeds the first time an index is
      // seen.
      const bool uniqueIndex = seen.insert(testI).second;
      if (assertOnErr) {
        EXPECT_TRUE(uniqueIndex) << configMsg
                                 << " " << query
                                 << " " << result
                                 << " " << testI;
      }
      if (!uniqueIndex) {
        ++nonUniqueIndices;
      }

      const auto refPos = positionOf.find(testI);
      if (refPos == positionOf.end()) {
        // Not in the reference list at all: counted separately and left out
        // of both maxDiff and the running sum
        ++diffInf;
        queryDiffs.push_back(-1);
      } else {
        const int diff = std::abs(result - refPos->second);
        queryDiffs.push_back(diff);

        if (diff >= 1) {
          maxDiff = std::max(diff, maxDiff);
          if (diff == 1) {
            ++diff1;
          } else {
            ++diffN;
          }
        }
        avgDiff += static_cast<double>(diff);
      }

      // Distances are compared slot by slot, independent of the indices
      const float refD = lookup(refDist, query, result, dim1, dim2);
      const float testD = lookup(testDist, query, result, dim1, dim2);
      const float relErr = relativeError(refD, testD);

      if (assertOnErr) {
        EXPECT_LE(relErr, maxRelativeError) << configMsg
                                            << " (" << query << ", " << result
                                            << ") refD: " << refD
                                            << " testD: " << testD;
      }
      maxRelErr = std::max(maxRelErr, relErr);
    }

    indexDiffs.emplace_back(std::move(queryDiffs));
  }

  if (assertOnErr) {
    EXPECT_LE((float) (diff1 + diffN + diffInf),
              (float) numResults * pctMaxDiff1) << configMsg;

    // diffInf is excluded here: a missing index may well have been
    // displaced by just one position, off the end of the list
    EXPECT_LE((float) diffN, (float) numResults * pctMaxDiffN) << configMsg;
  }

  // The sum only covers indices that were found, but the average is taken
  // over all slots
  avgDiff /= static_cast<double>(numResults);

  if (printBasicStats) {
    if (!configMsg.empty()) {
      printf("Config\n"
             "----------------------------\n"
             "%s\n",
             configMsg.c_str());
    }

    const float resultCount = static_cast<float>(numResults);
    const float pctDiff1 = 100.0f * static_cast<float>(diff1) / resultCount;
    const float pctDiffN = 100.0f * static_cast<float>(diffN) / resultCount;
    const float pctDiffInf =
      100.0f * static_cast<float>(diffInf) / resultCount;

    printf("Result error and differences\n"
           "----------------------------\n"
           "max abs diff %.7f rel diff %.7f\n"
           "idx diff avg: %.5g max: %d\n"
           "idx diff of 1: %d (%.3f%% of queries)\n"
           "idx diff of >1: %d (%.3f%% of queries)\n"
           "idx diff not found: %d (%.3f%% of queries)"
           " [typically a last element inversion]\n"
           "non-unique indices: %d (a serious error if >0)\n",
           maxAbsErr, maxRelErr,
           avgDiff, maxDiff,
           diff1, pctDiff1,
           diffN, pctDiffN,
           diffInf, pctDiffInf,
           nonUniqueIndices);
  }

  if (printDiffs) {
    printf("differences:\n");
    printf("==================\n");

    for (int query = 0; query < dim1; ++query) {
      for (int result = 0; result < dim2; ++result) {
        const long refI = lookup(refInd, query, result, dim1, dim2);
        const long testI = lookup(testInd, query, result, dim1, dim2);
        if (refI == testI) {
          continue;
        }

        const float refD = lookup(refDist, query, result, dim1, dim2);
        const float testD = lookup(testDist, query, result, dim1, dim2);

        if (refD == testD) {
          // Same distance, different index
          printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
                 query, result,
                 indexDiffs[query][result],
                 refI, testI);
          continue;
        }

        // NOTE(review): this relative figure divides by the larger distance,
        // which is not the same formula as relativeError() used for the
        // assertion above -- confirm that the difference is intended.
        const float delta = std::abs(refD - testD);
        const float relErr = delta / std::max(refD, testD);

        printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
               "rel %.8f ref %a tst %a)\n",
               query, result,
               indexDiffs[query][result],
               refI, testI, delta, relErr, refD, testD);
      }
    }
  }
}
} }