faiss/tests/test_ivfpq_codec.cpp
Lucas Hosseini a8118acbc5
Facebook sync (May 2019) + relicense (#838)
Changelog:

- changed license: BSD+Patents -> MIT
- propagates exceptions raised in sub-indexes of IndexShards and IndexReplicas
- support for searching several inverted lists in parallel (parallel_mode != 0)
- better support for PQ codes where nbit != 8 or 16
- IVFSpectralHash implementation: spectral hash codes inside an IVF
- 6-bit per component scalar quantizer (4 and 8 bit were already supported)
- combinations of inverted lists: HStackInvertedLists and VStackInvertedLists
- configurable number of threads for OnDiskInvertedLists prefetching (including 0=no prefetch)
- more test and demo code compatible with Python 3 (print with parentheses)
- refactored benchmark code: data loading is now in a single file
2019-05-28 16:17:22 +02:00

67 lines
1.6 KiB
C++

/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <stdexcept>
#include <vector>

#include <gtest/gtest.h>

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/utils.h>
namespace {
// dimension of the vectors to index
int d = 64;
// size of the database we plan to index
size_t nb = 8000;
double eval_codec_error (long ncentroids, long m, const std::vector<float> &v)
{
faiss::IndexFlatL2 coarse_quantizer (d);
faiss::IndexIVFPQ index (&coarse_quantizer, d,
ncentroids, m, 8);
index.pq.cp.niter = 10; // speed up train
index.train (nb, v.data());
// encode and decode to compute reconstruction error
std::vector<long> keys (nb);
std::vector<uint8_t> codes (nb * m);
index.encode_multiple (nb, keys.data(), v.data(), codes.data(), true);
std::vector<float> v2 (nb * d);
index.decode_multiple (nb, keys.data(), codes.data(), v2.data());
return faiss::fvec_L2sqr (v.data(), v2.data(), nb * d);
}
} // namespace
TEST(IVFPQ, codec) {
    // Synthetic database of nb vectors of dimension d, filled component by
    // component with successive drand48() draws.
    std::vector<float> database (nb * d);
    for (float &component : database) {
        component = drand48();
    }

    // Reference configuration: 16 coarse centroids, 8 PQ codes.
    double baseline_err = eval_codec_error(16, 8, database);

    // More coarse centroids (128 instead of 16) must lower the
    // reconstruction error.
    double finer_coarse_err = eval_codec_error(128, 8, database);
    EXPECT_GT(baseline_err, finer_coarse_err);

    // More PQ codes (16 instead of 8) must lower the reconstruction
    // error as well.
    double more_codes_err = eval_codec_error(16, 16, database);
    EXPECT_GT(baseline_err, more_codes_err);
}