12 #include <gtest/gtest.h>
14 #include <faiss/IndexIVFFlat.h>
15 #include <faiss/IndexIVFPQ.h>
16 #include <faiss/IndexFlat.h>
17 #include <faiss/MetaIndexes.h>
18 #include <faiss/FaissAssert.h>
19 #include <faiss/VectorTransform.h>
20 #include <faiss/OnDiskInvertedLists.h>
21 #include <faiss/IVFlib.h>
29 static pthread_mutex_t mutex;
33 Tempfilename (
const char *prefix =
nullptr) {
34 pthread_mutex_lock (&mutex);
35 char *cfname = tempnam (
nullptr, prefix);
38 pthread_mutex_unlock (&mutex);
42 if (access (filename.c_str(), F_OK)) {
43 unlink (filename.c_str());
48 return filename.c_str();
53 pthread_mutex_t Tempfilename::mutex = PTHREAD_MUTEX_INITIALIZER;
68 std::vector <float> database;
69 std::vector <float> queries;
70 std::vector<idx_t> ids;
73 CommonData(): database (nb * d), queries (nq * d), ids(nb), quantizer (d) {
75 for (
size_t i = 0; i < nb * d; i++) {
76 database[i] = drand48();
78 for (
size_t i = 0; i < nq * d; i++) {
79 queries[i] = drand48();
81 for (
int i = 0; i < nb; i++) {
82 ids[i] = 123 + 456 * i;
86 iflat.train(nb, database.data());
96 bool standard_merge =
true)
99 std::vector<idx_t> refI(k * nq);
100 std::vector<float> refD(k * nq);
102 index_shards->
search(nq, cd.queries.data(), k, refD.data(), refI.data());
103 Tempfilename filename;
105 std::vector<idx_t> newI(k * nq);
106 std::vector<float> newD(k * nq);
108 if (standard_merge) {
110 for (
int i = 1; i < nindex; i++) {
111 faiss::ivflib::merge_into(
112 index_shards->at(0), index_shards->at(i),
116 index_shards->sync_with_shard_indexes();
118 std::vector<const faiss::InvertedLists *> lists;
121 for (
int i = 0; i < nindex; i++) {
127 lists.push_back (index_ivf->invlists);
128 ntotal += index_ivf->
ntotal;
135 il->merge_from(lists.data(), lists.size());
141 index_shards->at(0)->
search(nq, cd.queries.data(),
142 k, newD.data(), newI.data());
145 for (
size_t i = 0; i < k * nq; i++) {
146 if (refI[i] != newI[i]) {
157 TEST(MERGE, merge_flat_no_ids) {
159 index_shards.own_fields =
true;
160 for (
int i = 0; i < nindex; i++) {
161 index_shards.add_shard (
165 index_shards.
add(nb, cd.database.data());
166 size_t prev_ntotal = index_shards.
ntotal;
167 int ndiff = compare_merged(&index_shards,
true);
168 EXPECT_EQ (prev_ntotal, index_shards.
ntotal);
174 TEST(MERGE, merge_flat) {
176 index_shards.own_fields =
true;
178 for (
int i = 0; i < nindex; i++) {
179 index_shards.add_shard (
184 index_shards.
add_with_ids(nb, cd.database.data(), cd.ids.data());
185 int ndiff = compare_merged(&index_shards,
false);
190 TEST(MERGE, merge_flat_vt) {
192 index_shards.own_fields =
true;
202 ipt.train(nb, cd.database.data());
205 for (
int i = 0; i < nindex; i++) {
211 index_shards.add_shard (ipt);
214 index_shards.
add_with_ids(nb, cd.database.data(), cd.ids.data());
215 size_t prev_ntotal = index_shards.
ntotal;
216 int ndiff = compare_merged(&index_shards,
false);
217 EXPECT_EQ (prev_ntotal, index_shards.
ntotal);
223 TEST(MERGE, merge_flat_ondisk) {
225 index_shards.own_fields =
true;
226 Tempfilename filename;
228 for (
int i = 0; i < nindex; i++) {
232 ivf->nlist, ivf->code_size,
234 ivf->replace_invlists(il,
true);
236 index_shards.add_shard (ivf);
240 index_shards.
add_with_ids(nb, cd.database.data(), cd.ids.data());
241 int ndiff = compare_merged(&index_shards,
false);
247 TEST(MERGE, merge_flat_ondisk_2) {
249 index_shards.own_fields =
true;
251 for (
int i = 0; i < nindex; i++) {
252 index_shards.add_shard (
256 index_shards.
add_with_ids(nb, cd.database.data(), cd.ids.data());
257 int ndiff = compare_merged(&index_shards,
false,
false);
Randomly rotate a set of vectors.
void add_with_ids(idx_t n, const float *x, const long *xids) override
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add(idx_t n, const float *x) override
supported only for sub-indices that implement add_with_ids
long idx_t
all indices are of this type
void replace_invlists(InvertedLists *il, bool own=false)
replace the inverted lists; the old one is deallocated if own_invlists is true
idx_t ntotal
total number of indexed vectors
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
bool is_trained
set if the Index does not require training, or if training is done already
size_t nlist
number of possible key values
size_t code_size
code size per vector in bytes