12 #include <gtest/gtest.h>
14 #include <faiss/IndexIVFFlat.h>
15 #include <faiss/IndexIVFPQ.h>
16 #include <faiss/IndexFlat.h>
17 #include <faiss/MetaIndexes.h>
18 #include <faiss/FaissAssert.h>
19 #include <faiss/VectorTransform.h>
20 #include <faiss/OnDiskInvertedLists.h>
// Test helper that moves the inverted-list content of index1 into index0 by
// calling IndexIVF::merge_from on the underlying IVF indexes.
//
// index0    destination index; its ntotal is refreshed from the IVF index
//           after the merge.
// index1    source index; its ntotal is refreshed the same way.
// shift_ids when true, ivf0->ntotal is passed as the second argument of
//           merge_from, otherwise 0 is passed.
//
// Every FAISS_THROW_IF_NOT below is a precondition check on the two inputs.
// NOTE(review): several lines of the original listing are absent here (the
// branch header guarding the IndexPreTransform path and the closing braces),
// so the exact nesting of the statements below cannot be confirmed.
30 void merge_into(Index *index0, Index *index1,
bool shift_ids) {
// Both indexes must have the same dimension d.
31 FAISS_THROW_IF_NOT (index0->d == index1->d);
// First attempt: treat both arguments directly as IVF indexes.
32 IndexIVF *ivf0 =
dynamic_cast<IndexIVF *
>(index0);
33 IndexIVF *ivf1 =
dynamic_cast<IndexIVF *
>(index1);
// Second attempt: treat both arguments as IndexPreTransform wrappers.
36 IndexPreTransform *pt0 =
dynamic_cast<IndexPreTransform *
>(index0);
37 IndexPreTransform *pt1 =
dynamic_cast<IndexPreTransform *
>(index1);
// Both must be wrappers with transform chains of equal length.
40 FAISS_THROW_IF_NOT (pt0 && pt1);
41 FAISS_THROW_IF_NOT (pt0->chain.size() == pt1->chain.size());
// NOTE(review): the loop index is a signed int compared against size().
42 for (
int i = 0; i < pt0->chain.size(); i++) {
// NOTE(review): typeid is applied to chain[i] itself. If chain stores
// pointers, this compares the static pointer type and always matches;
// typeid(*chain[i]) would compare the dynamic transform types - confirm.
43 FAISS_THROW_IF_NOT (
typeid(pt0->chain[i]) ==
typeid(pt1->chain[i]));
// Retry the IVF cast on the indexes wrapped by the pre-transforms.
46 ivf0 =
dynamic_cast<IndexIVF *
>(pt0->index);
47 ivf1 =
dynamic_cast<IndexIVF *
>(pt1->index);
// At this point both sides must have resolved to an IndexIVF.
50 FAISS_THROW_IF_NOT (ivf0);
51 FAISS_THROW_IF_NOT (ivf1);
// Perform the merge; the second argument is ivf0->ntotal when shift_ids is
// set and 0 otherwise.
53 ivf0->merge_from (*ivf1, shift_ids ? ivf0->ntotal : 0);
// Copy the IVF totals back to the objects the caller passed in (these may be
// the wrappers rather than the IVF indexes themselves).
56 index0->ntotal = ivf0->ntotal;
57 index1->ntotal = ivf1->ntotal;
// Fragments of the Tempfilename helper: it stores a name obtained from
// tempnam() in `filename` and hands it out as a C string.
// NOTE(review): the struct header and the member function headers are not
// present in this listing; presumably lines 70-72 belong to the constructor
// and lines 76-77 to the destructor - confirm against the full file.

// Lock shared by all instances; held around the tempnam() call below.
65 static pthread_mutex_t mutex;
// NOTE(review): POSIX marks tempnam() as obsolescent, and the returned name
// can be taken by another process before it is used; mkstemp() is the usual
// replacement - confirm whether that matters for this test.
70 pthread_mutex_lock (&mutex);
71 filename = tempnam (
nullptr, prefix);
72 pthread_mutex_unlock (&mutex);
// NOTE(review): access() returns 0 when the path exists and -1 otherwise, so
// as written unlink() is only reached when the F_OK check FAILS. The
// condition looks inverted (expected `== 0`), which would leave existing
// temporary files behind - confirm.
76 if (access (filename.c_str(), F_OK)) {
77 unlink (filename.c_str());
// Exposes the stored file name as a C string.
82 return filename.c_str();
// Out-of-class definition of the static mutex, statically initialized.
87 pthread_mutex_t Tempfilename::mutex = PTHREAD_MUTEX_INITIALIZER;
// Fragments of CommonData: fixture data used by the merge tests.
// NOTE(review): the struct header, the declarations of quantizer / iflat and
// the closing braces are not present in this listing.

// Flat storage for the vectors to index, the query vectors, and the explicit
// ids used with add_with_ids.
102 std::vector <float> database;
103 std::vector <float> queries;
104 std::vector<idx_t> ids;
// The constructor sizes database to nb * d floats, queries to nq * d floats,
// ids to nb entries, and constructs quantizer with d.
107 CommonData(): database (nb * d), queries (nq * d), ids(nb), quantizer (d) {
// Fill the database with drand48() values.
109 for (
size_t i = 0; i < nb * d; i++) {
110 database[i] = drand48();
// Fill the queries with drand48() values.
112 for (
size_t i = 0; i < nq * d; i++) {
113 queries[i] = drand48();
// Assign ids[i] = 123 + 456 * i: distinct and deliberately not 0..nb-1.
115 for (
int i = 0; i < nb; i++) {
116 ids[i] = 123 + 456 * i;
// Train iflat on the nb database vectors.
120 iflat.
train(nb, database.data());
// Fragments of compare_merged: searches a sharded index, merges the shards
// into shard 0, searches shard 0 alone, and compares the returned labels.
// NOTE(review): the function header (name, leading parameters, return type),
// the else-branch header, the difference counter and the return statement are
// not present in this listing. The tests store the result in `int ndiff`, so
// presumably it returns the number of differing labels - confirm.

// Last parameter: selects the merge_into path below; defaults to true.
132 bool standard_merge =
true)
// Reference results: labels and distances from searching the sharded index
// before any merge.
135 std::vector<idx_t> refI(k * nq);
136 std::vector<float> refD(k * nq);
138 index_shards->
search(nq, cd.queries.data(), k, refD.data(), refI.data());
// Buffers for the results obtained after the merge.
141 std::vector<idx_t> newI(k * nq);
142 std::vector<float> newD(k * nq);
144 if (standard_merge) {
// Merge every shard with index >= 1 into shard 0.
146 for (
int i = 1; i < nindex; i++) {
147 merge_into(index_shards->at(0), index_shards->at(i), shift_ids);
150 index_shards->sync_with_shard_indexes();
// Alternative path (presumably the else branch): gather inverted lists.
152 std::vector<const faiss::InvertedLists *> lists;
155 for (
int i = 0; i < nindex; i++) {
// Record this shard's inverted lists and accumulate its vector count.
161 lists.push_back (index_ivf->invlists);
162 ntotal += index_ivf->
ntotal;
// Merge all collected lists into il (its construction is not visible here).
169 il->merge_from(lists.data(), lists.size());
// Install il on index0; the second argument is presumably an ownership flag.
171 index0->replace_invlists(il,
true);
// Search shard 0 only, which should now hold the merged data.
175 index_shards->at(0)->
search(nq, cd.queries.data(),
176 k, newD.data(), newI.data());
// Compare labels position by position against the reference results.
179 for (
size_t i = 0; i < k * nq; i++) {
180 if (refI[i] != newI[i]) {
// Merge test using add() (no explicit ids) on the sharded index.
// NOTE(review): the index_shards declaration, the add_shard arguments and any
// assertion on ndiff are not present in this listing.
189 TEST(MERGE, merge_flat_no_ids) {
191 index_shards.own_fields =
true;
// Register nindex shards.
192 for (
int i = 0; i < nindex; i++) {
193 index_shards.add_shard (
// Add the nb database vectors without ids.
197 index_shards.
add(nb, cd.database.data());
// Remember the total so it can be compared after the merge.
198 size_t prev_ntotal = index_shards.
ntotal;
// Second argument is true here (presumably shift_ids - confirm).
199 int ndiff = compare_merged(&index_shards,
true);
// The merge must not change the total number of indexed vectors.
200 EXPECT_EQ (prev_ntotal, index_shards.
ntotal);
// Merge test using add_with_ids() with the explicit ids from CommonData.
// NOTE(review): the index_shards declaration, the add_shard arguments and any
// assertion on ndiff are not present in this listing.
206 TEST(MERGE, merge_flat) {
208 index_shards.own_fields =
true;
// Register nindex shards.
210 for (
int i = 0; i < nindex; i++) {
211 index_shards.add_shard (
// Add the nb database vectors together with cd.ids.
216 index_shards.
add_with_ids(nb, cd.database.data(), cd.ids.data());
// Second argument is false here (presumably shift_ids - confirm).
217 int ndiff = compare_merged(&index_shards,
false);
// Merge test where each shard is the object `ipt`, which is trained first
// (presumably an IndexPreTransform, given the "vt" suffix - confirm).
// NOTE(review): the construction of ipt and of index_shards, and any
// assertion on ndiff, are not present in this listing.
222 TEST(MERGE, merge_flat_vt) {
224 index_shards.own_fields =
true;
// Train ipt on the nb database vectors before it is used as a shard.
234 ipt.train(nb, cd.database.data());
237 for (
int i = 0; i < nindex; i++) {
243 index_shards.add_shard (ipt);
// Add the nb database vectors together with cd.ids.
246 index_shards.
add_with_ids(nb, cd.database.data(), cd.ids.data());
// Remember the total so it can be compared after the merge.
247 size_t prev_ntotal = index_shards.
ntotal;
// Second argument is false here (presumably shift_ids - confirm).
248 int ndiff = compare_merged(&index_shards,
false);
// The merge must not change the total number of indexed vectors.
249 EXPECT_EQ (prev_ntotal, index_shards.
ntotal);
// Merge test where each shard's inverted lists are replaced before the shard
// is registered (presumably with on-disk lists, given the test name).
// NOTE(review): the creation of ivf and il is not present in this listing;
// only the nlist / code_size arguments of one constructor call are visible.
255 TEST(MERGE, merge_flat_ondisk) {
257 index_shards.own_fields =
true;
260 for (
int i = 0; i < nindex; i++) {
// Arguments of a call whose head is not visible (presumably building il).
264 ivf->nlist, ivf->code_size,
// Install il on the shard; the second argument is presumably an ownership flag.
266 ivf->replace_invlists(il,
true);
268 index_shards.add_shard (ivf);
// Add the nb database vectors together with cd.ids.
272 index_shards.
add_with_ids(nb, cd.database.data(), cd.ids.data());
// Second argument is false here (presumably shift_ids - confirm).
273 int ndiff = compare_merged(&index_shards,
false);
// Merge test that calls compare_merged with its last argument set to false,
// i.e. with standard_merge disabled, so the merge_into path is skipped.
// NOTE(review): the index_shards declaration, the add_shard arguments and any
// assertion on ndiff are not present in this listing.
279 TEST(MERGE, merge_flat_ondisk_2) {
281 index_shards.own_fields =
true;
// Register nindex shards.
283 for (
int i = 0; i < nindex; i++) {
284 index_shards.add_shard (
// Add the nb database vectors together with cd.ids.
288 index_shards.
add_with_ids(nb, cd.database.data(), cd.ids.data());
// Arguments: second is false (presumably shift_ids), third is false
// (standard_merge, the defaulted trailing parameter).
289 int ndiff = compare_merged(&index_shards,
false,
false);
Randomly rotate a set of vectors.
void add_with_ids(idx_t n, const float *x, const long *xids) override
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
void add(idx_t n, const float *x) override
supported only for sub-indices that implement add_with_ids
long idx_t
all indices are this type
idx_t ntotal
total nb of indexed vectors
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
bool is_trained
set if the Index does not require training, or if training is done already
size_t nlist
number of possible key values
size_t code_size
code size per vector in bytes