// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. #include #include #include #include #include #include #include #include #include #include #include /* This demonstrates how to query several independent IVF indexes with a trained *index in common. This avoids to duplicate the coarse quantizer and metadata *in memory. **/ namespace { int d = 64; }; // namespace std::vector get_random_vectors(size_t n, int seed) { std::vector x(n * d); faiss::rand_smooth_vectors(n, d, x.data(), seed); seed++; return x; } /** InvetedLists implementation that dispatches the search to an InvertedList * object that is passed in at query time */ struct DispatchingInvertedLists : faiss::ReadOnlyInvertedLists { DispatchingInvertedLists(size_t nlist, size_t code_size) : faiss::ReadOnlyInvertedLists(nlist, code_size) { use_iterator = true; } faiss::InvertedListsIterator* get_iterator( size_t list_no, void* inverted_list_context = nullptr) const override { assert(inverted_list_context); auto il = static_cast(inverted_list_context); return il->get_iterator(list_no); } using idx_t = faiss::idx_t; size_t list_size(size_t list_no) const override { FAISS_THROW_MSG("use iterator interface"); } const uint8_t* get_codes(size_t list_no) const override { FAISS_THROW_MSG("use iterator interface"); } const idx_t* get_ids(size_t list_no) const override { FAISS_THROW_MSG("use iterator interface"); } }; TEST(COMMON, test_common_trained_index) { int N = 3; // number of independent indexes int nt = 500; // training vectors int nb = 200; // nb database vectors per index int nq = 10; // nb queries performed on each index int k = 4; // restults requested per query // construct and build an "empty index": a trained index that does not // itself hold any data std::unique_ptr empty_index(dynamic_cast( faiss::index_factory(d, "IVF32,PQ8np"))); auto xt = get_random_vectors(nt, 123); empty_index->train(nt, xt.data()); empty_index->nprobe = 4; // reference run: build one index for each set of db / queries and record // results std::vector> ref_I(N); for (int i = 0; i < N; i++) { // clone the empty index std::unique_ptr index( faiss::clone_index(empty_index.get())); auto xb = get_random_vectors(nb, 1234 + i); auto xq = get_random_vectors(nq, 12345 + i); // add vectors and perform a search index->add(nb, xb.data()); std::vector D(k * nq); std::vector I(k * nq); index->search(nq, xq.data(), k, D.data(), I.data()); // record result as reference ref_I[i] = I; } // build a set of inverted lists for each independent index std::vector sub_invlists; for (int i = 0; i < N; i++) { // swap in other inverted lists sub_invlists.emplace_back(empty_index->nlist, empty_index->code_size); faiss::InvertedLists* invlists = &sub_invlists.back(); // replace_invlists swaps in a new InvertedLists for an existing index empty_index->replace_invlists(invlists, false); empty_index->reset(); // reset id counter to 0 // populate inverted lists auto xb = get_random_vectors(nb, 1234 + i); empty_index->add(nb, xb.data()); } // perform search dispatching to the sub-invlists. At search time, we don't // use replace_invlists because that would wreak havoc in a multithreaded // context DispatchingInvertedLists di(empty_index->nlist, empty_index->code_size); empty_index->replace_invlists(&di, false); std::vector> new_I(N); // run searches in the independent indexes but with a common empty_index #pragma omp parallel for for (int i = 0; i < N; i++) { auto xq = get_random_vectors(nq, 12345 + i); std::vector D(k * nq); std::vector I(k * nq); // here we set to what sub-index the queries should be directed faiss::SearchParametersIVF params; params.nprobe = empty_index->nprobe; params.inverted_list_context = &sub_invlists[i]; empty_index->search(nq, xq.data(), k, D.data(), I.data(), ¶ms); new_I[i] = I; } // compare with reference reslt for (int i = 0; i < N; i++) { ASSERT_EQ(ref_I[i], new_I[i]); } }