16 #include <gtest/gtest.h>
18 #include <faiss/IndexIVF.h>
19 #include <faiss/IndexBinaryIVF.h>
20 #include <faiss/AutoTune.h>
21 #include <faiss/index_io.h>
22 #include <faiss/IVFlib.h>
23 #include <faiss/VectorTransform.h>
25 using namespace faiss;
47 std::vector<float> make_data(
size_t n)
49 std::vector <float> database (n * d);
50 for (
size_t i = 0; i < n * d; i++) {
51 database[i] = drand48();
56 std::unique_ptr<Index> make_trained_index(
const char *index_type,
60 d, index_type, metric_type));
61 auto xt = make_data(nt);
62 index->
train(nt, xt.data());
67 std::vector<idx_t> search_index(
Index *index,
const float *xq) {
68 std::vector<idx_t> I(k * nq);
69 std::vector<float> D(k * nq);
70 index->
search (nq, xq, k, D.data(), I.data());
83 void test_lowlevel_access (
const char *index_key,
MetricType metric) {
84 std::unique_ptr<Index> index = make_trained_index(index_key, metric);
86 auto xb = make_data (nb);
87 index->add(nb, xb.data());
95 const float * xbt = xb.data();
96 std::unique_ptr<float []> del_xbt;
101 if (xbt != xb.data()) {
102 del_xbt.reset((
float*)xbt);
106 IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
115 std::vector<idx_t> list_nos (nb);
116 std::vector<uint8_t> codes (index_ivf->
code_size * nb);
118 index_ivf->
encode_vectors (nb, xbt, list_nos.data(), codes.data());
124 for (
int list_no = 0; list_no < index_ivf->
nlist; list_no++) {
127 size_t list_size = il->
list_size (list_no);
128 for (
int i = 0; i < list_size; i++) {
129 const uint8_t *ref_code = ivf_codes.get() + i * il->
code_size;
130 const uint8_t *new_code =
131 codes.data() + ivf_ids[i] * il->
code_size;
132 EXPECT_EQ (memcmp(ref_code, new_code, il->
code_size), 0);
143 auto xq = make_data (nq);
144 auto ref_I = search_index (index.get(), xq.data());
147 const float * xqt = xq.data();
148 std::unique_ptr<float []> del_xqt;
152 if (xqt != xq.data()) {
153 del_xqt.reset((
float*)xqt);
158 int nprobe = index_ivf->
nprobe;
160 std::vector<idx_t> q_lists (nq * nprobe);
161 std::vector<float> q_dis (nq * nprobe);
164 q_dis.data(), q_lists.data());
167 std::unique_ptr<InvertedListScanner> scanner (
170 for (
int i = 0; i < nq; i++) {
171 std::vector<idx_t> I (k, -1);
172 float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
173 std::vector<float> D (k, default_dis);
175 scanner->set_query (xqt + i * dt);
177 for (
int j = 0; j < nprobe; j++) {
178 int list_no = q_lists[i * nprobe + j];
179 if (list_no < 0)
continue;
180 scanner->set_list (list_no, q_dis[i * nprobe + j]);
185 scanner->scan_codes (
189 D.data(), I.data(), k);
194 for (
int jj = 0; jj < k; jj++) {
199 float computed_D = scanner->distance_to_code (
202 EXPECT_EQ (computed_D, D[jj]);
208 if (metric == METRIC_L2) {
209 maxheap_reorder (k, D.data(), I.data());
211 minheap_reorder (k, D.data(), I.data());
215 for (
int j = 0; j < k; j++) {
216 EXPECT_EQ (I[j], ref_I[i * k + j]);
231 TEST(TestLowLevelIVF, IVFFlatL2) {
232 test_lowlevel_access (
"IVF32,Flat", METRIC_L2);
235 TEST(TestLowLevelIVF, PCAIVFFlatL2) {
236 test_lowlevel_access (
"PCAR16,IVF32,Flat", METRIC_L2);
239 TEST(TestLowLevelIVF, IVFFlatIP) {
240 test_lowlevel_access (
"IVF32,Flat", METRIC_INNER_PRODUCT);
243 TEST(TestLowLevelIVF, IVFSQL2) {
244 test_lowlevel_access (
"IVF32,SQ8", METRIC_L2);
247 TEST(TestLowLevelIVF, IVFSQIP) {
248 test_lowlevel_access (
"IVF32,SQ8", METRIC_INNER_PRODUCT);
252 TEST(TestLowLevelIVF, IVFPQL2) {
253 test_lowlevel_access (
"IVF32,PQ4np", METRIC_L2);
256 TEST(TestLowLevelIVF, IVFPQIP) {
257 test_lowlevel_access (
"IVF32,PQ4np", METRIC_INNER_PRODUCT);
271 std::vector<uint8_t> make_data_binary(
size_t n)
274 std::vector <uint8_t> database (n * nbit / 8);
275 for (
size_t i = 0; i < n * d; i++) {
276 database[i] = lrand48();
281 std::unique_ptr<IndexBinary> make_trained_index_binary(
const char *index_type)
283 auto index = std::unique_ptr<IndexBinary>(index_binary_factory(
285 auto xt = make_data_binary (nt);
286 index->train(nt, xt.data());
291 void test_lowlevel_access_binary (
const char *index_key) {
292 std::unique_ptr<IndexBinary> index =
293 make_trained_index_binary (index_key);
301 auto xb = make_data_binary (nb);
302 index->add(nb, xb.data());
304 std::vector<idx_t> list_nos (nb);
319 auto xq = make_data_binary (nq);
321 std::vector<idx_t> I_ref(k * nq);
322 std::vector<int32_t> D_ref(k * nq);
323 index->search (nq, xq.data(), k, D_ref.data(), I_ref.data());
326 int nprobe = index_ivf->
nprobe;
328 std::vector<idx_t> q_lists (nq * nprobe);
329 std::vector<int32_t> q_dis (nq * nprobe);
333 q_dis.data(), q_lists.data());
336 std::unique_ptr<BinaryInvertedListScanner> scanner (
337 index_ivf->get_InvertedListScanner());
339 for (
int i = 0; i < nq; i++) {
340 std::vector<idx_t> I (k, -1);
341 uint32_t default_dis = 1 << 30;
342 std::vector<int32_t> D (k, default_dis);
344 scanner->set_query (xq.data() + i * index_ivf->
code_size);
346 for (
int j = 0; j < nprobe; j++) {
347 int list_no = q_lists[i * nprobe + j];
348 if (list_no < 0)
continue;
349 scanner->set_list (list_no, q_dis[i * nprobe + j]);
354 scanner->scan_codes (
358 D.data(), I.data(), k);
363 for (
int jj = 0; jj < k; jj++) {
368 float computed_D = scanner->distance_to_code (
371 EXPECT_EQ (computed_D, D[jj]);
376 printf(
"new before reroder: [");
377 for (
int j = 0; j < k; j++)
378 printf(
"%ld,%d ", I[j], D[j]);
382 heap_reorder<CMax<int32_t, int64_t> > (k, D.data(), I.data());
385 for (
int j = 0; j < k; j++)
386 printf(
"%ld,%d ", I_ref[j], D_ref[j]);
388 for (
int j = 0; j < k; j++)
389 printf(
"%ld,%d ", I[j], D[j]);
393 for (
int j = 0; j < k; j++) {
397 EXPECT_LE (D[j], D_ref[i * k + k - 1]);
398 if (D[j] < D_ref[i * k + k - 1]) {
401 if (I[j] == I_ref[i * k + j2])
break;
406 EXPECT_EQ(D[j], D_ref[i * k + j2]);
420 TEST(TestLowLevelIVF, IVFBinary) {
421 test_lowlevel_access_binary (
"BIVF32");
427 void test_threaded_search (
const char *index_key,
MetricType metric) {
428 std::unique_ptr<Index> index = make_trained_index(index_key, metric);
430 auto xb = make_data (nb);
431 index->add(nb, xb.data());
439 const float * xbt = xb.data();
440 std::unique_ptr<float []> del_xbt;
445 if (xbt != xb.data()) {
446 del_xbt.reset((
float*)xbt);
450 IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
459 auto xq = make_data (nq);
460 auto ref_I = search_index (index.get(), xq.data());
463 const float * xqt = xq.data();
464 std::unique_ptr<float []> del_xqt;
468 if (xqt != xq.data()) {
469 del_xqt.reset((
float*)xqt);
474 int nprobe = index_ivf->
nprobe;
476 std::vector<idx_t> q_lists (nq * nprobe);
477 std::vector<float> q_dis (nq * nprobe);
480 q_dis.data(), q_lists.data());
486 for (
int i = 0; i < nq; i++) {
489 std::vector<idx_t> I (k * nproc, -1);
490 float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
491 std::vector<float> D (k * nproc, default_dis);
493 auto search_function = [index_ivf, &I, &D, dt, i, nproc,
494 xqt, nprobe, &q_dis, &q_lists]
499 std::unique_ptr<InvertedListScanner> scanner (
502 idx_t *local_I = I.data() + rank * k;
503 float *local_D = D.data() + rank * k;
505 scanner->set_query (xqt + i * dt);
507 for (
int j = rank; j < nprobe; j += nproc) {
508 int list_no = q_lists[i * nprobe + j];
509 if (list_no < 0)
continue;
510 scanner->set_list (list_no, q_dis[i * nprobe + j]);
512 scanner->scan_codes (
516 local_D, local_I, k);
523 std::vector<std::thread> threads;
524 for (
int rank = 0; rank < nproc; rank++) {
525 threads.emplace_back(search_function, rank);
529 for (
int rank = 0; rank < nproc; rank++) {
530 threads[rank].join();
531 if (rank == 0)
continue;
533 if (metric == METRIC_L2) {
534 maxheap_addn (k, D.data(), I.data(),
536 I.data() + rank * k, k);
538 minheap_addn (k, D.data(), I.data(),
540 I.data() + rank * k, k);
545 if (metric == METRIC_L2) {
546 maxheap_reorder (k, D.data(), I.data());
548 minheap_reorder (k, D.data(), I.data());
552 for (
int j = 0; j < k; j++) {
553 EXPECT_EQ (I[j], ref_I[i * k + j]);
563 TEST(TestLowLevelIVF, ThreadedSearch) {
564 test_threaded_search (
"IVF32,Flat", METRIC_L2);
virtual void encode_vectors(idx_t n, const float *x, const idx_t *list_nos, uint8_t *codes) const =0
void train(idx_t n, const float *x) override
virtual void search(idx_t n, const uint8_t *x, idx_t k, int32_t *distances, idx_t *labels) const =0
size_t nprobe
number of probes at query time
size_t nprobe
number of probes at query time
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
virtual size_t list_size(size_t list_no) const =0
get the size of a list
IndexBinary * quantizer
quantizer that maps vectors to inverted lists
int code_size
number of bytes per vector ( = d / 8 )
size_t code_size
code size per vector in bytes
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k=1)
long idx_t
all indices are this type
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
virtual void set_index_parameter(Index *index, const std::string &name, double val) const
set one of the parameters
InvertedLists * invlists
Access to the actual data.
Index * quantizer
quantizer that maps vectors to inverted lists
virtual InvertedListScanner * get_InvertedListScanner(bool store_pairs=false) const
get a scanner for this index (store_pairs means ignore labels)
Index * index_factory(int d, const char *description_in, MetricType metric)
size_t nlist
number of possible key values
size_t code_size
code size per vector in bytes
MetricType
Some algorithms support both an inner product version and an L2 search version.
InvertedLists * invlists
Access to the actual data.