Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
test_sliding_ivf.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include <cstdio>
10 #include <cstdlib>
11 
12 #include <memory>
13 #include <vector>
14 
15 #include <gtest/gtest.h>
16 
17 #include <faiss/IndexIVF.h>
18 #include <faiss/AutoTune.h>
19 #include <faiss/index_io.h>
20 #include <faiss/IVFlib.h>
21 
22 using namespace faiss;
23 
24 typedef Index::idx_t idx_t;
25 
26 
// dimensionality of every generated vector
int d{32};

// number of vectors used to train the index
size_t nt{5000};

// number of database vectors added to each sub-index (one window step)
size_t nb{1000};

// number of query vectors searched at each step
size_t nq{200};


// number of sub-index slices that are built
int total_size{40};
// number of most recent slices covered by the sliding window
int window_size{10};
42 
43 
44 
45 
46 
47 std::vector<float> make_data(size_t n)
48 {
49  std::vector <float> database (n * d);
50  for (size_t i = 0; i < n * d; i++) {
51  database[i] = drand48();
52  }
53  return database;
54 }
55 
56 std::unique_ptr<Index> make_trained_index(const char *index_type)
57 {
58  auto index = std::unique_ptr<Index>(index_factory(d, index_type));
59  auto xt = make_data(nt * d);
60  index->train(nt, xt.data());
61  ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
62  return index;
63 }
64 
65 std::vector<idx_t> search_index(Index *index, const float *xq) {
66  int k = 10;
67  std::vector<idx_t> I(k * nq);
68  std::vector<float> D(k * nq);
69  index->search (nq, xq, k, D.data(), I.data());
70  return I;
71 }
72 
73 
74 
75 
76 
77 /*************************************************************
78  * Test functions for a given index type
79  *************************************************************/
80 
81 
82 // make a few slices of indexes that can be merged
83 void make_index_slices (const Index* trained_index,
84  std::vector<std::unique_ptr<Index> > & sub_indexes) {
85 
86  for (int i = 0; i < total_size; i++) {
87  sub_indexes.emplace_back (clone_index (trained_index));
88 
89  printf ("preparing sub-index # %d\n", i);
90 
91  Index * index = sub_indexes.back().get();
92 
93  auto xb = make_data(nb * d);
94  std::vector<long> ids (nb);
95  for (int j = 0; j < nb; j++) {
96  ids[j] = lrand48();
97  }
98  index->add_with_ids (nb, xb.data(), ids.data());
99  }
100 
101 }
102 
103 // build merged index explicitly at sliding window position i
104 Index *make_merged_index(
105  const Index* trained_index,
106  const std::vector<std::unique_ptr<Index> > & sub_indexes,
107  int i) {
108 
109  Index * merged_index = clone_index (trained_index);
110  for (int j = i - window_size + 1; j <= i; j++) {
111  if (j < 0 || j >= total_size) continue;
112  std::unique_ptr<Index> sub_index (
113  clone_index (sub_indexes[j].get()));
114  IndexIVF *ivf0 = ivflib::extract_index_ivf (merged_index);
115  IndexIVF *ivf1 = ivflib::extract_index_ivf (sub_index.get());
116  ivf0->merge_from (*ivf1, 0);
117  merged_index->ntotal = ivf0->ntotal;
118  }
119  return merged_index;
120 }
121 
122 int test_sliding_window (const char *index_key) {
123 
124  std::unique_ptr<Index> trained_index = make_trained_index(index_key);
125 
126  // make the index slices
127  std::vector<std::unique_ptr<Index> > sub_indexes;
128 
129  make_index_slices (trained_index.get(), sub_indexes);
130 
131  // now slide over the windows
132  std::unique_ptr<Index> index (clone_index (trained_index.get()));
133  ivflib::SlidingIndexWindow window (index.get());
134 
135  auto xq = make_data (nq * d);
136 
137  for (int i = 0; i < total_size + window_size; i++) {
138 
139  printf ("doing step %d / %d\n", i, total_size + window_size);
140 
141  // update the index
142  window.step (i < total_size ? sub_indexes[i].get() : nullptr,
143  i >= window_size);
144  printf (" current n_slice = %d\n", window.n_slice);
145 
146  auto new_res = search_index (index.get(), xq.data());
147 
148  std::unique_ptr<Index> merged_index (
149  make_merged_index (trained_index.get(), sub_indexes, i));
150 
151  auto ref_res = search_index (merged_index.get(), xq.data ());
152 
153  EXPECT_EQ (ref_res.size(), new_res.size());
154 
155  EXPECT_EQ (ref_res, new_res);
156  }
157  return 0;
158 }
159 
160 
161 int test_sliding_invlists (const char *index_key) {
162 
163  std::unique_ptr<Index> trained_index = make_trained_index(index_key);
164 
165  // make the index slices
166  std::vector<std::unique_ptr<Index> > sub_indexes;
167 
168  make_index_slices (trained_index.get(), sub_indexes);
169 
170  // now slide over the windows
171  std::unique_ptr<Index> index (clone_index (trained_index.get()));
172  IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
173 
174  auto xq = make_data (nq * d);
175 
176  for (int i = 0; i < total_size + window_size; i++) {
177 
178  printf ("doing step %d / %d\n", i, total_size + window_size);
179 
180  // update the index
181  std::vector<const InvertedLists*> ils;
182  for (int j = i - window_size + 1; j <= i; j++) {
183  if (j < 0 || j >= total_size) continue;
184  ils.push_back (ivflib::extract_index_ivf (
185  sub_indexes[j].get())->invlists);
186  }
187  if (ils.size() == 0) continue;
188 
190  new ConcatenatedInvertedLists (ils.size(), ils.data());
191 
192  // will be deleted by the index
193  index_ivf->replace_invlists (ci, true);
194 
195  printf (" nb invlists = %ld\n", ils.size());
196 
197  auto new_res = search_index (index.get(), xq.data());
198 
199  std::unique_ptr<Index> merged_index (
200  make_merged_index (trained_index.get(), sub_indexes, i));
201 
202  auto ref_res = search_index (merged_index.get(), xq.data ());
203 
204  EXPECT_EQ (ref_res.size(), new_res.size());
205 
206  size_t ndiff = 0;
207  for (size_t j = 0; j < ref_res.size(); j++) {
208  if (ref_res[j] != new_res[j])
209  ndiff++;
210  }
211  printf(" nb differences: %ld / %ld\n",
212  ndiff, ref_res.size());
213  EXPECT_EQ (ref_res, new_res);
214  }
215  return 0;
216 }
217 
218 
219 
220 
221 
222 /*************************************************************
223  * Test entry points
224  *************************************************************/
225 
226 TEST(SlidingWindow, IVFFlat) {
227  test_sliding_window ("IVF32,Flat");
228 }
229 
230 TEST(SlidingWindow, PCAIVFFlat) {
231  test_sliding_window ("PCA24,IVF32,Flat");
232 }
233 
234 TEST(SlidingInvlists, IVFFlat) {
235  test_sliding_invlists ("IVF32,Flat");
236 }
237 
238 TEST(SlidingInvlists, PCAIVFFlat) {
239  test_sliding_invlists ("PCA24,IVF32,Flat");
240 }
void train(idx_t n, const float *x) override
virtual void add_with_ids(idx_t n, const float *x, const long *xids)
Definition: Index.cpp:42
long idx_t
all indices are this type
Definition: Index.h:64
void replace_invlists(InvertedLists *il, bool own=false)
Replace the inverted lists; the old one is deallocated if own_invlists is set.
Definition: IndexIVF.cpp:486
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
virtual void set_index_parameter(Index *index, const std::string &name, double val) const
set one of the parameters
Definition: AutoTune.cpp:452
Index * index_factory(int d, const char *description_in, MetricType metric)
Definition: AutoTune.cpp:722
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:472