Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/IVFlib.cpp
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #include "IVFlib.h"
11 
12 #include <memory>
13 
14 #include "VectorTransform.h"
15 #include "FaissAssert.h"
16 
17 
18 
19 namespace faiss { namespace ivflib {
20 
21 
22 void check_compatible_for_merge (const Index * index0,
23  const Index * index1)
24 {
25 
26  const faiss::IndexPreTransform *pt0 =
27  dynamic_cast<const faiss::IndexPreTransform *>(index0);
28 
29  if (pt0) {
30  const faiss::IndexPreTransform *pt1 =
31  dynamic_cast<const faiss::IndexPreTransform *>(index1);
32  FAISS_THROW_IF_NOT_MSG (pt1, "both indexes should be pretransforms");
33 
34  FAISS_THROW_IF_NOT (pt0->chain.size() == pt1->chain.size());
35  for (int i = 0; i < pt0->chain.size(); i++) {
36  FAISS_THROW_IF_NOT (typeid(pt0->chain[i]) == typeid(pt1->chain[i]));
37  }
38 
39  index0 = pt0->index;
40  index1 = pt1->index;
41  }
42  FAISS_THROW_IF_NOT (typeid(index0) == typeid(index1));
43  FAISS_THROW_IF_NOT (index0->d == index1->d &&
44  index0->metric_type == index1->metric_type);
45 
46  const faiss::IndexIVF *ivf0 = dynamic_cast<const faiss::IndexIVF *>(index0);
47  if (ivf0) {
48  const faiss::IndexIVF *ivf1 =
49  dynamic_cast<const faiss::IndexIVF *>(index1);
50  FAISS_THROW_IF_NOT (ivf1);
51 
52  ivf0->check_compatible_for_merge (*ivf1);
53  }
54 
55  // TODO: check as thoroughfully for other index types
56 
57 }
58 
59 const IndexIVF * extract_index_ivf (const Index * index)
60 {
61  if (auto *pt =
62  dynamic_cast<const IndexPreTransform *>(index)) {
63  index = pt->index;
64  }
65 
66  auto *ivf = dynamic_cast<const IndexIVF *>(index);
67 
68  FAISS_THROW_IF_NOT (ivf);
69 
70  return ivf;
71 }
72 
73 IndexIVF * extract_index_ivf (Index * index) {
74  return const_cast<IndexIVF*> (extract_index_ivf ((const Index*)(index)));
75 }
76 
77 void merge_into(faiss::Index *index0, faiss::Index *index1, bool shift_ids) {
78 
79  check_compatible_for_merge (index0, index1);
80  IndexIVF * ivf0 = extract_index_ivf (index0);
81  IndexIVF * ivf1 = extract_index_ivf (index1);
82 
83  ivf0->merge_from (*ivf1, shift_ids ? ivf0->ntotal : 0);
84 
85  // useful for IndexPreTransform
86  index0->ntotal = ivf0->ntotal;
87  index1->ntotal = ivf1->ntotal;
88 }
89 
90 
91 
92 void search_centroid(faiss::Index *index,
93  const float* x, int n,
94  idx_t* centroid_ids)
95 {
96  std::unique_ptr<float[]> del;
97  if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
98  x = index_pre->apply_chain(n, x);
99  del.reset((float*)x);
100  index = index_pre->index;
101  }
102  faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
103  assert(index_ivf);
104  index_ivf->quantizer->assign(n, x, centroid_ids);
105 }
106 
107 
108 
109 void search_and_return_centroids(faiss::Index *index,
110  size_t n,
111  const float* xin,
112  long k,
113  float *distances,
114  idx_t* labels,
115  idx_t* query_centroid_ids,
116  idx_t* result_centroid_ids)
117 {
118  const float *x = xin;
119  std::unique_ptr<float []> del;
120  if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
121  x = index_pre->apply_chain(n, x);
122  del.reset((float*)x);
123  index = index_pre->index;
124  }
125  faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
126  assert(index_ivf);
127 
128  size_t nprobe = index_ivf->nprobe;
129  std::vector<idx_t> cent_nos (n * nprobe);
130  std::vector<float> cent_dis (n * nprobe);
131  index_ivf->quantizer->search(
132  n, x, nprobe, cent_dis.data(), cent_nos.data());
133 
134  if (query_centroid_ids) {
135  for (size_t i = 0; i < n; i++)
136  query_centroid_ids[i] = cent_nos[i * nprobe];
137  }
138 
139  index_ivf->search_preassigned (n, x, k,
140  cent_nos.data(), cent_dis.data(),
141  distances, labels, true);
142 
143  for (size_t i = 0; i < n * k; i++) {
144  idx_t label = labels[i];
145  if (label < 0) {
146  if (result_centroid_ids)
147  result_centroid_ids[i] = -1;
148  } else {
149  long list_no = label >> 32;
150  long list_index = label & 0xffffffff;
151  if (result_centroid_ids)
152  result_centroid_ids[i] = list_no;
153  labels[i] = index_ivf->invlists->get_single_id(list_no, list_index);
154  }
155  }
156 }
157 
158 
160  n_slice = 0;
161  IndexIVF* index_ivf = const_cast<IndexIVF*>(extract_index_ivf (index));
162  ils = dynamic_cast<ArrayInvertedLists *> (index_ivf->invlists);
163  nlist = ils->nlist;
164  FAISS_THROW_IF_NOT_MSG (ils,
165  "only supports indexes with ArrayInvertedLists");
166  sizes.resize(nlist);
167 }
168 
/** Drop the first `remove` elements of dst, then append all of src.
 *
 * Preconditions: remove <= dst.size(), and src is not the same object
 * as dst.
 *
 * Implemented with vector::erase / vector::insert instead of raw
 * memmove / memcpy, so that empty vectors (whose data() may be null) are
 * handled without passing null pointers to the C memory functions.
 */
template<class T>
static void shift_and_add (std::vector<T> & dst,
                           size_t remove,
                           const std::vector<T> & src)
{
    dst.erase (dst.begin(), dst.begin() + remove);
    dst.insert (dst.end(), src.begin(), src.end());
}
181 
/** Erase the first `remove` elements of v (no-op when remove is 0). */
template<class T>
static void remove_from_begin (std::vector<T> & v,
                               size_t remove)
{
    if (remove == 0) {
        return;
    }
    const auto first = v.begin();
    v.erase (first, first + remove);
}
189 
190 void SlidingIndexWindow::step(const Index *sub_index, bool remove_oldest) {
191 
192  FAISS_THROW_IF_NOT_MSG (!remove_oldest || n_slice > 0,
193  "cannot remove slice: there is none");
194 
195  const ArrayInvertedLists *ils2 = nullptr;
196  if(sub_index) {
197  check_compatible_for_merge (index, sub_index);
198  ils2 = dynamic_cast<const ArrayInvertedLists*>(
199  extract_index_ivf (sub_index)->invlists);
200  FAISS_THROW_IF_NOT_MSG (ils2, "supports only ArrayInvertedLists");
201  }
202  IndexIVF *index_ivf = extract_index_ivf (index);
203 
204  if (remove_oldest && ils2) {
205  for (int i = 0; i < nlist; i++) {
206  std::vector<size_t> & sizesi = sizes[i];
207  size_t amount_to_remove = sizesi[0];
208  index_ivf->ntotal += ils2->ids[i].size() - amount_to_remove;
209 
210  shift_and_add (ils->ids[i], amount_to_remove, ils2->ids[i]);
211  shift_and_add (ils->codes[i], amount_to_remove * ils->code_size,
212  ils2->codes[i]);
213  for (int j = 0; j + 1 < n_slice; j++) {
214  sizesi[j] = sizesi[j + 1] - amount_to_remove;
215  }
216  sizesi[n_slice - 1] = ils->ids[i].size();
217  }
218  } else if (ils2) {
219  for (int i = 0; i < nlist; i++) {
220  index_ivf->ntotal += ils2->ids[i].size();
221  shift_and_add (ils->ids[i], 0, ils2->ids[i]);
222  shift_and_add (ils->codes[i], 0, ils2->codes[i]);
223  sizes[i].push_back(ils->ids[i].size());
224  }
225  n_slice++;
226  } else if (remove_oldest) {
227  for (int i = 0; i < nlist; i++) {
228  size_t amount_to_remove = sizes[i][0];
229  index_ivf->ntotal -= amount_to_remove;
230  remove_from_begin (ils->ids[i], amount_to_remove);
231  remove_from_begin (ils->codes[i],
232  amount_to_remove * ils->code_size);
233  for (int j = 0; j + 1 < n_slice; j++) {
234  sizes[i][j] = sizes[i][j + 1] - amount_to_remove;
235  }
236  sizes[i].pop_back ();
237  }
238  n_slice--;
239  } else {
240  FAISS_THROW_MSG ("nothing to do???");
241  }
242  index->ntotal = index_ivf->ntotal;
243 }
244 
245 
246 
247 // Get a subset of inverted lists [i0, i1). Works on IndexIVF's and
248 // IndexIVF's embedded in a IndexPreTransform
249 
251 get_invlist_range (const Index *index, long i0, long i1)
252 {
253  const IndexIVF *ivf = extract_index_ivf (index);
254 
255  FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
256 
257  const InvertedLists *src = ivf->invlists;
258 
259  ArrayInvertedLists * il = new ArrayInvertedLists(i1 - i0, src->code_size);
260 
261  for (long i = i0; i < i1; i++) {
262  il->add_entries(i - i0, src->list_size(i),
263  InvertedLists::ScopedIds (src, i).get(),
264  InvertedLists::ScopedCodes (src, i).get());
265  }
266  return il;
267 }
268 
269 
270 
271 void set_invlist_range (Index *index, long i0, long i1,
272  ArrayInvertedLists * src)
273 {
274  IndexIVF *ivf = extract_index_ivf (index);
275 
276  FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
277 
278  ArrayInvertedLists *dst = dynamic_cast<ArrayInvertedLists *>(ivf->invlists);
279  FAISS_THROW_IF_NOT_MSG (dst, "only ArrayInvertedLists supported");
280  FAISS_THROW_IF_NOT (src->nlist == i1 - i0 &&
281  dst->code_size == src->code_size);
282 
283  size_t ntotal = index->ntotal;
284  for (long i = i0 ; i < i1; i++) {
285  ntotal -= dst->list_size (i);
286  ntotal += src->list_size (i - i0);
287  std::swap (src->codes[i - i0], dst->codes[i]);
288  std::swap (src->ids[i - i0], dst->ids[i]);
289  }
290  ivf->ntotal = index->ntotal = ntotal;
291 }
292 
293 
294 void search_with_parameters (const Index *index,
295  idx_t n, const float *x, idx_t k,
296  float *distances, idx_t *labels,
297  IVFSearchParameters *params)
298 {
299  FAISS_THROW_IF_NOT (params);
300  const float *prev_x = x;
301  ScopeDeleter<float> del;
302 
303  if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
304  x = ip->apply_chain (n, x);
305  if (x != prev_x) {
306  del.set(x);
307  }
308  index = ip->index;
309  }
310 
311  std::vector<idx_t> Iq(params->nprobe * n);
312  std::vector<float> Dq(params->nprobe * n);
313 
314  const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
315  FAISS_THROW_IF_NOT (index_ivf);
316 
317  index_ivf->quantizer->search(n, x, params->nprobe,
318  Dq.data(), Iq.data());
319 
320  index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
321  distances, labels,
322  false, params);
323 }
324 
325 
326 
327 } } // namespace faiss::ivflib
Index * index
! chain of transforms
virtual void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs, const IVFSearchParameters *params=nullptr) const
Definition: IndexIVF.cpp:250
simple (default) implementation as an array of inverted lists
void check_compatible_for_merge(const IndexIVF &other) const
Definition: IndexIVF.cpp:710
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:97
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:34
virtual size_t list_size(size_t list_no) const =0
get the size of a list
size_t nlist
same as index->nlist
Definition: IVFlib.h:94
virtual idx_t get_single_id(size_t list_no, size_t offset) const
size_t code_size
code size per vector in bytes
Definition: InvertedLists.h:35
ArrayInvertedLists * ils
InvertedLists of index.
Definition: IVFlib.h:88
int n_slice
number of slices currently in index
Definition: IVFlib.h:91
std::vector< std::vector< size_t > > sizes
cumulative list sizes at each slice
Definition: IVFlib.h:97
SlidingIndexWindow(Index *index)
index should be initially empty and trained
Definition: IVFlib.cpp:159
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void step(const Index *sub_index, bool remove_oldest)
Definition: IVFlib.cpp:190
size_t nlist
number of possible key values
Definition: InvertedLists.h:34
InvertedLists * invlists
Access to the actual data.
Definition: IndexIVF.h:92
std::vector< std::vector< idx_t > > ids
Inverted lists for indexes.
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:32