Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/InvertedLists.cpp
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #include "InvertedLists.h"
11 
12 #include <cstdio>
13 
14 #include "utils.h"
15 #include "FaissAssert.h"
16 
17 namespace faiss {
18 
19 using ScopedIds = InvertedLists::ScopedIds;
20 using ScopedCodes = InvertedLists::ScopedCodes;
21 
22 
23 /*****************************************
24  * InvertedLists implementation
25  ******************************************/
26 
27 InvertedLists::InvertedLists (size_t nlist, size_t code_size):
28  nlist (nlist), code_size (code_size)
29 {
30 }
31 
32 InvertedLists::~InvertedLists ()
33 {}
34 
35 InvertedLists::idx_t InvertedLists::get_single_id (
36  size_t list_no, size_t offset) const
37 {
38  assert (offset < list_size (list_no));
39  return get_ids(list_no)[offset];
40 }
41 
42 
43 void InvertedLists::release_codes (size_t, const uint8_t *) const
44 {}
45 
46 void InvertedLists::release_ids (size_t, const idx_t *) const
47 {}
48 
49 void InvertedLists::prefetch_lists (const idx_t *, int) const
50 {}
51 
// Returns a pointer to the code at position `offset` of list `list_no`:
// the pointer returned by get_codes(list_no), advanced by offset * code_size.
// NOTE(review): the first line of this signature (return type and function
// name) is missing from this listing -- presumably
// InvertedLists::get_single_code; confirm against the original file.
53  size_t list_no, size_t offset) const
54 {
55  assert (offset < list_size (list_no));
56  return get_codes(list_no) + offset * code_size;
57 }
58 
59 size_t InvertedLists::add_entry (size_t list_no, idx_t theid,
60  const uint8_t *code)
61 {
62  return add_entries (list_no, 1, &theid, code);
63 }
64 
65 void InvertedLists::update_entry (size_t list_no, size_t offset,
66  idx_t id, const uint8_t *code)
67 {
68  update_entries (list_no, offset, 1, &id, code);
69 }
70 
71 void InvertedLists::reset () {
72  for (size_t i = 0; i < nlist; i++) {
73  resize (i, 0);
74  }
75 }
76 
77 void InvertedLists::merge_from (InvertedLists *oivf, size_t add_id) {
78 
79 #pragma omp parallel for
80  for (idx_t i = 0; i < nlist; i++) {
81  size_t list_size = oivf->list_size (i);
82  ScopedIds ids (oivf, i);
83  if (add_id == 0) {
84  add_entries (i, list_size, ids.get (),
85  ScopedCodes (oivf, i).get());
86  } else {
87  std::vector <idx_t> new_ids (list_size);
88 
89  for (size_t j = 0; j < list_size; j++) {
90  new_ids [j] = ids[j] + add_id;
91  }
92  add_entries (i, list_size, new_ids.data(),
93  ScopedCodes (oivf, i).get());
94  }
95  oivf->resize (i, 0);
96  }
97 }
98 
// Collects the size of each of the nlist lists into an int histogram and
// returns the result of faiss::imbalance_factor() on that histogram.
// NOTE(review): the signature line of this function is missing from this
// listing -- presumably `double InvertedLists::imbalance_factor () const {`;
// confirm against the original file.
100  std::vector<int> hist(nlist);
101 
102  for (size_t i = 0; i < nlist; i++) {
// list_size() returns size_t; the value is narrowed to int here
103  hist[i] = list_size(i);
104  }
105 
106  return faiss::imbalance_factor(nlist, hist.data());
107 }
108 
// Prints a histogram of list sizes on stdout, bucketed by powers of two.
// NOTE(review): the signature line of this function is missing from this
// listing -- presumably `void InvertedLists::print_stats () const {`;
// confirm against the original file.
// sizes[j] counts the lists for which j is the smallest shift such that
// (list_size >> j) == 0, i.e. lists whose size is < 2^j but >= 2^(j-1).
110  std::vector<int> sizes(40);
111  for (size_t i = 0; i < nlist; i++) {
112  for (size_t j = 0; j < sizes.size(); j++) {
113  if ((list_size(i) >> j) == 0) {
114  sizes[j]++;
115  break;
116  }
117  }
118  }
// only non-empty buckets are printed
119  for (size_t i = 0; i < sizes.size(); i++) {
120  if (sizes[i]) {
// NOTE(review): `1 << i` is an int shift; i can reach 39 here, and shifting
// by 31 or more overflows int -- consider a 64-bit shift and matching format.
121  printf("list size in < %d: %d instances\n", 1 << i, sizes[i]);
122  }
123  }
124 }
125 
// Returns the sum of list_size(i) over all nlist lists.
// NOTE(review): the signature line of this function is missing from this
// listing -- presumably `size_t InvertedLists::compute_ntotal () const {`;
// confirm against the original file.
127  size_t tot = 0;
128  for (size_t i = 0; i < nlist; i++) {
129  tot += list_size(i);
130  }
131  return tot;
132 }
133 
134 /*****************************************
135  * ArrayInvertedLists implementation
136  ******************************************/
137 
138 ArrayInvertedLists::ArrayInvertedLists (size_t nlist, size_t code_size):
139  InvertedLists (nlist, code_size)
140 {
141  ids.resize (nlist);
142  codes.resize (nlist);
143 }
144 
145 size_t ArrayInvertedLists::add_entries (
146  size_t list_no, size_t n_entry,
147  const idx_t* ids_in, const uint8_t *code)
148 {
149  if (n_entry == 0) return 0;
150  assert (list_no < nlist);
151  size_t o = ids [list_no].size();
152  ids [list_no].resize (o + n_entry);
153  memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
154  codes [list_no].resize ((o + n_entry) * code_size);
155  memcpy (&codes[list_no][o * code_size], code, code_size * n_entry);
156  return o;
157 }
158 
159 size_t ArrayInvertedLists::list_size(size_t list_no) const
160 {
161  assert (list_no < nlist);
162  return ids[list_no].size();
163 }
164 
165 const uint8_t * ArrayInvertedLists::get_codes (size_t list_no) const
166 {
167  assert (list_no < nlist);
168  return codes[list_no].data();
169 }
170 
171 
172 const InvertedLists::idx_t * ArrayInvertedLists::get_ids (size_t list_no) const
173 {
174  assert (list_no < nlist);
175  return ids[list_no].data();
176 }
177 
178 void ArrayInvertedLists::resize (size_t list_no, size_t new_size)
179 {
180  ids[list_no].resize (new_size);
181  codes[list_no].resize (new_size * code_size);
182 }
183 
184 void ArrayInvertedLists::update_entries (
185  size_t list_no, size_t offset, size_t n_entry,
186  const idx_t *ids_in, const uint8_t *codes_in)
187 {
188  assert (list_no < nlist);
189  assert (n_entry + offset <= ids[list_no].size());
190  memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
191  memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
192 }
193 
194 
195 ArrayInvertedLists::~ArrayInvertedLists ()
196 {}
197 
198 /*****************************************************************
199  * Meta-inverted list implementations
200  *****************************************************************/
201 
202 
203 size_t ReadOnlyInvertedLists::add_entries (
204  size_t , size_t ,
205  const idx_t* , const uint8_t *)
206 {
207  FAISS_THROW_MSG ("not implemented");
208 }
209 
210 void ReadOnlyInvertedLists::update_entries (size_t, size_t , size_t ,
211  const idx_t *, const uint8_t *)
212 {
213  FAISS_THROW_MSG ("not implemented");
214 }
215 
216 void ReadOnlyInvertedLists::resize (size_t , size_t )
217 {
218  FAISS_THROW_MSG ("not implemented");
219 }
220 
221 
222 
223 /*****************************************
224  * HStackInvertedLists implementation
225  ******************************************/
226 
// Constructor taking `nil` inverted lists in `ils_in`. nlist and code_size
// are taken from ils_in[0]; every input must match both, otherwise
// FAISS_THROW_IF_NOT throws. The input pointers are stored in `ils`.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `HStackInvertedLists::HStackInvertedLists (`;
// confirm against the original file.
228  int nil, const InvertedLists **ils_in):
// the ternaries keep ils_in[0] from being dereferenced when nil <= 0;
// that case is rejected by the check at the top of the body
229  ReadOnlyInvertedLists (nil > 0 ? ils_in[0]->nlist : 0,
230  nil > 0 ? ils_in[0]->code_size : 0)
231 {
232  FAISS_THROW_IF_NOT (nil > 0);
233  for (int i = 0; i < nil; i++) {
234  ils.push_back (ils_in[i]);
235  FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size &&
236  ils_in[i]->nlist == nlist);
237  }
238 }
239 
240 size_t HStackInvertedLists::list_size(size_t list_no) const
241 {
242  size_t sz = 0;
243  for (int i = 0; i < ils.size(); i++) {
244  const InvertedLists *il = ils[i];
245  sz += il->list_size (list_no);
246  }
247  return sz;
248 }
249 
250 const uint8_t * HStackInvertedLists::get_codes (size_t list_no) const
251 {
252  uint8_t *codes = new uint8_t [code_size * list_size(list_no)], *c = codes;
253 
254  for (int i = 0; i < ils.size(); i++) {
255  const InvertedLists *il = ils[i];
256  size_t sz = il->list_size(list_no) * code_size;
257  if (sz > 0) {
258  memcpy (c, ScopedCodes (il, list_no).get(), sz);
259  c += sz;
260  }
261  }
262  return codes;
263 }
264 
// Returns a freshly allocated copy (new[], code_size bytes) of the code at
// position `offset` of the concatenated list `list_no`. Walks the sub-lists
// in order, subtracting each sub-list's size from `offset` until the offset
// falls inside one of them. Throws if `offset` is past the end of the
// concatenated list.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `const uint8_t * HStackInvertedLists::get_single_code (`;
// confirm against the original file.
266  size_t list_no, size_t offset) const
267 {
268  for (int i = 0; i < ils.size(); i++) {
269  const InvertedLists *il = ils[i];
270  size_t sz = il->list_size (list_no);
271  if (offset < sz) {
272  // here we have to copy the code, otherwise it will crash at dealloc
// (release_codes() of this class calls delete[] on whatever it is given)
273  uint8_t * code = new uint8_t [code_size];
274  memcpy (code, ScopedCodes (il, list_no, offset).get(), code_size);
275  return code;
276  }
277  offset -= sz;
278  }
// NOTE(review): `offset` is a size_t printed with %ld, and by now it holds
// the remainder after the subtractions above, not the caller's value.
279  FAISS_THROW_FMT ("offset %ld unknown", offset);
280 }
281 
282 
283 void HStackInvertedLists::release_codes (size_t, const uint8_t *codes) const {
284  delete [] codes;
285 }
286 
287 const Index::idx_t * HStackInvertedLists::get_ids (size_t list_no) const
288 {
289  idx_t *ids = new idx_t [list_size(list_no)], *c = ids;
290 
291  for (int i = 0; i < ils.size(); i++) {
292  const InvertedLists *il = ils[i];
293  size_t sz = il->list_size(list_no);
294  if (sz > 0) {
295  memcpy (c, ScopedIds (il, list_no).get(), sz * sizeof(idx_t));
296  c += sz;
297  }
298  }
299  return ids;
300 }
301 
// Returns the id at position `offset` of the concatenated list `list_no`.
// Walks the sub-lists in order, subtracting each sub-list's size from
// `offset` until the offset falls inside one of them, then forwards to that
// sub-list's get_single_id(). Throws if `offset` is past the end.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `Index::idx_t HStackInvertedLists::get_single_id (`;
// confirm against the original file.
303  size_t list_no, size_t offset) const
304 {
305 
306  for (int i = 0; i < ils.size(); i++) {
307  const InvertedLists *il = ils[i];
308  size_t sz = il->list_size (list_no);
309  if (offset < sz) {
310  return il->get_single_id (list_no, offset);
311  }
312  offset -= sz;
313  }
// NOTE(review): `offset` is a size_t printed with %ld, and by now it holds
// the remainder after the subtractions above, not the caller's value.
314  FAISS_THROW_FMT ("offset %ld unknown", offset);
315 }
316 
317 
318 void HStackInvertedLists::release_ids (size_t, const idx_t *ids) const {
319  delete [] ids;
320 }
321 
322 void HStackInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) const
323 {
324  for (int i = 0; i < ils.size(); i++) {
325  const InvertedLists *il = ils[i];
326  il->prefetch_lists (list_nos, nlist);
327  }
328 }
329 
330 /*****************************************
331  * SliceInvertedLists implementation
332  ******************************************/
333 
334 
335 namespace {
336 
337  using idx_t = InvertedLists::idx_t;
338 
339  idx_t translate_list_no (const SliceInvertedLists *sil,
340  idx_t list_no) {
341  FAISS_THROW_IF_NOT (list_no >= 0 && list_no < sil->nlist);
342  return list_no + sil->i0;
343  }
344 
345 };
346 
347 
348 
349 SliceInvertedLists::SliceInvertedLists (
350  const InvertedLists *il, idx_t i0, idx_t i1):
351  ReadOnlyInvertedLists (i1 - i0, il->code_size),
352  il (il), i0(i0), i1(i1)
353 {
354 
355 }
356 
357 size_t SliceInvertedLists::list_size(size_t list_no) const
358 {
359  return il->list_size (translate_list_no (this, list_no));
360 }
361 
362 const uint8_t * SliceInvertedLists::get_codes (size_t list_no) const
363 {
364  return il->get_codes (translate_list_no (this, list_no));
365 }
366 
// Forwards to get_single_code() of the underlying inverted lists, with the
// list number translated by translate_list_no() and `offset` unchanged.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `const uint8_t * SliceInvertedLists::get_single_code (`;
// confirm against the original file.
368  size_t list_no, size_t offset) const
369 {
370  return il->get_single_code (translate_list_no (this, list_no), offset);
371 }
372 
373 
// Forwards to release_codes() of the underlying inverted lists, with the
// list number translated by translate_list_no().
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `void SliceInvertedLists::release_codes (`;
// confirm against the original file.
375  size_t list_no, const uint8_t *codes) const {
376  return il->release_codes (translate_list_no (this, list_no), codes);
377 }
378 
379 const Index::idx_t * SliceInvertedLists::get_ids (size_t list_no) const
380 {
381  return il->get_ids (translate_list_no (this, list_no));
382 }
383 
// Forwards to get_single_id() of the underlying inverted lists, with the
// list number translated by translate_list_no() and `offset` unchanged.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `Index::idx_t SliceInvertedLists::get_single_id (`;
// confirm against the original file.
385  size_t list_no, size_t offset) const
386 {
387  return il->get_single_id (translate_list_no (this, list_no), offset);
388 }
389 
390 
391 void SliceInvertedLists::release_ids (size_t list_no, const idx_t *ids) const {
392  return il->release_ids (translate_list_no (this, list_no), ids);
393 }
394 
395 void SliceInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) const
396 {
397  std::vector<idx_t> translated_list_nos;
398  for (int j = 0; j < nlist; j++) {
399  idx_t list_no = list_nos[j];
400  if (list_no < 0) continue;
401  translated_list_nos.push_back (translate_list_no (this, list_no));
402  }
403  il->prefetch_lists (translated_list_nos.data(),
404  translated_list_nos.size());
405 }
406 
407 
408 /*****************************************
409  * VStackInvertedLists implementation
410  ******************************************/
411 
412 namespace {
413 
414  using idx_t = InvertedLists::idx_t;
415 
416  // find the invlist this number belongs to
417  int translate_list_no (const VStackInvertedLists *vil,
418  idx_t list_no) {
419  FAISS_THROW_IF_NOT (list_no >= 0 && list_no < vil->nlist);
420  int i0 = 0, i1 = vil->ils.size();
421  const idx_t *cumsz = vil->cumsz.data();
422  while (i0 + 1 < i1) {
423  int imed = (i0 + i1) / 2;
424  if (list_no >= cumsz[imed]) {
425  i0 = imed;
426  } else {
427  i1 = imed;
428  }
429  }
430  assert(list_no >= cumsz[i0] && list_no < cumsz[i0 + 1]);
431  return i0;
432  }
433 
434  idx_t sum_il_sizes (int nil, const InvertedLists **ils_in) {
435  idx_t tot = 0;
436  for (int i = 0; i < nil; i++) {
437  tot += ils_in[i]->nlist;
438  }
439  return tot;
440  }
441 
442 };
443 
444 
445 
// Constructor taking `nil` inverted lists in `ils_in`. nlist is the sum of
// the inputs' nlist, code_size is taken from ils_in[0]; every input must
// have that same code_size, otherwise FAISS_THROW_IF_NOT throws. The input
// pointers are stored in `ils`.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `VStackInvertedLists::VStackInvertedLists (`;
// confirm against the original file.
447  int nil, const InvertedLists **ils_in):
448  ReadOnlyInvertedLists (sum_il_sizes(nil, ils_in),
449  nil > 0 ? ils_in[0]->code_size : 0)
450 {
451  FAISS_THROW_IF_NOT (nil > 0);
// cumsz[i] ends up as the number of lists in inputs 0..i-1; entry 0 keeps
// the value resize() gave it
452  cumsz.resize (nil + 1);
453  for (int i = 0; i < nil; i++) {
454  ils.push_back (ils_in[i]);
455  FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size);
456  cumsz[i + 1] = cumsz[i] + ils_in[i]->nlist;
457  }
458 }
459 
460 size_t VStackInvertedLists::list_size(size_t list_no) const
461 {
462  int i = translate_list_no (this, list_no);
463  list_no -= cumsz[i];
464  return ils[i]->list_size (list_no);
465 }
466 
467 const uint8_t * VStackInvertedLists::get_codes (size_t list_no) const
468 {
469  int i = translate_list_no (this, list_no);
470  list_no -= cumsz[i];
471  return ils[i]->get_codes (list_no);
472 }
473 
// Forwards to get_single_code() of the inverted list that owns global list
// `list_no`, after converting it to that list's local numbering.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `const uint8_t * VStackInvertedLists::get_single_code (`;
// confirm against the original file.
475  size_t list_no, size_t offset) const
476 {
477  int i = translate_list_no (this, list_no);
// global -> local list number
478  list_no -= cumsz[i];
479  return ils[i]->get_single_code (list_no, offset);
480 }
481 
482 
// Forwards to release_codes() of the inverted list that owns global list
// `list_no`, after converting it to that list's local numbering.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `void VStackInvertedLists::release_codes (`;
// confirm against the original file.
484  size_t list_no, const uint8_t *codes) const {
485  int i = translate_list_no (this, list_no);
// global -> local list number
486  list_no -= cumsz[i];
487  return ils[i]->release_codes (list_no, codes);
488 }
489 
490 const Index::idx_t * VStackInvertedLists::get_ids (size_t list_no) const
491 {
492  int i = translate_list_no (this, list_no);
493  list_no -= cumsz[i];
494  return ils[i]->get_ids (list_no);
495 }
496 
// Forwards to get_single_id() of the inverted list that owns global list
// `list_no`, after converting it to that list's local numbering.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `Index::idx_t VStackInvertedLists::get_single_id (`;
// confirm against the original file.
498  size_t list_no, size_t offset) const
499 {
500  int i = translate_list_no (this, list_no);
// global -> local list number
501  list_no -= cumsz[i];
502  return ils[i]->get_single_id (list_no, offset);
503 }
504 
505 
506 void VStackInvertedLists::release_ids (size_t list_no, const idx_t *ids) const {
507  int i = translate_list_no (this, list_no);
508  list_no -= cumsz[i];
509  return ils[i]->release_ids (list_no, ids);
510 }
511 
// Groups the requested global list numbers by the inverted list that owns
// them, converts them to local list numbers, and issues one prefetch_lists()
// call per sub-list that has at least one request. Negative list numbers are
// skipped.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `void VStackInvertedLists::prefetch_lists (`;
// confirm against the original file.
513  const idx_t *list_nos, int nlist) const
514 {
// pass 1: find the owner of each request and count requests per owner
515  std::vector<int> ilno (nlist, -1);
516  std::vector<int> n_per_il (ils.size(), 0);
517  for (int j = 0; j < nlist; j++) {
518  idx_t list_no = list_nos[j];
519  if (list_no < 0) continue;
520  int i = ilno[j] = translate_list_no (this, list_no);
521  n_per_il[i]++;
522  }
// prefix sums: cum_n_per_il[i] is where owner i's requests start in the
// grouped array
523  std::vector<int> cum_n_per_il (ils.size() + 1, 0);
524  for (int j = 0; j < ils.size(); j++) {
525  cum_n_per_il[j + 1] = cum_n_per_il[j] + n_per_il[j];
526  }
// pass 2: scatter the local list numbers into their owner's range; the
// prefix sums are used as write cursors and are modified in the process
527  std::vector<idx_t> sorted_list_nos (cum_n_per_il.back());
528  for (int j = 0; j < nlist; j++) {
529  idx_t list_no = list_nos[j];
530  if (list_no < 0) continue;
531  int i = ilno[j];
532  list_no -= cumsz[i];
533  sorted_list_nos[cum_n_per_il[i]++] = list_no;
534  }
535 
// range starts are recomputed from n_per_il because cum_n_per_il was
// advanced above
536  int i0 = 0;
537  for (int j = 0; j < ils.size(); j++) {
538  int i1 = i0 + n_per_il[j];
539  if (i1 > i0) {
540  ils[j]->prefetch_lists (sorted_list_nos.data() + i0,
541  i1 - i0);
542  }
543  i0 = i1;
544  }
545 }
546 
547 
548 
549 /*****************************************
550  * MaskedInvertedLists implementation
551  ******************************************/
552 
553 
554 MaskedInvertedLists::MaskedInvertedLists (const InvertedLists *il0,
555  const InvertedLists *il1):
556  ReadOnlyInvertedLists (il0->nlist, il0->code_size),
557  il0 (il0), il1 (il1)
558 {
559  FAISS_THROW_IF_NOT (il1->nlist == nlist);
560  FAISS_THROW_IF_NOT (il1->code_size == code_size);
561 }
562 
563 size_t MaskedInvertedLists::list_size(size_t list_no) const
564 {
565  size_t sz = il0->list_size(list_no);
566  return sz ? sz : il1->list_size(list_no);
567 }
568 
569 const uint8_t * MaskedInvertedLists::get_codes (size_t list_no) const
570 {
571  size_t sz = il0->list_size(list_no);
572  return (sz ? il0 : il1)->get_codes(list_no);
573 }
574 
575 const idx_t * MaskedInvertedLists::get_ids (size_t list_no) const
576 {
577  size_t sz = il0->list_size (list_no);
578  return (sz ? il0 : il1)->get_ids (list_no);
579 }
580 
// Forwards release_codes() to il0 when il0's list `list_no` is non-empty,
// otherwise to il1 -- the same selection rule get_codes() uses.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `void MaskedInvertedLists::release_codes (`;
// confirm against the original file.
582  size_t list_no, const uint8_t *codes) const
583 {
584  size_t sz = il0->list_size (list_no);
585  (sz ? il0 : il1)->release_codes (list_no, codes);
586 }
587 
588 void MaskedInvertedLists::release_ids (size_t list_no, const idx_t *ids) const
589 {
590  size_t sz = il0->list_size (list_no);
591  (sz ? il0 : il1)->release_ids (list_no, ids);
592 }
593 
594 idx_t MaskedInvertedLists::get_single_id (size_t list_no, size_t offset) const
595 {
596  size_t sz = il0->list_size (list_no);
597  return (sz ? il0 : il1)->get_single_id (list_no, offset);
598 }
599 
// Returns the code at position `offset` of list `list_no`, read from il0
// when il0's list is non-empty, otherwise from il1.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `const uint8_t * MaskedInvertedLists::get_single_code (`;
// confirm against the original file.
601  size_t list_no, size_t offset) const
602 {
603  size_t sz = il0->list_size (list_no);
604  return (sz ? il0 : il1)->get_single_code (list_no, offset);
605 }
606 
// Splits the requested list numbers between il0 and il1 using the same rule
// as the accessors (il0 when its list is non-empty, otherwise il1), then
// calls prefetch_lists() once on each. Negative list numbers are skipped.
// NOTE(review): the first line of this signature is missing from this
// listing -- presumably `void MaskedInvertedLists::prefetch_lists (`;
// confirm against the original file.
608  const idx_t *list_nos, int nlist) const
609 {
610  std::vector<idx_t> list0, list1;
611  for (int i = 0; i < nlist; i++) {
612  idx_t list_no = list_nos[i];
613  if (list_no < 0) continue;
614  size_t sz = il0->list_size(list_no);
615  (sz ? list0 : list1).push_back (list_no);
616  }
// both calls are made even when the corresponding vector is empty
617  il0->prefetch_lists (list0.data(), list0.size());
618  il1->prefetch_lists (list1.data(), list1.size());
619 }
620 
621 
622 
623 } // namespace faiss
const uint8_t * get_codes(size_t list_no) const override
void prefetch_lists(const idx_t *list_nos, int nlist) const override
idx_t get_single_id(size_t list_no, size_t offset) const override
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
const idx_t * get_ids(size_t list_no) const override
virtual const idx_t * get_ids(size_t list_no) const =0
double imbalance_factor() const
1 = perfectly balanced, >1: imbalanced
const idx_t * get_ids(size_t list_no) const override
idx_t get_single_id(size_t list_no, size_t offset) const override
vertical slice of indexes in another InvertedLists
void prefetch_lists(const idx_t *list_nos, int nlist) const override
virtual size_t list_size(size_t list_no) const =0
get the size of a list
double imbalance_factor(int n, int k, const long *assign)
a balanced assignment has an IF (imbalance factor) of 1
Definition: utils.cpp:1304
size_t list_size(size_t list_no) const override
get the size of a list
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
void merge_from(InvertedLists *oivf, size_t add_id)
move all entries from oivf (empty on output)
virtual idx_t get_single_id(size_t list_no, size_t offset) const
long idx_t
all indices are this type
Definition: Index.h:62
size_t code_size
code size per vector in bytes
Definition: InvertedLists.h:35
virtual const uint8_t * get_single_code(size_t list_no, size_t offset) const
VStackInvertedLists(int nil, const InvertedLists **ils)
build InvertedLists by concatenating nil of them
size_t compute_ntotal() const
sum up list sizes
const uint8_t * get_codes(size_t list_no) const override
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code)
add one entry to an inverted list
const uint8_t * get_codes(size_t list_no) const override
size_t list_size(size_t list_no) const override
get the size of a list
void prefetch_lists(const idx_t *list_nos, int nlist) const override
size_t list_size(size_t list_no) const override
get the size of a list
void prefetch_lists(const idx_t *list_nos, int nlist) const override
HStackInvertedLists(int nil, const InvertedLists **ils)
build InvertedLists by concatenating nil of them
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
idx_t get_single_id(size_t list_no, size_t offset) const override
const uint8_t * get_codes(size_t list_no) const override
const idx_t * get_ids(size_t list_no) const override
size_t list_size(size_t list_no) const override
get the size of a list
size_t nlist
number of possible key values
Definition: InvertedLists.h:34
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
std::vector< std::vector< idx_t > > ids
Inverted lists for indexes.
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
const idx_t * get_ids(size_t list_no) const override
void print_stats() const
display some stats about the inverted lists
virtual const uint8_t * get_codes(size_t list_no) const =0
virtual void prefetch_lists(const idx_t *list_nos, int nlist) const
virtual void release_ids(size_t list_no, const idx_t *ids) const
release ids returned by get_ids
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is a no-op)
const uint8_t * get_codes(size_t list_no) const override
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is a no-op)
size_t list_size(size_t list_no) const override
get the size of a list
virtual void release_codes(size_t list_no, const uint8_t *codes) const
release codes returned by get_codes (default implementation is a no-op)
idx_t get_single_id(size_t list_no, size_t offset) const override
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is a no-op)
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is a no-op)
const idx_t * get_ids(size_t list_no) const override