10 #include "InvertedLists.h"
15 #include "FaissAssert.h"
19 using ScopedIds = InvertedLists::ScopedIds;
20 using ScopedCodes = InvertedLists::ScopedCodes;
27 InvertedLists::InvertedLists (
size_t nlist,
size_t code_size):
28 nlist (nlist), code_size (code_size)
32 InvertedLists::~InvertedLists ()
36 size_t list_no,
size_t offset)
const
39 return get_ids(list_no)[offset];
53 size_t list_no,
size_t offset)
const
62 return add_entries (list_no, 1, &theid, code);
// Update a single entry of list `list_no` at position `offset`.
// Thin wrapper: forwards to update_entries() with a count of 1, passing the
// address of `id` as a one-element id array and `code` unchanged.
65 void InvertedLists::update_entry (
size_t list_no,
size_t offset,
66 idx_t
id,
const uint8_t *code)
68 update_entries (list_no, offset, 1, &
id, code);
71 void InvertedLists::reset () {
72 for (
size_t i = 0; i <
nlist; i++) {
79 #pragma omp parallel for
80 for (idx_t i = 0; i <
nlist; i++) {
84 add_entries (i, list_size, ids.get (),
87 std::vector <idx_t> new_ids (list_size);
90 new_ids [j] = ids[j] + add_id;
92 add_entries (i, list_size, new_ids.data(),
100 std::vector<int> hist(
nlist);
102 for (
size_t i = 0; i <
nlist; i++) {
110 std::vector<int> sizes(40);
111 for (
size_t i = 0; i <
nlist; i++) {
112 for (
size_t j = 0; j < sizes.size(); j++) {
119 for (
size_t i = 0; i < sizes.size(); i++) {
121 printf(
"list size in < %d: %d instances\n", 1 << i, sizes[i]);
128 for (
size_t i = 0; i <
nlist; i++) {
138 ArrayInvertedLists::ArrayInvertedLists (
size_t nlist,
size_t code_size):
142 codes.resize (nlist);
// Append `n_entry` ids and their codes to the end of list `list_no`.
// `ids_in` must supply n_entry ids and `code` must supply
// code_size * n_entry bytes (these are the amounts copied below).
// Returns 0 without touching the list when n_entry is 0.
// NOTE(review): the return statement for the non-empty case is not visible
// in this listing; presumably it returns `o` -- confirm.
145 size_t ArrayInvertedLists::add_entries (
146 size_t list_no,
size_t n_entry,
147 const idx_t* ids_in,
const uint8_t *code)
149 if (n_entry == 0)
return 0;
150 assert (list_no < nlist);
// o = number of entries already stored; the new data is written from there.
151 size_t o =
ids [list_no].size();
152 ids [list_no].resize (o + n_entry);
153 memcpy (&
ids[list_no][o], ids_in,
sizeof (ids_in[0]) * n_entry);
// Codes are stored flat, code_size bytes per entry, so entry offsets are
// scaled by code_size.
154 codes [list_no].resize ((o + n_entry) * code_size);
155 memcpy (&codes[list_no][o * code_size], code, code_size * n_entry);
161 assert (list_no < nlist);
162 return ids[list_no].size();
167 assert (list_no < nlist);
168 return codes[list_no].data();
174 assert (list_no < nlist);
175 return ids[list_no].data();
// Set list `list_no` to hold `new_size` entries: the id vector is resized to
// new_size elements and the code vector to new_size * code_size bytes.
// NOTE(review): no `list_no < nlist` assert is visible here, unlike
// add_entries() and update_entries() -- confirm whether that is intended.
178 void ArrayInvertedLists::resize (
size_t list_no,
size_t new_size)
180 ids[list_no].resize (new_size);
181 codes[list_no].resize (new_size * code_size);
// Overwrite `n_entry` existing entries of list `list_no`, starting at
// position `offset`, with ids from `ids_in` and codes from `codes_in`.
// The asserts require a valid list number and that offset + n_entry does not
// exceed the list's current size; no resize is performed here.
184 void ArrayInvertedLists::update_entries (
185 size_t list_no,
size_t offset,
size_t n_entry,
186 const idx_t *ids_in,
const uint8_t *codes_in)
188 assert (list_no < nlist);
189 assert (n_entry + offset <=
ids[list_no].size());
190 memcpy (&
ids[list_no][offset], ids_in,
sizeof(ids_in[0]) * n_entry);
// Codes are stored flat: entry k starts at byte k * code_size.
191 memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
195 ArrayInvertedLists::~ArrayInvertedLists ()
// Adding entries is not supported on a read-only list: the visible
// parameters are unnamed and unused, and the call reports "not implemented"
// through FAISS_THROW_MSG.
203 size_t ReadOnlyInvertedLists::add_entries (
205 const idx_t* ,
const uint8_t *)
207 FAISS_THROW_MSG (
"not implemented");
// Updating entries is not supported on a read-only list: all parameters are
// unnamed and unused, and the call reports "not implemented" through
// FAISS_THROW_MSG.
210 void ReadOnlyInvertedLists::update_entries (
size_t,
size_t ,
size_t ,
211 const idx_t *,
const uint8_t *)
213 FAISS_THROW_MSG (
"not implemented");
// Resizing is not supported on a read-only list: both parameters are unnamed
// and unused, and the call reports "not implemented" through FAISS_THROW_MSG.
216 void ReadOnlyInvertedLists::resize (
size_t ,
size_t )
218 FAISS_THROW_MSG (
"not implemented");
230 nil > 0 ? ils_in[0]->code_size : 0)
232 FAISS_THROW_IF_NOT (nil > 0);
233 for (
int i = 0; i < nil; i++) {
234 ils.push_back (ils_in[i]);
235 FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size &&
236 ils_in[i]->nlist == nlist);
243 for (
int i = 0; i < ils.size(); i++) {
252 uint8_t *codes =
new uint8_t [code_size *
list_size(list_no)], *c = codes;
254 for (
int i = 0; i < ils.size(); i++) {
266 size_t list_no,
size_t offset)
const
// Walk the stacked sub-lists in order (loop body only partly visible here).
268 for (
int i = 0; i < ils.size(); i++) {
// A fresh code_size-byte buffer is allocated with new[] and filled from the
// sub-list's code at `offset`.
// NOTE(review): the matching delete[] is not visible in this fragment;
// presumably the caller hands the pointer back via release_codes -- confirm.
273 uint8_t * code =
new uint8_t [
code_size];
274 memcpy (code,
ScopedCodes (il, list_no, offset).
get(), code_size);
// Reached when no sub-list produced a result for `offset`.
// NOTE(review): `offset` is declared size_t above but is formatted with %ld;
// %zu would match the type on platforms where long and size_t differ.
279 FAISS_THROW_FMT (
"offset %ld unknown", offset);
289 idx_t *ids =
new idx_t [
list_size(list_no)], *c = ids;
291 for (
int i = 0; i < ils.size(); i++) {
295 memcpy (c,
ScopedIds (il, list_no).
get(), sz *
sizeof(idx_t));
303 size_t list_no,
size_t offset)
const
// Walk the stacked sub-lists in order (loop body not visible here).
306 for (
int i = 0; i < ils.size(); i++) {
// Reached when no sub-list produced a result for `offset`.
// NOTE(review): `offset` is declared size_t above but is formatted with %ld;
// %zu would match the type on platforms where long and size_t differ.
314 FAISS_THROW_FMT (
"offset %ld unknown", offset);
324 for (
int i = 0; i < ils.size(); i++) {
337 using idx_t = InvertedLists::idx_t;
341 FAISS_THROW_IF_NOT (list_no >= 0 && list_no < sil->nlist);
342 return list_no + sil->i0;
// Build a read-only view onto `il`.
// The ReadOnlyInvertedLists base is constructed with i1 - i0 and
// il->code_size (presumably its list count and code size, matching the
// InvertedLists constructor), and il, i0 and i1 are stored as members.
// NOTE(review): no check that i0 <= i1 or that i1 is within il's range is
// visible in this listing -- confirm against the full source.
349 SliceInvertedLists::SliceInvertedLists (
350 const InvertedLists *il, idx_t i0, idx_t i1):
351 ReadOnlyInvertedLists (i1 - i0, il->code_size),
352 il (il), i0(i0), i1(i1)
359 return il->
list_size (translate_list_no (
this, list_no));
364 return il->
get_codes (translate_list_no (
this, list_no));
368 size_t list_no,
size_t offset)
const
370 return il->
get_single_code (translate_list_no (
this, list_no), offset);
375 size_t list_no,
const uint8_t *codes)
const {
376 return il->
release_codes (translate_list_no (
this, list_no), codes);
381 return il->
get_ids (translate_list_no (
this, list_no));
385 size_t list_no,
size_t offset)
const
387 return il->
get_single_id (translate_list_no (
this, list_no), offset);
392 return il->
release_ids (translate_list_no (
this, list_no), ids);
397 std::vector<idx_t> translated_list_nos;
398 for (
int j = 0; j <
nlist; j++) {
399 idx_t list_no = list_nos[j];
400 if (list_no < 0)
continue;
401 translated_list_nos.push_back (translate_list_no (
this, list_no));
404 translated_list_nos.size());
414 using idx_t = InvertedLists::idx_t;
// Reject list numbers outside [0, vil->nlist).
419 FAISS_THROW_IF_NOT (list_no >= 0 && list_no < vil->nlist);
// Bisection over the cumulative-size table `cumsz`: the candidate range
// [i0, i1) is narrowed until a single sub-list index remains.
// NOTE(review): the branch bodies that update i0/i1 are not visible in this
// listing.
420 int i0 = 0, i1 = vil->ils.size();
421 const idx_t *cumsz = vil->cumsz.data();
422 while (i0 + 1 < i1) {
423 int imed = (i0 + i1) / 2;
424 if (list_no >= cumsz[imed]) {
// Post-condition: sub-list i0 covers list_no, i.e.
// cumsz[i0] <= list_no < cumsz[i0 + 1].
430 assert(list_no >= cumsz[i0] && list_no < cumsz[i0 + 1]);
// Accumulate the `nlist` of each of the `nil` inverted lists in `ils_in`
// into `tot`.
// NOTE(review): the declaration of `tot` and the return statement are not
// visible in this listing; presumably the accumulated total is returned.
434 idx_t sum_il_sizes (
int nil,
const InvertedLists **ils_in) {
436 for (
int i = 0; i < nil; i++) {
437 tot += ils_in[i]->nlist;
449 nil > 0 ? ils_in[0]->code_size : 0)
451 FAISS_THROW_IF_NOT (nil > 0);
452 cumsz.resize (nil + 1);
453 for (
int i = 0; i < nil; i++) {
454 ils.push_back (ils_in[i]);
455 FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size);
456 cumsz[i + 1] = cumsz[i] + ils_in[i]->
nlist;
462 int i = translate_list_no (
this, list_no);
464 return ils[i]->list_size (list_no);
469 int i = translate_list_no (
this, list_no);
471 return ils[i]->get_codes (list_no);
475 size_t list_no,
size_t offset)
const
477 int i = translate_list_no (
this, list_no);
479 return ils[i]->get_single_code (list_no, offset);
484 size_t list_no,
const uint8_t *codes)
const {
485 int i = translate_list_no (
this, list_no);
487 return ils[i]->release_codes (list_no, codes);
492 int i = translate_list_no (
this, list_no);
494 return ils[i]->get_ids (list_no);
498 size_t list_no,
size_t offset)
const
500 int i = translate_list_no (
this, list_no);
502 return ils[i]->get_single_id (list_no, offset);
507 int i = translate_list_no (
this, list_no);
509 return ils[i]->release_ids (list_no, ids);
513 const idx_t *list_nos,
int nlist)
const
515 std::vector<int> ilno (nlist, -1);
516 std::vector<int> n_per_il (ils.size(), 0);
517 for (
int j = 0; j <
nlist; j++) {
518 idx_t list_no = list_nos[j];
519 if (list_no < 0)
continue;
520 int i = ilno[j] = translate_list_no (
this, list_no);
523 std::vector<int> cum_n_per_il (ils.size() + 1, 0);
524 for (
int j = 0; j < ils.size(); j++) {
525 cum_n_per_il[j + 1] = cum_n_per_il[j] + n_per_il[j];
527 std::vector<idx_t> sorted_list_nos (cum_n_per_il.back());
528 for (
int j = 0; j <
nlist; j++) {
529 idx_t list_no = list_nos[j];
530 if (list_no < 0)
continue;
533 sorted_list_nos[cum_n_per_il[i]++] = list_no;
537 for (
int j = 0; j < ils.size(); j++) {
538 int i1 = i0 + n_per_il[j];
540 ils[j]->prefetch_lists (sorted_list_nos.data() + i0,
554 MaskedInvertedLists::MaskedInvertedLists (
const InvertedLists *il0,
559 FAISS_THROW_IF_NOT (il1->
nlist == nlist);
560 FAISS_THROW_IF_NOT (il1->
code_size == code_size);
566 return sz ? sz : il1->
list_size(list_no);
572 return (sz ? il0 : il1)->get_codes(list_no);
578 return (sz ? il0 : il1)->get_ids (list_no);
582 size_t list_no,
const uint8_t *codes)
const
597 return (sz ? il0 : il1)->get_single_id (list_no, offset);
601 size_t list_no,
size_t offset)
const
604 return (sz ? il0 : il1)->get_single_code (list_no, offset);
608 const idx_t *list_nos,
int nlist)
const
610 std::vector<idx_t> list0, list1;
611 for (
int i = 0; i <
nlist; i++) {
612 idx_t list_no = list_nos[i];
613 if (list_no < 0)
continue;
615 (sz ? list0 : list1).push_back (list_no);
const uint8_t * get_codes(size_t list_no) const override
void prefetch_lists(const idx_t *list_nos, int nlist) const override
idx_t get_single_id(size_t list_no, size_t offset) const override
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
const idx_t * get_ids(size_t list_no) const override
virtual const idx_t * get_ids(size_t list_no) const =0
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
const idx_t * get_ids(size_t list_no) const override
idx_t get_single_id(size_t list_no, size_t offset) const override
vertical slice of indexes in another InvertedLists
void prefetch_lists(const idx_t *list_nos, int nlist) const override
virtual size_t list_size(size_t list_no) const =0
get the size of a list
double imbalance_factor(int n, int k, const long *assign)
a balanced assignment has an imbalance factor (IF) of 1
size_t list_size(size_t list_no) const override
get the size of a list
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
void merge_from(InvertedLists *oivf, size_t add_id)
move all entries from oivf (empty on output)
virtual idx_t get_single_id(size_t list_no, size_t offset) const
long idx_t
all indices are this type
size_t code_size
code size per vector in bytes
virtual const uint8_t * get_single_code(size_t list_no, size_t offset) const
VStackInvertedLists(int nil, const InvertedLists **ils)
build InvertedLists by concatenating nil of them
size_t compute_ntotal() const
sum up list sizes
const uint8_t * get_codes(size_t list_no) const override
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code)
add one entry to an inverted list
const uint8_t * get_codes(size_t list_no) const override
size_t list_size(size_t list_no) const override
get the size of a list
void prefetch_lists(const idx_t *list_nos, int nlist) const override
size_t list_size(size_t list_no) const override
get the size of a list
void prefetch_lists(const idx_t *list_nos, int nlist) const override
HStackInvertedLists(int nil, const InvertedLists **ils)
build InvertedLists by concatenating nil of them
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
idx_t get_single_id(size_t list_no, size_t offset) const override
const uint8_t * get_codes(size_t list_no) const override
const idx_t * get_ids(size_t list_no) const override
size_t list_size(size_t list_no) const override
get the size of a list
size_t nlist
number of possible key values
const uint8_t * get_single_code(size_t list_no, size_t offset) const override
std::vector< std::vector< idx_t > > ids
Inverted lists for indexes.
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
const idx_t * get_ids(size_t list_no) const override
void print_stats() const
display some stats about the inverted lists
virtual const uint8_t * get_codes(size_t list_no) const =0
virtual void prefetch_lists(const idx_t *list_nos, int nlist) const
virtual void release_ids(size_t list_no, const idx_t *ids) const
release ids returned by get_ids
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is a no-op)
const uint8_t * get_codes(size_t list_no) const override
void release_ids(size_t list_no, const idx_t *ids) const override
release ids returned by get_ids
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is a no-op)
size_t list_size(size_t list_no) const override
get the size of a list
virtual void release_codes(size_t list_no, const uint8_t *codes) const
release codes returned by get_codes (default implementation is a no-op)
idx_t get_single_id(size_t list_no, size_t offset) const override
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is a no-op)
void release_codes(size_t list_no, const uint8_t *codes) const override
release codes returned by get_codes (default implementation is a no-op)
const idx_t * get_ids(size_t list_no) const override