#include "OnDiskInvertedLists.h"

#include <pthread.h>

#include <cerrno>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <unordered_set>
#include <vector>

#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

#include "FaissAssert.h"

namespace faiss {
struct LockLevels {
    /* There are several level-1 locks (one per inverted list), one
     * level-2 lock and one level-3 lock. A thread may take lock_2 only
     * while holding a level-1 lock, and lock_3 only while holding
     * lock_2. */
    pthread_mutex_t mutex1;
    pthread_cond_t level1_cv;
    pthread_cond_t level2_cv;
    pthread_cond_t level3_cv;

    std::unordered_set<int> level1_holders; // which level-1 locks are held
    int n_level2;        // number of threads waiting on level 2
    bool level2_in_use;
    bool level3_in_use;
    LockLevels() {
        pthread_mutex_init(&mutex1, nullptr);
        pthread_cond_init(&level1_cv, nullptr);
        pthread_cond_init(&level2_cv, nullptr);
        pthread_cond_init(&level3_cv, nullptr);
        n_level2 = 0;
        level2_in_use = false;
        level3_in_use = false;
    }
    ~LockLevels() {
        pthread_cond_destroy(&level1_cv);
        pthread_cond_destroy(&level2_cv);
        pthread_cond_destroy(&level3_cv);
        pthread_mutex_destroy(&mutex1);
    }
    void lock_1(int no) {
        pthread_mutex_lock(&mutex1);
        while (level3_in_use || level1_holders.count(no) > 0) {
            pthread_cond_wait(&level1_cv, &mutex1);
        }
        level1_holders.insert(no);
        pthread_mutex_unlock(&mutex1);
    }
    void unlock_1(int no) {
        pthread_mutex_lock(&mutex1);
        assert(level1_holders.count(no) == 1);
        level1_holders.erase(no);
        if (level3_in_use) { // a writer is waiting on level 3
            pthread_cond_signal(&level3_cv);
        } else {
            pthread_cond_broadcast(&level1_cv);
        }
        pthread_mutex_unlock(&mutex1);
    }
    void lock_2() {
        pthread_mutex_lock(&mutex1);
        n_level2++;
        if (level3_in_use) { // tell the level-3 holder we are waiting
            pthread_cond_signal(&level3_cv);
        }
        while (level2_in_use) {
            pthread_cond_wait(&level2_cv, &mutex1);
        }
        level2_in_use = true;
        pthread_mutex_unlock(&mutex1);
    }
    void unlock_2() {
        pthread_mutex_lock(&mutex1);
        level2_in_use = false;
        n_level2--;
        pthread_cond_signal(&level2_cv);
        pthread_mutex_unlock(&mutex1);
    }
    void lock_3() {
        pthread_mutex_lock(&mutex1);
        level3_in_use = true;
        // wait until the only remaining level-1 holders are the threads
        // that are themselves blocked on level 2
        while (level1_holders.size() > (size_t) n_level2) {
            pthread_cond_wait(&level3_cv, &mutex1);
        }
        // deliberately keep mutex1 locked until unlock_3
    }

    void unlock_3() {
        level3_in_use = false;
        // wake up all level-1 waiters
        pthread_cond_broadcast(&level1_cv);
        pthread_mutex_unlock(&mutex1);
    }
    void print() {
        pthread_mutex_lock(&mutex1);
        printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
               int(level3_in_use), n_level2);
        for (int k : level1_holders) {
            printf("%d ", k);
        }
        printf("]\n");
        pthread_mutex_unlock(&mutex1);
    }
};
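/* Sketch (not part of the original file): how the three levels are taken
 * by the code below. A writer that ends up remapping the whole file
 * escalates through all three:
 *
 *   locks->lock_1 (list_no);    // add_entries: protect one list
 *     locks->lock_2 ();         // resize_locked: protect the slot list
 *       locks->lock_3 ();       // allocate_slot: quiesce all readers
 *       update_totsize (...);   // munmap + truncate + mmap
 *       locks->unlock_3 ();
 *     locks->unlock_2 ();
 *   locks->unlock_1 (list_no);
 */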
struct OnDiskInvertedLists::OngoingPrefetch {

    struct Thread {
        pthread_t pth;
        const OnDiskInvertedLists *od;
        long list_no;
    };

    std::vector<Thread> threads;

    pthread_mutex_t mutex;

    // accumulates the (meaningless) checksums computed by the prefetch
    // threads so that the reads cannot be optimized out
    static int global_cs;

    const OnDiskInvertedLists *od;

    explicit OngoingPrefetch (const OnDiskInvertedLists *od): od (od) {
        pthread_mutex_init (&mutex, nullptr);
    }
    static void* prefetch_list (void *arg) {
        Thread *th = static_cast<Thread*>(arg);

        th->od->locks->lock_1(th->list_no);
        size_t n = th->od->list_size(th->list_no);
        const idx_t *idx = th->od->get_ids(th->list_no);
        const uint8_t *codes = th->od->get_codes(th->list_no);
        int cs = 0;
        // read all ids and codes so that the kernel pages them in
        for (size_t i = 0; i < n; i++) {
            cs += idx[i];
        }
        const long *codes8 = (const long*)codes;
        long n8 = n * th->od->code_size / 8;
        for (long i = 0; i < n8; i++) {
            cs += codes8[i];
        }
        th->od->locks->unlock_1(th->list_no);

        global_cs += cs & 1;
        return nullptr;
    }
    void prefetch_lists (const long *list_nos, int n) {
        pthread_mutex_lock (&mutex);
        // wait for the previous round of prefetch threads to finish
        for (auto &th : threads) {
            if (th.list_no != -1) {
                pthread_join (th.pth, nullptr);
            }
        }
        threads.resize (0);
        threads.resize (n);
        // spawn one thread per non-empty list
        for (int i = 0; i < n; i++) {
            long list_no = list_nos[i];
            Thread &th = threads[i];
            if (list_no >= 0 && od->list_size(list_no) > 0) {
                th.list_no = list_no;
                th.od = od;
                pthread_create (&th.pth, nullptr, prefetch_list, &th);
            } else {
                th.list_no = -1;
            }
        }
        pthread_mutex_unlock (&mutex);
    }
    ~OngoingPrefetch () {
        pthread_mutex_lock (&mutex);
        for (auto &th : threads) {
            if (th.list_no != -1) {
                pthread_join (th.pth, nullptr);
            }
        }
        pthread_mutex_unlock (&mutex);
        pthread_mutex_destroy (&mutex);
    }
};
int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
void OnDiskInvertedLists::prefetch_lists (const long *list_nos, int n) const
{
    pf->prefetch_lists (list_nos, n);
}
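/* Note (added): prefetching spawns one thread per list that simply reads
 * all the ids and codes, so the kernel pages the data into the page cache
 * before search threads touch it. The checksums folded into global_cs are
 * meaningless; they only keep the compiler from optimizing the reads
 * away. */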
void OnDiskInvertedLists::do_mmap ()
{
    const char *rw_flags = read_only ? "r" : "r+";
    int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
    FILE *f = fopen (filename.c_str(), rw_flags);
    FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode %s: %s",
                            filename.c_str(), rw_flags, strerror(errno));

    uint8_t *ptro = (uint8_t*)mmap (nullptr, totsize,
                                    prot, MAP_SHARED, fileno (f), 0);

    FAISS_THROW_IF_NOT_FMT (ptro != MAP_FAILED, "could not mmap %s: %s",
                            filename.c_str(), strerror(errno));
    ptr = ptro;
    fclose (f);
}
void OnDiskInvertedLists::update_totsize (size_t new_size)
{
    // unmap the current view of the file
    if (ptr != nullptr) {
        int err = munmap (ptr, totsize);
        FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s",
                                strerror(errno));
    }
    if (totsize == 0) {
        // the file must exist before it can be truncated to its size
        FILE *f = fopen (filename.c_str(), "w");
        FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode W: %s",
                                filename.c_str(), strerror(errno));
        fclose (f);
    }

    if (new_size > totsize) {
        // append the extra space to the last slot if it touches the end
        // of the file, otherwise register it as a new free slot
        if (!slots.empty() &&
            slots.back().offset + slots.back().capacity == totsize) {
            slots.back().capacity += new_size - totsize;
        } else {
            slots.push_back (Slot (totsize, new_size - totsize));
        }
    } else {
        // shrinking the file is not supported
        assert (!"not implemented");
    }

    totsize = new_size;

    printf ("resizing %s to %zu bytes\n", filename.c_str(), totsize);

    int err = truncate (filename.c_str(), totsize);
    FAISS_THROW_IF_NOT_FMT (err == 0, "truncate %s to %zu: %s",
                            filename.c_str(), totsize, strerror(errno));

    do_mmap ();
}
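/* Worked example (added): with totsize == 1024 and a request for 3000
 * extra bytes, allocate_slot below tries new_size = 2048, then 4096, and
 * calls update_totsize (4096): the 3072 appended bytes extend the last
 * free slot (or become a new one) before the file is re-truncated and
 * re-mapped. */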
#define INVALID_OFFSET (size_t)(-1)

OnDiskInvertedLists::List::List ():
    size (0), capacity (0), offset (INVALID_OFFSET)
{}

OnDiskInvertedLists::Slot::Slot (size_t offset, size_t capacity):
    offset (offset), capacity (capacity)
{}

OnDiskInvertedLists::Slot::Slot ():
    offset (0), capacity (0)
{}
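/* Note (added): a List describes one inverted list inside the file
 * (size = entries in use, capacity = entries allocated, offset = byte
 * offset of its slot); a Slot describes a free chunk of the file, with
 * offset and capacity in bytes. The slots list is kept sorted by offset
 * so free_slot can coalesce neighboring chunks. */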
OnDiskInvertedLists::OnDiskInvertedLists (
        size_t nlist, size_t code_size, const char *filename):
    InvertedLists (nlist, code_size),
    filename (filename), totsize (0), ptr (nullptr), read_only (false),
    locks (new LockLevels ()), pf (new OngoingPrefetch (this))
{
    lists.resize (nlist);
    // the slots list starts empty: no file space is allocated yet
}

OnDiskInvertedLists::OnDiskInvertedLists ():
    InvertedLists (0, 0),
    totsize (0), ptr (nullptr), read_only (false),
    locks (new LockLevels ()), pf (new OngoingPrefetch (this))
{}
OnDiskInvertedLists::~OnDiskInvertedLists ()
{
    delete pf;
    // unmap the data file
    if (ptr != nullptr) {
        int err = munmap (ptr, totsize);
        FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s",
                                strerror(errno));
    }
    delete locks;
}

size_t OnDiskInvertedLists::list_size (size_t list_no) const
{
    return lists[list_no].size;
}

const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
{
    if (lists[list_no].offset == INVALID_OFFSET) {
        return nullptr;
    }
    // the codes are stored first in the list's slot
    return ptr + lists[list_no].offset;
}

const Index::idx_t * OnDiskInvertedLists::get_ids (size_t list_no) const
{
    if (lists[list_no].offset == INVALID_OFFSET) {
        return nullptr;
    }
    // the ids follow the codes, offset by the full code area
    return (const idx_t*)(ptr + lists[list_no].offset +
                          code_size * lists[list_no].capacity);
}
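/* Layout of one list's slot in the mapped file, as read by the two
 * accessors above:
 *
 *   offset                                offset + capacity * code_size
 *   |                                     |
 *   [ codes: capacity * code_size bytes ] [ ids: capacity * sizeof(idx_t) ]
 */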
void OnDiskInvertedLists::update_entries (
        size_t list_no, size_t offset, size_t n_entry,
        const idx_t *ids_in, const uint8_t *codes_in)
{
    FAISS_THROW_IF_NOT (!read_only);
    if (n_entry == 0) return;
    const List & l = lists[list_no];
    assert (n_entry + offset <= l.size);
    idx_t *ids = const_cast<idx_t*>(get_ids (list_no));
    memcpy (ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
    uint8_t *codes = const_cast<uint8_t*>(get_codes (list_no));
    memcpy (codes + offset * code_size, codes_in, code_size * n_entry);
}
size_t OnDiskInvertedLists::add_entries (
        size_t list_no, size_t n_entry,
        const idx_t *ids, const uint8_t *code)
{
    FAISS_THROW_IF_NOT (!read_only);
    locks->lock_1 (list_no);
    size_t o = list_size (list_no);
    resize_locked (list_no, n_entry + o);
    update_entries (list_no, o, n_entry, ids, code);
    locks->unlock_1 (list_no);
    return o;
}
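/* Usage sketch (added; assumes the IndexIVF::replace_invlists API from
 * the same Faiss version, and illustrative variable names):
 *
 *   faiss::IndexIVFFlat index (&quantizer, d, nlist);
 *   faiss::OnDiskInvertedLists odl (index.nlist, index.code_size,
 *                                   "/tmp/ivf.ondisk");
 *   index.replace_invlists (&odl);  // adds now go through add_entries
 *   index.add (nb, xb);             // codes and ids are written on disk
 */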
void OnDiskInvertedLists::resize (size_t list_no, size_t new_size)
{
    FAISS_THROW_IF_NOT (!read_only);
    locks->lock_1 (list_no);
    resize_locked (list_no, new_size);
    locks->unlock_1 (list_no);
}
void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
{
    List & l = lists[list_no];

    // fast path: the new size fits in the current slot and does not
    // waste more than half of it
    if (new_size <= l.capacity &&
        new_size > l.capacity / 2) {
        l.size = new_size;
        return;
    }

    // otherwise release the current slot and find a new one
    locks->lock_2 ();
    free_slot (l.offset, l.capacity);

    List new_l;

    if (new_size == 0) {
        new_l = List ();
    } else {
        new_l.size = new_size;
        new_l.capacity = 1;
        // round the capacity up to a power of 2
        while (new_l.capacity < new_size) {
            new_l.capacity *= 2;
        }
        new_l.offset = allocate_slot (
            new_l.capacity * (sizeof(idx_t) + code_size));
    }

    // copy the common data over to the new slot
    if (l.offset != new_l.offset) {
        size_t n = std::min (new_size, l.size);
        if (n > 0) {
            memcpy (ptr + new_l.offset, get_codes (list_no), n * code_size);
            memcpy (ptr + new_l.offset + new_l.capacity * code_size,
                    get_ids (list_no), n * sizeof(idx_t));
        }
    }

    lists[list_no] = new_l;
    locks->unlock_2 ();
}
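/* Worked example (added): growing a list from size 100 (capacity 128) to
 * 200 misses the fast path (200 > 128), so a capacity-256 slot is
 * allocated and the 100 existing entries are copied. Shrinking from 128
 * to 70 keeps the slot (70 > 128/2); shrinking to 60 releases it and
 * reallocates at capacity 64. */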
size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
    // caller should hold lock_2
    auto it = slots.begin();
    while (it != slots.end() && it->capacity < capacity) {
        it++;
    }

    if (it == slots.end()) {
        // no free slot is big enough: double the file size until the
        // space appended at the end can hold the request
        size_t new_size = totsize == 0 ? 32 : totsize * 2;
        while (new_size - totsize < capacity)
            new_size *= 2;
        locks->lock_3 ();
        update_totsize (new_size);
        locks->unlock_3 ();
        // retry the search; the last slot is now large enough
        it = slots.begin();
        while (it != slots.end() && it->capacity < capacity) {
            it++;
        }
        assert (it != slots.end());
    }

    size_t o = it->offset;
    if (it->capacity == capacity) {
        slots.erase (it);
    } else {
        // take the space from the beginning of the slot
        it->capacity -= capacity;
        it->offset += capacity;
    }

    return o;
}
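/* Note (added): this is a first-fit scan over the free list. When a
 * larger slot is used, the requested bytes are taken from its beginning
 * and the remainder stays in place, which preserves the offset ordering
 * of the slots list. */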
void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
    // caller should hold lock_2
    if (capacity == 0) return;

    // find the first slot that starts after the freed chunk
    auto it = slots.begin();
    while (it != slots.end() && it->offset <= offset) {
        it++;
    }

    size_t inf = 1UL << 60;

    size_t end_prev = inf;
    if (it != slots.begin()) {
        auto prev = it;
        prev--;
        end_prev = prev->offset + prev->capacity;
    }

    size_t begin_next = inf;
    if (it != slots.end()) {
        begin_next = it->offset;
    }

    // the freed chunk must not overlap its neighbors
    assert (end_prev == inf || offset >= end_prev);
    assert (offset + capacity <= begin_next);

    if (offset == end_prev) {
        // coalesce with the previous slot
        auto prev = it;
        prev--;
        if (offset + capacity == begin_next) {
            // ... and with the next one
            prev->capacity += capacity + it->capacity;
            slots.erase (it);
        } else {
            prev->capacity += capacity;
        }
    } else {
        if (offset + capacity == begin_next) {
            // coalesce with the next slot
            it->offset -= capacity;
            it->capacity += capacity;
        } else {
            // isolated chunk: insert a new slot
            slots.insert (it, Slot (offset, capacity));
        }
    }
}
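/* Note (added): the branches above cover all four coalescing cases: the
 * freed chunk touches the previous slot, the next slot, both (the two
 * merge into one), or neither (a fresh Slot is inserted). The free list
 * therefore stays sorted with no two adjacent chunks. */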
size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il)
{
    FAISS_THROW_IF_NOT_MSG (totsize == 0,
                            "works only on an empty InvertedLists");

    // compute the final size of every list
    std::vector<size_t> sizes (nlist);
    for (int i = 0; i < n_il; i++) {
        const InvertedLists *il = ils[i];
        FAISS_THROW_IF_NOT (il->nlist == nlist &&
                            il->code_size == code_size);
        for (size_t j = 0; j < nlist; j++) {
            sizes[j] += il->list_size (j);
        }
    }

    // lay the lists out contiguously, at exactly their final capacity
    size_t cums = 0;
    size_t ntotal = 0;
    for (size_t j = 0; j < nlist; j++) {
        ntotal += sizes[j];
        lists[j].size = 0;
        lists[j].capacity = sizes[j];
        lists[j].offset = cums;
        cums += lists[j].capacity * (sizeof(idx_t) + code_size);
    }

    update_totsize (cums);

    // copy the data, one output list per OpenMP thread
#pragma omp parallel for
    for (size_t j = 0; j < nlist; j++) {
        List & l = lists[j];
        for (int i = 0; i < n_il; i++) {
            const InvertedLists *il = ils[i];
            size_t n_entry = il->list_size (j);
            l.size += n_entry;
            update_entries (j, l.size - n_entry, n_entry,
                            il->get_ids (j), il->get_codes (j));
        }
        assert (l.size == l.capacity);
    }

    return ntotal;
}
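/* Usage sketch (added; illustrative names): merging the inverted lists of
 * several trained IVF shards into a single on-disk file:
 *
 *   std::vector<const faiss::InvertedLists*> ils;
 *   for (const faiss::IndexIVF *shard : shards)
 *       ils.push_back (shard->invlists);
 *   faiss::OnDiskInvertedLists odl (nlist, code_size, "/tmp/merged.data");
 *   size_t ntotal = odl.merge_from (ils.data(), (int) ils.size());
 */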
} // namespace faiss

/* Summary of the InvertedLists interface implemented above (from the
 * class documentation):
 *
 *   typedef long idx_t;    // all indices are this type
 *   size_t nlist;          // number of possible key values
 *   size_t code_size;      // code size per vector in bytes
 *
 *   size_t list_size (size_t list_no) const override;  // size of a list
 *   const uint8_t *get_codes (size_t list_no) const override;
 *   const idx_t *get_ids (size_t list_no) const override;
 *   void prefetch_lists (const long *list_nos, int nlist) const override;
 */