10 #include "OnDiskInvertedLists.h"
14 #include <unordered_set>
18 #include <sys/types.h>
20 #include "FaissAssert.h"
// --- LockLevels state (fragment; enclosing struct declaration not visible) ---
// Single mutex guarding all of the lock-level bookkeeping below.
40 pthread_mutex_t mutex1;
// One condition variable per lock level; waiters for each level block here.
41 pthread_cond_t level1_cv;
42 pthread_cond_t level2_cv;
43 pthread_cond_t level3_cv;
// Set of list ids that currently hold a level-1 (per-list) lock.
45 std::unordered_set<int> level1_holders;
// LockLevels constructor body (fragment): initialize the mutex and the
// three per-level condition variables with default attributes.
51 pthread_mutex_init(&mutex1,
nullptr);
52 pthread_cond_init(&level1_cv,
nullptr);
53 pthread_cond_init(&level2_cv,
nullptr);
54 pthread_cond_init(&level3_cv,
nullptr);
// no level-2 (allocator) or level-3 (exclusive) holder initially
56 level2_in_use =
false;
57 level3_in_use =
false;
// LockLevels destructor body (fragment): destroy the condition variables
// before the mutex that their waiters used.
61 pthread_cond_destroy(&level1_cv);
62 pthread_cond_destroy(&level2_cv);
63 pthread_cond_destroy(&level3_cv);
64 pthread_mutex_destroy(&mutex1);
// lock_1 body (fragment; signature not visible — presumably lock_1(int no)):
// block while an exclusive level-3 holder exists or list `no` is already
// level-1 locked, then register `no` as a holder.
68 pthread_mutex_lock(&mutex1);
69 while (level3_in_use || level1_holders.count(no) > 0) {
70 pthread_cond_wait(&level1_cv, &mutex1);
// NOTE(review): the closing brace of the wait loop is missing from this
// fragment — confirm against the original file.
72 level1_holders.insert(no);
73 pthread_mutex_unlock(&mutex1);
// Release the level-1 lock on list `no`; the caller must actually hold it.
76 void unlock_1(
int no) {
77 pthread_mutex_lock(&mutex1);
78 assert(level1_holders.count(no) == 1);
79 level1_holders.erase(no);
// wake a level-3 waiter so it can re-check the holder count
// NOTE(review): lines 80/82 of the original (likely an if-guard around this
// signal) are missing from this fragment — confirm before editing.
81 pthread_cond_signal(&level3_cv);
// and wake all level-1 waiters that were blocked on this list id
83 pthread_cond_broadcast(&level1_cv);
85 pthread_mutex_unlock(&mutex1);
// lock_2 body (fragment): nudge any level-3 waiter, then wait until the
// single level-2 slot is free. The statement taking level2_in_use = true
// is not visible in this fragment.
89 pthread_mutex_lock(&mutex1);
92 pthread_cond_signal(&level3_cv);
94 while (level2_in_use) {
95 pthread_cond_wait(&level2_cv, &mutex1);
98 pthread_mutex_unlock(&mutex1);
// unlock_2 body (fragment): release the single level-2 slot and wake one
// waiter blocked in lock_2.
102 pthread_mutex_lock(&mutex1);
103 level2_in_use =
false;
105 pthread_cond_signal(&level2_cv);
106 pthread_mutex_unlock(&mutex1);
// lock_3 body (fragment): take the exclusive level. Marks level3_in_use so
// new level-1 lockers block, waits for level-1 holders to drain, then
// releases exclusivity and wakes level-1 waiters. NOTE(review): the wait
// condition is `> n_level2`, i.e. up to n_level2 level-1 holders may remain
// while "exclusive" — confirm intended semantics; work done between the
// wait loop and the reset (original lines 116-120) is not visible here.
110 pthread_mutex_lock(&mutex1);
111 level3_in_use =
true;
114 while (level1_holders.size() > n_level2) {
115 pthread_cond_wait(&level3_cv, &mutex1);
121 level3_in_use =
false;
123 pthread_cond_broadcast(&level1_cv);
124 pthread_mutex_unlock(&mutex1);
// Debug helper (fragment): dump the lock-level state under mutex1 so the
// snapshot is consistent. The loop body printing each holder id is not
// visible in this fragment.
128 pthread_mutex_lock(&mutex1);
129 printf(
"State: level3_in_use=%d n_level2=%d level1_holders: [", level3_in_use, n_level2);
130 for (
int k : level1_holders) {
134 pthread_mutex_unlock(&mutex1);
// Prefetch worker step (fragment; enclosing signature not visible):
// fetch the next list id, take its per-list lock, and read through the
// list's codes — presumably to fault the mmapped pages into memory.
// Returns false when the id queue is exhausted.
150 idx_t list_no = pf->get_next_list();
151 if(list_no == -1)
return false;
// level-1 lock serializes against concurrent writers of this list
153 od->locks->lock_1 (list_no);
156 const uint8_t *codes = od->
get_codes (list_no);
// first pass over the codes; the loop body and the bound `n` are not
// visible in this fragment
158 for (
size_t i = 0; i < n;i++) {
// second pass reads the same bytes at idx_t granularity
161 const idx_t *codes8 = (
const idx_t*)codes;
164 for (
size_t i = 0; i < n8;i++) {
167 od->locks->unlock_1(list_no);
// --- OngoingPrefetch state (fragment) ---
// Worker threads plus the queue of list ids to prefetch; the queue is
// guarded by list_ids_mutex, and `mutex` serializes whole prefetch
// operations against each other and against destruction.
175 std::vector<Thread> threads;
177 pthread_mutex_t list_ids_mutex;
178 std::vector<idx_t> list_ids;
182 pthread_mutex_t mutex;
// process-wide counter shared by all OngoingPrefetch instances; its use is
// not visible in this fragment — see the definition at the namespace level.
185 static int global_cs;
// constructor body (fragment): initialize both mutexes with default attrs
191 pthread_mutex_init (&mutex,
nullptr);
192 pthread_mutex_init (&list_ids_mutex,
nullptr);
// pthread entry point: keep pulling lists via one_list() until the shared
// queue is exhausted. The return statement is not visible in this fragment.
196 static void* prefetch_list (
void * arg) {
197 Thread *th =
static_cast<Thread*
>(arg);
199 while (th->one_list()) ;
// Pop the next list id from the shared queue under list_ids_mutex.
// Fragment: the declaration of list_no and the return are not visible —
// presumably returns -1 on exhaustion, matching the caller's check.
204 idx_t get_next_list () {
206 pthread_mutex_lock (&list_ids_mutex);
207 if (cur_list >= 0 && cur_list < list_ids.size()) {
208 list_no = list_ids[cur_list++];
210 pthread_mutex_unlock (&list_ids_mutex);
// Start an asynchronous prefetch of `n` lists (fragment; several
// statements, including the queue reset and thread-count use, are not
// visible here).
214 void prefetch_lists (
const idx_t *list_nos,
int n) {
// `mutex` serializes whole prefetch operations
215 pthread_mutex_lock (&mutex);
216 pthread_mutex_lock (&list_ids_mutex);
218 pthread_mutex_unlock (&list_ids_mutex);
// join workers from the previous prefetch before starting new ones
219 for (
auto &th: threads) {
220 pthread_join (th.pth,
nullptr);
// cap the worker count by both the request size and the configured limit
225 int nt = std::min (n, od->prefetch_nthread);
// queue only ids that refer to existing, non-empty lists
229 for (
int i = 0; i < n; i++) {
230 idx_t list_no = list_nos[i];
231 if (list_no >= 0 && od->
list_size(list_no) > 0) {
232 list_ids.push_back (list_no);
// spawn one worker per Thread slot; each runs prefetch_list
237 for (Thread &th: threads) {
239 pthread_create (&th.pth,
nullptr, prefetch_list, &th);
242 pthread_mutex_unlock (&mutex);
// Destructor: join any outstanding workers (under `mutex`, so a concurrent
// prefetch_lists cannot interleave), then tear down both mutexes.
245 ~OngoingPrefetch () {
246 pthread_mutex_lock (&mutex);
247 for (
auto &th: threads) {
248 pthread_join (th.pth,
nullptr);
250 pthread_mutex_unlock (&mutex);
251 pthread_mutex_destroy (&mutex);
252 pthread_mutex_destroy (&list_ids_mutex);
// out-of-class storage for the static counter declared in OngoingPrefetch
257 int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
// OnDiskInvertedLists::prefetch_lists body (fragment): delegates to the
// OngoingPrefetch helper
262 pf->prefetch_lists (list_nos, n);
272 void OnDiskInvertedLists::do_mmap ()
// Map the backing file into memory (fragment; the tail storing the mapping
// is not visible). Read-only indexes open "r"/PROT_READ; otherwise the
// mapping is writable and MAP_SHARED, so stores propagate to the file.
274 const char *rw_flags = read_only ?
"r" :
"r+";
275 int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
276 FILE *f = fopen (filename.c_str(), rw_flags);
277 FAISS_THROW_IF_NOT_FMT (f,
"could not open %s in mode %s: %s",
278 filename.c_str(), rw_flags, strerror(errno));
// map the whole totsize bytes of the file
280 uint8_t * ptro = (uint8_t*)mmap (
nullptr, totsize,
281 prot, MAP_SHARED, fileno (f), 0);
283 FAISS_THROW_IF_NOT_FMT (ptro != MAP_FAILED,
284 "could not mmap %s: %s",
292 void OnDiskInvertedLists::update_totsize (
size_t new_size)
// Grow (or create) the backing file to new_size bytes and remap it
// (fragment: the unmap-error args, the totsize assignment, and the final
// do_mmap call are not visible here).
296 if (ptr !=
nullptr) {
297 int err = munmap (ptr, totsize);
298 FAISS_THROW_IF_NOT_FMT (err == 0,
"munmap error: %s",
// first-time use: create the file so truncate/mmap have something to work on
303 FILE *f = fopen (filename.c_str(),
"w");
304 FAISS_THROW_IF_NOT_FMT (f,
"could not open %s in mode W: %s",
305 filename.c_str(), strerror(errno));
// extend the free-slot list over the newly added bytes, merging with a
// trailing free slot when it ends exactly at the old totsize
309 if (new_size > totsize) {
310 if (!slots.empty() &&
311 slots.back().offset + slots.back().capacity == totsize) {
312 slots.back().capacity += new_size - totsize;
314 slots.push_back (Slot(totsize, new_size - totsize));
// shrinking the file is not supported
317 assert(!
"not implemented");
323 printf (
"resizing %s to %ld bytes\n", filename.c_str(), totsize);
// resize the file itself before remapping
325 int err = truncate (filename.c_str(), totsize);
327 FAISS_THROW_IF_NOT_FMT (err == 0,
"truncate %s to %ld: %s",
328 filename.c_str(), totsize,
// Sentinel offset marking a List that has no storage assigned yet.
342 #define INVALID_OFFSET (size_t)(-1)
// List: per-inverted-list metadata; starts empty and unallocated.
344 OnDiskInvertedLists::List::List ():
345 size (0), capacity (0), offset (INVALID_OFFSET)
// Slot: a free region [offset, offset + capacity) of the backing file.
348 OnDiskInvertedLists::Slot::Slot (
size_t offset,
size_t capacity):
349 offset (offset), capacity (capacity)
352 OnDiskInvertedLists::Slot::Slot ():
353 offset (0), capacity (0)
// Main constructor (fragment: most member initializers — original lines
// 361-367 — are not visible here).
358 OnDiskInvertedLists::OnDiskInvertedLists (
359 size_t nlist,
size_t code_size,
360 const char *filename):
368 prefetch_nthread (32)
// one (initially empty, unallocated) List per possible key
370 lists.resize (nlist);
// default constructor (its initializer list is not visible in this fragment)
375 OnDiskInvertedLists::OnDiskInvertedLists ():
380 OnDiskInvertedLists::~OnDiskInvertedLists ()
385 if (ptr !=
nullptr) {
386 int err = munmap (ptr, totsize);
388 fprintf(stderr,
"mumap error: %s",
// list_size body (fragment): number of entries currently in the list
400 return lists[list_no].size;
// get_codes body (fragment): a list without storage hits the
// INVALID_OFFSET branch (its handling is not visible here); otherwise the
// codes start right at the list's slot offset in the mapping
406 if (lists[list_no].offset == INVALID_OFFSET) {
410 return ptr + lists[list_no].offset;
// get_ids body (fragment): ids live after the codes block of the slot
// (see the memcpy offset arithmetic in resize_locked)
415 if (lists[list_no].offset == INVALID_OFFSET) {
419 return (
const idx_t*)(ptr + lists[list_no].offset +
424 void OnDiskInvertedLists::update_entries (
425 size_t list_no,
size_t offset,
size_t n_entry,
426 const idx_t *ids_in,
const uint8_t *codes_in)
// Overwrite n_entry ids+codes of list list_no starting at entry `offset`.
// Entries must already exist (asserted below); no resizing happens here.
428 FAISS_THROW_IF_NOT (!read_only);
429 if (n_entry == 0)
return;
430 const List & l = lists[list_no];
431 assert (n_entry + offset <= l.size);
// const_cast is acceptable: the mapping is writable whenever !read_only
432 idx_t *ids =
const_cast<idx_t*
>(
get_ids (list_no));
433 memcpy (ids + offset, ids_in,
sizeof(ids_in[0]) * n_entry);
434 uint8_t *codes =
const_cast<uint8_t*
>(
get_codes (list_no));
435 memcpy (codes + offset *
code_size, codes_in, code_size * n_entry);
438 size_t OnDiskInvertedLists::add_entries (
439 size_t list_no,
size_t n_entry,
440 const idx_t* ids,
const uint8_t *code)
// Append n_entry entries to a list under its per-list (level-1) lock:
// grow storage, then write the new entries at the old end. Fragment: the
// declaration of `o` (presumably the old list size) and the return value
// are not visible here.
442 FAISS_THROW_IF_NOT (!read_only);
443 locks->lock_1 (list_no);
445 resize_locked (list_no, n_entry + o);
446 update_entries (list_no, o, n_entry, ids, code);
447 locks->unlock_1 (list_no);
// Public resize: take the per-list (level-1) lock and delegate to
// resize_locked, which assumes the lock is held.
451 void OnDiskInvertedLists::resize (
size_t list_no,
size_t new_size)
453 FAISS_THROW_IF_NOT (!read_only);
454 locks->lock_1 (list_no);
455 resize_locked (list_no, new_size);
456 locks->unlock_1 (list_no);
461 void OnDiskInvertedLists::resize_locked (
size_t list_no,
size_t new_size)
// Resize a list's storage; caller holds the level-1 lock on list_no.
// Keeps the current slot when new_size still fits in (capacity/2,
// capacity]; otherwise frees it and allocates a fresh slot, copying
// surviving entries over. Fragment: several statements (early return,
// new_l declaration, capacity-doubling body, codes memcpy) are missing.
463 List & l = lists[list_no];
465 if (new_size <= l.capacity &&
466 new_size > l.capacity / 2) {
474 free_slot (l.offset, l.capacity);
481 new_l.size = new_size;
// grow capacity geometrically until it can hold new_size
483 while (new_l.capacity < new_size) {
// one slot holds the codes block followed by the ids block
486 new_l.offset = allocate_slot (
487 new_l.capacity * (
sizeof(idx_t) +
code_size));
// copy the old data over only if the list actually moved
491 if (l.offset != new_l.offset) {
492 size_t n = std::min (new_size, l.size);
// ids are copied to just after the (new) codes block
495 memcpy (ptr + new_l.offset + new_l.capacity *
code_size,
496 get_ids (list_no), n *
sizeof(idx_t));
500 lists[list_no] = new_l;
504 size_t OnDiskInvertedLists::allocate_slot (
size_t capacity) {
// First-fit allocation from the free-slot list; grows the file when no
// slot is large enough. Fragment: lock acquisition/release and iterator
// increments are not visible here.
507 auto it = slots.begin();
508 while (it != slots.end() && it->capacity < capacity) {
512 if (it == slots.end()) {
// no fit: at least double totsize so amortized growth stays linear
514 size_t new_size = totsize == 0 ? 32 : totsize * 2;
515 while (new_size - totsize < capacity)
518 update_totsize(new_size);
// rescan — update_totsize added/extended a free slot covering the growth
521 while (it != slots.end() && it->capacity < capacity) {
524 assert (it != slots.end());
527 size_t o = it->offset;
// exact fit consumes the whole slot; otherwise carve from its front
528 if (it->capacity == capacity) {
532 it->capacity -= capacity;
533 it->offset += capacity;
541 void OnDiskInvertedLists::free_slot (
size_t offset,
size_t capacity) {
// Return [offset, offset + capacity) to the free list, coalescing with
// the previous and/or next free slot when adjacent. Fragment: iterator
// increments, prev declaration, and else-branch lines are not visible.
544 if (capacity == 0)
return;
// advance to the first slot that starts after `offset`
546 auto it = slots.begin();
547 while (it != slots.end() && it->offset <= offset) {
551 size_t inf = 1UL << 60;
// end of the previous free slot, or `inf` when there is none
553 size_t end_prev = inf;
554 if (it != slots.begin()) {
557 end_prev = prev->offset + prev->capacity;
// start of the next free slot
// NOTE(review): `1L << 60` here vs `1UL << 60` above — same value, but
// inconsistent literal suffixes; consider unifying on 1UL.
560 size_t begin_next = 1L << 60;
561 if (it != slots.end()) {
562 begin_next = it->offset;
// the freed range must not overlap existing free slots
565 assert (end_prev == inf || offset >= end_prev);
566 assert (offset + capacity <= begin_next);
// merge with prev — and with next too when the range bridges both
568 if (offset == end_prev) {
571 if (offset + capacity == begin_next) {
572 prev->capacity += capacity + it->capacity;
575 prev->capacity += capacity;
// otherwise merge with next only, or insert a standalone slot
578 if (offset + capacity == begin_next) {
579 it->offset -= capacity;
580 it->capacity += capacity;
582 slots.insert (it, Slot (offset, capacity));
594 size_t OnDiskInvertedLists::merge_from (
const InvertedLists **ils,
int n_il,
// Bulk-build this (empty) on-disk structure from n_il input inverted
// lists: (1) sum per-list sizes across inputs, (2) lay all lists out
// contiguously and size the file once, (3) copy entries list by list.
// Fragment: `cums` declaration, l.size updates, timing variables, and the
// return are not visible here.
597 FAISS_THROW_IF_NOT_MSG (totsize == 0,
"works only on an empty InvertedLists");
599 std::vector<size_t> sizes (
nlist);
// pass 1: accumulate per-list sizes over all inputs
600 for (
int i = 0; i < n_il; i++) {
601 const InvertedLists *il = ils[i];
602 FAISS_THROW_IF_NOT (il->nlist ==
nlist && il->code_size ==
code_size);
604 for (
size_t j = 0; j <
nlist; j++) {
605 sizes [j] += il->list_size(j);
// pass 2: assign each list its slice of one contiguous layout
611 for (
size_t j = 0; j <
nlist; j++) {
614 lists[j].capacity = sizes[j];
615 lists[j].offset = cums;
// each slice holds codes followed by ids, hence the combined item size
616 cums += lists[j].capacity * (
sizeof(idx_t) +
code_size);
619 update_totsize (cums);
// pass 3: copy entries; different j touch disjoint lists, so the loop
// parallelizes safely
625 #pragma omp parallel for
626 for (
size_t j = 0; j <
nlist; j++) {
628 for (
int i = 0; i < n_il; i++) {
629 const InvertedLists *il = ils[i];
630 size_t n_entry = il->list_size(j);
// append this input's entries at the current end of list j
632 update_entries (j, l.size - n_entry, n_entry,
633 ScopedIds(il, j).
get(),
634 ScopedCodes(il, j).
get());
636 assert (l.size == l.capacity);
// progress report, throttled to once per 500 ms; \r keeps it on one line
642 if (t1 - last_t > 500) {
643 printf(
"merged %ld lists in %.3f s\r",
644 nmerged, (t1 - t0) / 1000.0);
// crop_invlists body (fragment; signature not visible): keep only the
// metadata of lists [l0, l1) — the mmapped data region is left untouched.
661 FAISS_THROW_IF_NOT(0 <= l0 && l0 <= l1 && l1 <= nlist);
663 std::vector<List> new_lists (l1 - l0);
// raw memcpy of the List metadata (three size_t fields, see List::List)
664 memcpy (new_lists.data(), &lists[l0], (l1 - l0) *
sizeof(
List));
666 lists.swap(new_lists);
// --- extracted API reference (doc-comment residue from the header) ---
// void crop_invlists(size_t l0, size_t l1)
//     restrict the inverted lists to l0:l1 without touching the mmapped region
// const idx_t *get_ids(size_t list_no) const override
// void prefetch_lists(const idx_t *list_nos, int nlist) const override
// long idx_t
//     all indices are this type
// size_t code_size
//     code size per vector in bytes
// const uint8_t *get_codes(size_t list_no) const override
// double getmillisecs()
//     ms elapsed since some arbitrary epoch
// size_t list_size(size_t list_no) const override
//     get the size of a list
// size_t nlist
//     number of possible key values