Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/OnDiskInvertedLists.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #include "OnDiskInvertedLists.h"
12 
13 #include <pthread.h>
14 
15 #include <unordered_set>
16 
17 #include <sys/mman.h>
18 #include <unistd.h>
19 #include <sys/types.h>
20 
21 #include "FaissAssert.h"
22 
23 
24 namespace faiss {
25 
26 
27 /**********************************************
28  * LockLevels
29  **********************************************/
30 
31 
struct LockLevels {
    /* There n times lock1(n), one lock2 and one lock3
     * Invariants:
     * a single thread can hold one lock1(n) for some n
     * a single thread can hold lock2, if it holds lock1(n) for some n
     * a single thread can hold lock3, if it holds lock1(n) for some n
     * AND lock2 AND no other thread holds lock1(m) for m != n
     */
    pthread_mutex_t mutex1;   // protects all fields below; lock_3 keeps it held
    pthread_cond_t level1_cv; // signalled when a level-1 lock becomes available
    pthread_cond_t level2_cv; // signalled when the level-2 lock is released
    pthread_cond_t level3_cv; // signalled when a level-3 waiter should re-check

    std::unordered_set<int> level1_holders; // which level1 locks are held
    int n_level2; // nb threads that wait on level2
    bool level3_in_use; // a threads waits on level3
    bool level2_in_use;

    LockLevels() {
        pthread_mutex_init(&mutex1, nullptr);
        pthread_cond_init(&level1_cv, nullptr);
        pthread_cond_init(&level2_cv, nullptr);
        pthread_cond_init(&level3_cv, nullptr);
        n_level2 = 0;
        level2_in_use = false;
        level3_in_use = false;
    }

    ~LockLevels() {
        pthread_cond_destroy(&level1_cv);
        pthread_cond_destroy(&level2_cv);
        pthread_cond_destroy(&level3_cv);
        pthread_mutex_destroy(&mutex1);
    }

    // acquire per-list lock no; blocks while a level-3 (global) lock is
    // pending or another thread already holds lock no
    void lock_1(int no) {
        pthread_mutex_lock(&mutex1);
        while (level3_in_use || level1_holders.count(no) > 0) {
            pthread_cond_wait(&level1_cv, &mutex1);
        }
        level1_holders.insert(no);
        pthread_mutex_unlock(&mutex1);
    }

    void unlock_1(int no) {
        pthread_mutex_lock(&mutex1);
        assert(level1_holders.count(no) == 1);
        level1_holders.erase(no);
        if (level3_in_use) { // a writer is waiting
            pthread_cond_signal(&level3_cv);
        } else {
            pthread_cond_broadcast(&level1_cv);
        }
        pthread_mutex_unlock(&mutex1);
    }

    // acquire the single level-2 lock; per the invariant, the caller is
    // expected to already hold some lock1(n)
    void lock_2() {
        pthread_mutex_lock(&mutex1);
        n_level2 ++;
        if (level3_in_use) { // tell waiting level3 that we are blocked
            pthread_cond_signal(&level3_cv);
        }
        while (level2_in_use) {
            pthread_cond_wait(&level2_cv, &mutex1);
        }
        level2_in_use = true;
        pthread_mutex_unlock(&mutex1);
    }

    void unlock_2() {
        pthread_mutex_lock(&mutex1);
        level2_in_use = false;
        n_level2 --;
        pthread_cond_signal(&level2_cv);
        pthread_mutex_unlock(&mutex1);
    }

    // acquire the global lock; caller must hold lock2.
    // NOTE: returns with mutex1 still held -- only unlock_3 releases it.
    void lock_3() {
        pthread_mutex_lock(&mutex1);
        level3_in_use = true;
        // wait until there are no level1 holders anymore except the
        // ones that are waiting on level2 (we are holding lock2)
        while (level1_holders.size() > n_level2) {
            pthread_cond_wait(&level3_cv, &mutex1);
        }
        // don't release the lock!
    }

    void unlock_3() {
        level3_in_use = false;
        // wake up all level1_holders
        pthread_cond_broadcast(&level1_cv);
        pthread_mutex_unlock(&mutex1);
    }

    // debugging helper: dump the current lock state to stdout
    void print () {
        pthread_mutex_lock(&mutex1);
        printf("State: level3_in_use=%d n_level2=%d level1_holders: [", level3_in_use, n_level2);
        for (int k : level1_holders) {
            printf("%d ", k);
        }
        printf("]\n");
        pthread_mutex_unlock(&mutex1);
    }

};
138 
139 /**********************************************
140  * OngoingPrefetch
141  **********************************************/
142 
144 
    // per-prefetch-thread state; one slot per requested list
    struct Thread {
        pthread_t pth;                 // handle of the prefetching thread
        const OnDiskInvertedLists *od; // parent inverted lists (not owned)
        int64_t list_no;               // list to prefetch; -1 if slot unused
    };
150 
    std::vector<Thread> threads; // current batch of prefetch threads

    pthread_mutex_t mutex; // serializes prefetch_lists calls and destruction

    // pretext to avoid code below to be optimized out
    static int global_cs;

    const OnDiskInvertedLists *od; // parent inverted lists (not owned)
159 
    // od: parent inverted lists (not owned by this object)
    OngoingPrefetch (const OnDiskInvertedLists *od): od (od)
    {
        pthread_mutex_init (&mutex, nullptr);
    }
164 
165  static void* prefetch_list (void * arg) {
166  Thread *th = static_cast<Thread*>(arg);
167 
168  th->od->locks->lock_1(th->list_no);
169  size_t n = th->od->list_size(th->list_no);
170  const Index::idx_t *idx = th->od->get_ids(th->list_no);
171  const uint8_t *codes = th->od->get_codes(th->list_no);
172  int cs = 0;
173  for (size_t i = 0; i < n;i++) {
174  cs += idx[i];
175  }
176  const long *codes8 = (const long*)codes;
177  long n8 = n * th->od->code_size / 8;
178 
179  for (size_t i = 0; i < n8;i++) {
180  cs += codes8[i];
181  }
182  th->od->locks->unlock_1(th->list_no);
183  global_cs += cs & 1;
184  return nullptr;
185  }
186 
    // launch one prefetch thread per non-empty list in list_nos[0..n-1];
    // any threads from the previous call are joined first
    void prefetch_lists (const long *list_nos, int n) {
        pthread_mutex_lock (&mutex);
        // wait for the previous batch of prefetch threads to finish
        for (auto &th: threads) {
            if (th.list_no != -1) {
                pthread_join (th.pth, nullptr);
            }
        }
        threads.resize (n);
        for (int i = 0; i < n; i++) {
            long list_no = list_nos[i];
            Thread & th = threads[i];
            if (list_no >= 0 && od->list_size(list_no) > 0) {
                th.list_no = list_no;
                th.od = od;
                pthread_create (&th.pth, nullptr, prefetch_list, &th);
            } else {
                th.list_no = -1; // mark slot unused: nothing to join later
            }
        }
        pthread_mutex_unlock (&mutex);
    }
208 
    // join all pending prefetch threads before tearing down the mutex
    ~OngoingPrefetch () {
        pthread_mutex_lock (&mutex);
        for (auto &th: threads) {
            if (th.list_no != -1) {
                pthread_join (th.pth, nullptr);
            }
        }
        pthread_mutex_unlock (&mutex);
        pthread_mutex_destroy (&mutex);
    }
219 
220 };
221 
// storage for the sink variable that keeps the prefetch reads observable
int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
223 
224 
// delegate prefetching to the OngoingPrefetch helper
void OnDiskInvertedLists::prefetch_lists (const long *list_nos, int n) const
{
    pf->prefetch_lists (list_nos, n);
}
229 
230 
231 
232 /**********************************************
233  * OnDiskInvertedLists: mmapping
234  **********************************************/
235 
236 
237 void OnDiskInvertedLists::do_mmap ()
238 {
239  const char *rw_flags = read_only ? "r" : "r+";
240  int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
241  FILE *f = fopen (filename.c_str(), rw_flags);
242  FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode %s: %s",
243  filename.c_str(), rw_flags, strerror(errno));
244 
245  uint8_t * ptro = (uint8_t*)mmap (nullptr, totsize,
246  prot, MAP_SHARED, fileno (f), 0);
247 
248  FAISS_THROW_IF_NOT_FMT (ptro != MAP_FAILED,
249  "could not mmap %s: %s",
250  filename.c_str(),
251  strerror(errno));
252  ptr = ptro;
253  fclose (f);
254 
255 }
256 
// grow the backing file to new_size bytes and remap it.
// The added byte range becomes free space: it is merged into the last
// free slot when that slot ends exactly at the old totsize, otherwise
// appended as a new slot. Shrinking is not implemented.
void OnDiskInvertedLists::update_totsize (size_t new_size)
{

    // unmap file
    if (ptr != nullptr) {
        int err = munmap (ptr, totsize);
        FAISS_THROW_IF_NOT_FMT (err == 0, "mumap error: %s",
                                strerror(errno));
    }
    if (totsize == 0) {
        // must create file before truncating it
        FILE *f = fopen (filename.c_str(), "w");
        FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode W: %s",
                                filename.c_str(), strerror(errno));
        fclose (f);
    }

    if (new_size > totsize) {
        if (!slots.empty() &&
            slots.back().offset + slots.back().capacity == totsize) {
            // last free slot ends at EOF: extend it
            slots.back().capacity += new_size - totsize;
        } else {
            // register the appended range as a fresh free slot
            slots.push_back (Slot(totsize, new_size - totsize));
        }
    } else {
        assert(!"not implemented");
    }

    totsize = new_size;

    // create file
    printf ("resizing %s to %ld bytes\n", filename.c_str(), totsize);

    int err = truncate (filename.c_str(), totsize);

    FAISS_THROW_IF_NOT_FMT (err == 0, "truncate %s to %ld: %s",
                            filename.c_str(), totsize,
                            strerror(errno));
    do_mmap ();
}
297 
298 
299 
300 
301 
302 
303 /**********************************************
304  * OnDiskInvertedLists
305  **********************************************/
306 
307 #define INVALID_OFFSET (size_t)(-1)
308 
309 OnDiskInvertedLists::List::List ():
310  size (0), capacity (0), offset (INVALID_OFFSET)
311 {}
312 
313 OnDiskInvertedLists::Slot::Slot (size_t offset, size_t capacity):
314  offset (offset), capacity (capacity)
315 {}
316 
317 OnDiskInvertedLists::Slot::Slot ():
318  offset (0), capacity (0)
319 {}
320 
321 
322 
// build a container of nlist empty inverted lists backed by filename.
// No disk space is assigned until lists are resized / appended to.
OnDiskInvertedLists::OnDiskInvertedLists (
    size_t nlist, size_t code_size,
    const char *filename):
    InvertedLists (nlist, code_size),
    filename (filename),
    totsize (0),
    ptr (nullptr),
    read_only (false),
    locks (new LockLevels ()),
    pf (new OngoingPrefetch (this))
{
    lists.resize (nlist);

    // slots starts empty
}
338 
// default constructor: empty object whose fields are presumably filled
// in later (e.g. by deserialization code) -- TODO confirm with callers
OnDiskInvertedLists::OnDiskInvertedLists ():
    InvertedLists (0, 0),
    totsize (0),
    ptr (nullptr),
    read_only (false),
    locks (new LockLevels ()),
    pf (new OngoingPrefetch (this))
{
}
348 
349 OnDiskInvertedLists::~OnDiskInvertedLists ()
350 {
351  delete pf;
352 
353  // unmap all lists
354  if (ptr != nullptr) {
355  int err = munmap (ptr, totsize);
356  FAISS_THROW_IF_NOT_FMT (err == 0,
357  "mumap error: %s",
358  strerror(errno));
359  }
360  delete locks;
361 }
362 
363 
364 
365 
// number of vectors currently stored in list list_no
size_t OnDiskInvertedLists::list_size(size_t list_no) const
{
    return lists[list_no].size;
}
370 
371 
372 const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
373 {
374  if (lists[list_no].offset == INVALID_OFFSET) {
375  return nullptr;
376  }
377 
378  return ptr + lists[list_no].offset;
379 }
380 
381 const Index::idx_t * OnDiskInvertedLists::get_ids (size_t list_no) const
382 {
383  if (lists[list_no].offset == INVALID_OFFSET) {
384  return nullptr;
385  }
386 
387  return (const idx_t*)(ptr + lists[list_no].offset +
388  code_size * lists[list_no].capacity);
389 }
390 
391 
392 void OnDiskInvertedLists::update_entries (
393  size_t list_no, size_t offset, size_t n_entry,
394  const idx_t *ids_in, const uint8_t *codes_in)
395 {
396  FAISS_THROW_IF_NOT (!read_only);
397  if (n_entry == 0) return;
398  const List & l = lists[list_no];
399  assert (n_entry + offset <= l.size);
400  idx_t *ids = const_cast<idx_t*>(get_ids (list_no));
401  memcpy (ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
402  uint8_t *codes = const_cast<uint8_t*>(get_codes (list_no));
403  memcpy (codes + offset * code_size, codes_in, code_size * n_entry);
404 }
405 
// append n_entry (ids, code) pairs to list list_no, growing its storage
// if needed. Returns the previous list size (the write offset).
// Takes the per-list lock, so adds to distinct lists can run concurrently.
size_t OnDiskInvertedLists::add_entries (
    size_t list_no, size_t n_entry,
    const idx_t* ids, const uint8_t *code)
{
    FAISS_THROW_IF_NOT (!read_only);
    locks->lock_1 (list_no);
    size_t o = list_size (list_no);
    resize_locked (list_no, n_entry + o);
    update_entries (list_no, o, n_entry, ids, code);
    locks->unlock_1 (list_no);
    return o;
}
418 
// resize list list_no to new_size elements, taking the per-list lock
void OnDiskInvertedLists::resize (size_t list_no, size_t new_size)
{
    FAISS_THROW_IF_NOT (!read_only);
    locks->lock_1 (list_no);
    resize_locked (list_no, new_size);
    locks->unlock_1 (list_no);
}
426 
427 
428 
// resize list list_no to new_size; caller must hold lock_1(list_no).
// The current slot is kept when new_size fits in (capacity/2, capacity];
// otherwise it is released and a new power-of-2-capacity slot is
// allocated, copying over the common prefix of codes and ids.
void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
{
    List & l = lists[list_no];

    if (new_size <= l.capacity &&
        new_size > l.capacity / 2) {
        // no reallocation needed
        l.size = new_size;
        return;
    }

    // otherwise we release the current slot, and find a new one

    locks->lock_2 (); // allocator lock
    free_slot (l.offset, l.capacity);

    List new_l;

    if (new_size == 0) {
        new_l = List(); // back to an empty, storage-less list
    } else {
        new_l.size = new_size;
        // round capacity up to the next power of 2
        new_l.capacity = 1;
        while (new_l.capacity < new_size) {
            new_l.capacity *= 2;
        }
        new_l.offset = allocate_slot (
            new_l.capacity * (sizeof(idx_t) + code_size));
    }

    // copy common data
    if (l.offset != new_l.offset) {
        size_t n = std::min (new_size, l.size);
        if (n > 0) {
            memcpy (ptr + new_l.offset, get_codes(list_no), n * code_size);
            memcpy (ptr + new_l.offset + new_l.capacity * code_size,
                    get_ids (list_no), n * sizeof(idx_t));
        }
    }

    lists[list_no] = new_l;
    locks->unlock_2 ();
}
471 
// first-fit allocation of capacity bytes from the free-slot list;
// when no slot is large enough, the file is grown (under lock_3, which
// excludes all readers while the mapping moves) and the search retried.
// Returns the byte offset of the allocated range.
size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
    // should hold lock2

    auto it = slots.begin();
    while (it != slots.end() && it->capacity < capacity) {
        it++;
    }

    if (it == slots.end()) {
        // not enough capacity
        // at least double the file size until the appended space fits
        size_t new_size = totsize == 0 ? 32 : totsize * 2;
        while (new_size - totsize < capacity)
            new_size *= 2;
        locks->lock_3 ();
        update_totsize(new_size);
        locks->unlock_3 ();
        // retry: update_totsize added a slot big enough by construction
        it = slots.begin();
        while (it != slots.end() && it->capacity < capacity) {
            it++;
        }
        assert (it != slots.end());
    }

    size_t o = it->offset;
    if (it->capacity == capacity) {
        // exact fit: slot disappears
        slots.erase (it);
    } else {
        // take from beginning of slot
        it->capacity -= capacity;
        it->offset += capacity;
    }

    return o;
}
506 
507 
508 
// return the byte range [offset, offset + capacity) to the free-slot
// list (kept sorted by offset), coalescing with the adjacent previous
// and/or next free slot when the ranges touch.
void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {

    // should hold lock2
    if (capacity == 0) return;

    // find the first slot that starts after offset
    auto it = slots.begin();
    while (it != slots.end() && it->offset <= offset) {
        it++;
    }

    size_t inf = 1UL << 60; // sentinel: no previous slot

    size_t end_prev = inf;
    if (it != slots.begin()) {
        auto prev = it;
        prev--;
        end_prev = prev->offset + prev->capacity;
    }

    size_t begin_next = 1L << 60; // sentinel: no next slot
    if (it != slots.end()) {
        begin_next = it->offset;
    }

    // freed range must not overlap its neighbors
    assert (end_prev == inf || offset >= end_prev);
    assert (offset + capacity <= begin_next);

    if (offset == end_prev) {
        // merge into the previous slot
        auto prev = it;
        prev--;
        if (offset + capacity == begin_next) {
            // bridges prev and next: absorb next as well
            prev->capacity += capacity + it->capacity;
            slots.erase (it);
        } else {
            prev->capacity += capacity;
        }
    } else {
        if (offset + capacity == begin_next) {
            // merge into the next slot by extending it backwards
            it->offset -= capacity;
            it->capacity += capacity;
        } else {
            // isolated range: insert a new slot
            slots.insert (it, Slot (offset, capacity));
        }
    }

    // TODO shrink global storage if needed
}
556 
557 
558 /*****************************************
559  * Compact form
560  *****************************************/
561 
// concatenate the lists of the n_il input inverted lists into this
// (empty) on-disk structure. Capacities are sized exactly and laid out
// contiguously, then filled in parallel (lists are disjoint, so the
// copies need no locking). Returns the total number of stored vectors.
size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il)
{
    FAISS_THROW_IF_NOT_MSG (totsize == 0, "works only on an empty InvertedLists");

    // compute the final size of each list
    std::vector<size_t> sizes (nlist);
    for (int i = 0; i < n_il; i++) {
        const InvertedLists *il = ils[i];
        FAISS_THROW_IF_NOT (il->nlist == nlist && il->code_size == code_size);

        for (size_t j = 0; j < nlist; j++) {
            sizes [j] += il->list_size(j);
        }
    }

    // lay out all lists back to back, with exact capacities
    size_t cums = 0;
    size_t ntotal = 0;
    for (size_t j = 0; j < nlist; j++) {
        ntotal += sizes[j];
        lists[j].size = 0; // filled incrementally below
        lists[j].capacity = sizes[j];
        lists[j].offset = cums;
        cums += lists[j].capacity * (sizeof(idx_t) + code_size);
    }

    update_totsize (cums);

#pragma omp parallel for
    for (size_t j = 0; j < nlist; j++) {
        List & l = lists[j];
        for (int i = 0; i < n_il; i++) {
            const InvertedLists *il = ils[i];
            size_t n_entry = il->list_size(j);
            l.size += n_entry;
            update_entries (j, l.size - n_entry, n_entry,
                            il->get_ids(j),
                            il->get_codes(j));
        }
        assert (l.size == l.capacity); // every list must end up full
    }

    return ntotal;
}
604 
605 
606 } // namespace faiss
const idx_t * get_ids(size_t list_no) const override
size_t code_size
code size per vector in bytes
Definition: InvertedLists.h:36
long idx_t
all indices are this type
Definition: Index.h:64
const uint8_t * get_codes(size_t list_no) const override
size_t list_size(size_t list_no) const override
get the size of a list
size_t nlist
number of possible key values
Definition: InvertedLists.h:35
void prefetch_lists(const long *list_nos, int nlist) const override