faiss/OnDiskInvertedLists.cpp
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */


#include "OnDiskInvertedLists.h"

#include <pthread.h>

#include <unordered_set>
#include <vector>
#include <algorithm>

#include <cassert>
#include <cstdio>
#include <cstring>
#include <cerrno>

#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>

#include "FaissAssert.h"


namespace faiss {


/**********************************************
 * LockLevels
 **********************************************/


struct LockLevels {
    /* There are n times lock1(n), one lock2 and one lock3.
     * Invariants:
     * - a single thread can hold one lock1(n) for some n
     * - a single thread can hold lock2 only if it holds lock1(n) for some n
     * - a single thread can hold lock3 only if it holds lock1(n) for some n
     *   AND lock2 AND no other thread holds lock1(m) for m != n
     */
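    /* How the levels are used in the rest of this file: lock_1(no)
     * protects reads and updates of an individual inverted list,
     * lock_2 serializes access to the free-slot list
     * (allocate_slot / free_slot), and lock_3 is taken when the whole
     * file must be remapped (update_totsize), which invalidates all
     * pointers into the mapping. */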
    pthread_mutex_t mutex1;
    pthread_cond_t level1_cv;
    pthread_cond_t level2_cv;
    pthread_cond_t level3_cv;

    std::unordered_set<int> level1_holders; // which level1 locks are held
    int n_level2;                           // nb of threads that wait on level2
    bool level3_in_use;                     // a thread waits on level3
    bool level2_in_use;

    LockLevels() {
        pthread_mutex_init(&mutex1, nullptr);
        pthread_cond_init(&level1_cv, nullptr);
        pthread_cond_init(&level2_cv, nullptr);
        pthread_cond_init(&level3_cv, nullptr);
        n_level2 = 0;
        level2_in_use = false;
        level3_in_use = false;
    }

    ~LockLevels() {
        pthread_cond_destroy(&level1_cv);
        pthread_cond_destroy(&level2_cv);
        pthread_cond_destroy(&level3_cv);
        pthread_mutex_destroy(&mutex1);
    }

    void lock_1(int no) {
        pthread_mutex_lock(&mutex1);
        while (level3_in_use || level1_holders.count(no) > 0) {
            pthread_cond_wait(&level1_cv, &mutex1);
        }
        level1_holders.insert(no);
        pthread_mutex_unlock(&mutex1);
    }

    void unlock_1(int no) {
        pthread_mutex_lock(&mutex1);
        assert(level1_holders.count(no) == 1);
        level1_holders.erase(no);
        if (level3_in_use) { // a writer is waiting
            pthread_cond_signal(&level3_cv);
        } else {
            pthread_cond_broadcast(&level1_cv);
        }
        pthread_mutex_unlock(&mutex1);
    }

    void lock_2() {
        pthread_mutex_lock(&mutex1);
        n_level2++;
        if (level3_in_use) { // tell waiting level3 that we are blocked
            pthread_cond_signal(&level3_cv);
        }
        while (level2_in_use) {
            pthread_cond_wait(&level2_cv, &mutex1);
        }
        level2_in_use = true;
        pthread_mutex_unlock(&mutex1);
    }

    void unlock_2() {
        pthread_mutex_lock(&mutex1);
        level2_in_use = false;
        n_level2--;
        pthread_cond_signal(&level2_cv);
        pthread_mutex_unlock(&mutex1);
    }

    void lock_3() {
        pthread_mutex_lock(&mutex1);
        level3_in_use = true;
        // wait until there are no level1 holders anymore except the
        // ones that are waiting on level2 (we are holding lock2)
        while (level1_holders.size() > (size_t)n_level2) {
            pthread_cond_wait(&level3_cv, &mutex1);
        }
        // don't release mutex1! it is released only in unlock_3
    }

    void unlock_3() {
        level3_in_use = false;
        // wake up all level1 holders
        pthread_cond_broadcast(&level1_cv);
        pthread_mutex_unlock(&mutex1);
    }

    void print() {
        pthread_mutex_lock(&mutex1);
        printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
               level3_in_use, n_level2);
        for (int k : level1_holders) {
            printf("%d ", k);
        }
        printf("]\n");
        pthread_mutex_unlock(&mutex1);
    }

};

/**********************************************
 * OngoingPrefetch
 **********************************************/
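
/* Prefetching = reading through the ids and codes of the requested
 * lists so that the corresponding pages of the memory map are loaded
 * into the OS page cache before the search accesses them. The reads
 * are accumulated into a checksum so that the compiler cannot
 * optimize the loops away. */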

struct OnDiskInvertedLists::OngoingPrefetch {

    struct Thread {
        pthread_t pth;
        const OnDiskInvertedLists *od;
        int64_t list_no;
    };

    std::vector<Thread> threads;

    pthread_mutex_t mutex;

    // checksum sink so that the reads below cannot be optimized out
    static int global_cs;

    const OnDiskInvertedLists *od;

    OngoingPrefetch (const OnDiskInvertedLists *od): od (od)
    {
        pthread_mutex_init (&mutex, nullptr);
    }

    static void* prefetch_list (void *arg) {
        Thread *th = static_cast<Thread*>(arg);

        size_t n = th->od->list_size (th->list_no);
        const Index::idx_t *idx = th->od->get_ids (th->list_no);
        const uint8_t *codes = th->od->get_codes (th->list_no);
        int cs = 0;
        for (size_t i = 0; i < n; i++) {
            cs += idx[i];
        }
        const long *codes8 = (const long*)codes;
        long n8 = n * th->od->code_size / 8;

        for (long i = 0; i < n8; i++) {
            cs += codes8[i];
        }
        global_cs += cs & 1;
        return nullptr;
    }

    void prefetch_lists (const long *list_nos, int n) {
        pthread_mutex_lock (&mutex);
        // wait for the previous round of prefetch threads to finish
        for (auto &th: threads) {
            if (th.list_no != -1) {
                pthread_join (th.pth, nullptr);
            }
        }
        threads.resize (n);
        for (int i = 0; i < n; i++) {
            long list_no = list_nos[i];
            Thread &th = threads[i];
            if (list_no >= 0 && od->list_size (list_no) > 0) {
                th.list_no = list_no;
                th.od = od;
                pthread_create (&th.pth, nullptr, prefetch_list, &th);
            } else {
                th.list_no = -1;
            }
        }
        pthread_mutex_unlock (&mutex);
    }

    ~OngoingPrefetch () {
        pthread_mutex_lock (&mutex);
        for (auto &th: threads) {
            if (th.list_no != -1) {
                pthread_join (th.pth, nullptr);
            }
        }
        pthread_mutex_unlock (&mutex);
        pthread_mutex_destroy (&mutex);
    }

};

int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;


void OnDiskInvertedLists::prefetch_lists (const long *list_nos, int n) const
{
    pf->prefetch_lists (list_nos, n);
}


/**********************************************
 * OnDiskInvertedLists: mmapping
 **********************************************/


void OnDiskInvertedLists::do_mmap ()
{
    const char *rw_flags = read_only ? "r" : "r+";
    int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
    FILE *f = fopen (filename.c_str(), rw_flags);
    FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode %s: %s",
                            filename.c_str(), rw_flags, strerror(errno));

    ptr = (uint8_t*)mmap (nullptr, totsize,
                          prot, MAP_SHARED, fileno (f), 0);

    FAISS_THROW_IF_NOT_FMT (ptr != MAP_FAILED,
                            "could not mmap %s: %s",
                            filename.c_str(),
                            strerror(errno));
    // closing the FILE is fine: the mapping keeps its own reference
    // to the file and stays valid after the descriptor is closed
    fclose (f);

}

void OnDiskInvertedLists::update_totsize (size_t new_size)
{

    // unmap file
    if (ptr != nullptr) {
        int err = munmap (ptr, totsize);
        FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s",
                                strerror(errno));
    }
    if (totsize == 0) {
        // must create the file before truncating it
        FILE *f = fopen (filename.c_str(), "w");
        FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode w: %s",
                                filename.c_str(), strerror(errno));
        fclose (f);
    }

    if (new_size > totsize) {
        if (!slots.empty() &&
            slots.back().offset + slots.back().capacity == totsize) {
            // extend the trailing free slot
            slots.back().capacity += new_size - totsize;
        } else {
            // add a new free slot at the end of the file
            slots.push_back (Slot(totsize, new_size - totsize));
        }
    } else {
        assert(!"not implemented");
    }

    totsize = new_size;

    // resize the file on disk
    printf ("resizing %s to %zu bytes\n", filename.c_str(), totsize);

    int err = truncate (filename.c_str(), totsize);

    FAISS_THROW_IF_NOT_FMT (err == 0, "truncate %s to %zu: %s",
                            filename.c_str(), totsize,
                            strerror(errno));
    do_mmap ();
}




/**********************************************
 * OnDiskInvertedLists
 **********************************************/

#define INVALID_OFFSET (size_t)(-1)

OnDiskInvertedLists::List::List ():
    size (0), capacity (0), offset (INVALID_OFFSET)
{}

OnDiskInvertedLists::Slot::Slot (size_t offset, size_t capacity):
    offset (offset), capacity (capacity)
{}

OnDiskInvertedLists::Slot::Slot ():
    offset (0), capacity (0)
{}
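
/* Layout of a list inside its slot: the codes come first
 * (capacity * code_size bytes), followed by the ids
 * (capacity * sizeof(idx_t) bytes), as reflected in
 * get_codes / get_ids below. */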


OnDiskInvertedLists::OnDiskInvertedLists (
        size_t nlist, size_t code_size,
        const char *filename):
    InvertedLists (nlist, code_size),
    filename (filename),
    totsize (0),
    ptr (nullptr),
    read_only (false),
    locks (new LockLevels ()),
    pf (new OngoingPrefetch (this))
{
    lists.resize (nlist);

    // slots starts empty
}

OnDiskInvertedLists::OnDiskInvertedLists ():
    InvertedLists (0, 0),
    totsize (0),
    ptr (nullptr),
    read_only (false),
    locks (new LockLevels ()),
    pf (new OngoingPrefetch (this))
{
}

OnDiskInvertedLists::~OnDiskInvertedLists ()
{
    delete pf;

    // unmap all lists
    if (ptr != nullptr) {
        int err = munmap (ptr, totsize);
        FAISS_THROW_IF_NOT_FMT (err == 0,
                                "munmap error: %s",
                                strerror(errno));
    }
    delete locks;
}



size_t OnDiskInvertedLists::list_size(size_t list_no) const
{
    return lists[list_no].size;
}


const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
{
    return ptr + lists[list_no].offset;
}

const Index::idx_t * OnDiskInvertedLists::get_ids (size_t list_no) const
{
    return (const idx_t*)(ptr + lists[list_no].offset +
                          code_size * lists[list_no].capacity);
}


void OnDiskInvertedLists::update_entries (
      size_t list_no, size_t offset, size_t n_entry,
      const idx_t *ids_in, const uint8_t *codes_in)
{
    FAISS_THROW_IF_NOT (!read_only);
    if (n_entry == 0) return;
    const List & l = lists[list_no];
    assert (n_entry + offset <= l.size);
    idx_t *ids = const_cast<idx_t*>(get_ids (list_no));
    memcpy (ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
    uint8_t *codes = const_cast<uint8_t*>(get_codes (list_no));
    memcpy (codes + offset * code_size, codes_in, code_size * n_entry);
}

size_t OnDiskInvertedLists::add_entries (
       size_t list_no, size_t n_entry,
       const idx_t* ids, const uint8_t *code)
{
    FAISS_THROW_IF_NOT (!read_only);
    locks->lock_1 (list_no);
    size_t o = list_size (list_no);
    resize_locked (list_no, n_entry + o);
    update_entries (list_no, o, n_entry, ids, code);
    locks->unlock_1 (list_no);
    return o;
}

void OnDiskInvertedLists::resize (size_t list_no, size_t new_size)
{
    FAISS_THROW_IF_NOT (!read_only);
    locks->lock_1 (list_no);
    resize_locked (list_no, new_size);
    locks->unlock_1 (list_no);
}


void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
{
    List & l = lists[list_no];

    // the list stays in place if the new size fits the current slot
    // and does not leave more than half of it unused
    if (new_size <= l.capacity &&
        new_size > l.capacity / 2) {
        l.size = new_size;
        return;
    }

    // otherwise we release the current slot and find a new one

    locks->lock_2 ();
    free_slot (l.offset, l.capacity);

    List new_l;

    if (new_size == 0) {
        new_l = List();
    } else {
        new_l.size = new_size;
        // round the capacity up to a power of 2
        new_l.capacity = 1;
        while (new_l.capacity < new_size) {
            new_l.capacity *= 2;
        }
        new_l.offset = allocate_slot (
            new_l.capacity * (sizeof(idx_t) + code_size));
    }

    // copy common data
    if (l.offset != new_l.offset) {
        size_t n = std::min (new_size, l.size);
        if (n > 0) {
            memcpy (ptr + new_l.offset, get_codes(list_no), n * code_size);
            memcpy (ptr + new_l.offset + new_l.capacity * code_size,
                    get_ids (list_no), n * sizeof(idx_t));
        }
    }

    lists[list_no] = new_l;
    locks->unlock_2 ();
}

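/* The free slots are kept in `slots`, sorted by offset. allocate_slot
 * does a first-fit scan; when no free slot is large enough, the file
 * size is at least doubled under lock_3, which remaps the file and
 * appends the new space to the free list. */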
size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
    // caller should hold lock2

    // first fit: take the first free slot that is large enough
    auto it = slots.begin();
    while (it != slots.end() && it->capacity < capacity) {
        it++;
    }

    if (it == slots.end()) {
        // not enough free capacity: grow the file
        size_t new_size = totsize == 0 ? 32 : totsize * 2;
        while (new_size - totsize < capacity)
            new_size *= 2;
        locks->lock_3 ();
        update_totsize(new_size);
        locks->unlock_3 ();
        it = slots.begin();
        while (it != slots.end() && it->capacity < capacity) {
            it++;
        }
        assert (it != slots.end());
    }

    size_t o = it->offset;
    if (it->capacity == capacity) {
        slots.erase (it);
    } else {
        // take from the beginning of the slot
        it->capacity -= capacity;
        it->offset += capacity;
    }

    return o;
}


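/* Worked example for free_slot below (values illustrative): with free
 * slots [(offset=0, capacity=32), (96, 32)], free_slot(32, 64) is
 * adjacent to both neighbors, so all three merge into a single free
 * slot (0, 128). */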
void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {

    // caller should hold lock2
    if (capacity == 0) return;

    // find the first free slot that starts after the freed block
    auto it = slots.begin();
    while (it != slots.end() && it->offset <= offset) {
        it++;
    }

    size_t inf = 1UL << 60;

    size_t end_prev = inf;
    if (it != slots.begin()) {
        auto prev = it;
        prev--;
        end_prev = prev->offset + prev->capacity;
    }

    size_t begin_next = inf;
    if (it != slots.end()) {
        begin_next = it->offset;
    }

    assert (end_prev == inf || offset >= end_prev);
    assert (offset + capacity <= begin_next);

    // coalesce with the previous and/or next free slot when adjacent
    if (offset == end_prev) {
        auto prev = it;
        prev--;
        if (offset + capacity == begin_next) {
            prev->capacity += capacity + it->capacity;
            slots.erase (it);
        } else {
            prev->capacity += capacity;
        }
    } else {
        if (offset + capacity == begin_next) {
            it->offset -= capacity;
            it->capacity += capacity;
        } else {
            slots.insert (it, Slot (offset, capacity));
        }
    }

    // TODO shrink global storage if needed
}


/*****************************************
 * Compact form
 *****************************************/

size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il)
{
    FAISS_THROW_IF_NOT_MSG (totsize == 0,
                            "works only on an empty InvertedLists");

    // pass 1: compute the size of each merged list
    std::vector<size_t> sizes (nlist);
    for (int i = 0; i < n_il; i++) {
        const InvertedLists *il = ils[i];
        FAISS_THROW_IF_NOT (il->nlist == nlist && il->code_size == code_size);

        for (size_t j = 0; j < nlist; j++) {
            sizes [j] += il->list_size(j);
        }
    }

    // lay out all lists contiguously, with capacity == exact size
    size_t cums = 0;
    size_t ntotal = 0;
    for (size_t j = 0; j < nlist; j++) {
        ntotal += sizes[j];
        lists[j].size = 0;
        lists[j].capacity = sizes[j];
        lists[j].offset = cums;
        cums += lists[j].capacity * (sizeof(idx_t) + code_size);
    }

    update_totsize (cums);

    // pass 2: copy the data, one merged list per iteration
#pragma omp parallel for
    for (size_t j = 0; j < nlist; j++) {
        List & l = lists[j];
        for (int i = 0; i < n_il; i++) {
            const InvertedLists *il = ils[i];
            size_t n_entry = il->list_size(j);
            l.size += n_entry;
            update_entries (j, l.size - n_entry, n_entry,
                            il->get_ids(j),
                            il->get_codes(j));
        }
        assert (l.size == l.capacity);
    }

    return ntotal;
}
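
/* Usage sketch (illustrative, not part of this file): merge_from is
 * typically used to concatenate the inverted lists of several IndexIVF
 * shards built with the same quantizer into a single on-disk file.
 * The shard variables index0 / index1 and the output path are
 * hypothetical:
 *
 *   OnDiskInvertedLists *od = new OnDiskInvertedLists (
 *       index0.nlist, index0.code_size, "/path/to/merged.ivfdata");
 *   const InvertedLists *ils[] = {index0.invlists, index1.invlists};
 *   od->merge_from (ils, 2);
 *   index0.replace_invlists (od, true); // index0 takes ownership
 */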




} // namespace faiss