diff --git a/contrib/ivf_tools.py b/contrib/ivf_tools.py
new file mode 100644
index 000000000..968e39f0a
--- /dev/null
+++ b/contrib/ivf_tools.py
@@ -0,0 +1,41 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import faiss
+
+
+def get_invlist(invlists, l):
+    """Return the content of inverted list `l` as a pair of numpy arrays.
+
+    The ids and codes are copied out of the inverted lists, so the result
+    stays usable after the list buffers have been released.
+
+    :param invlists: inverted lists object to read from
+    :param l: number of the list to read
+    :return: (list_ids, list_codes): an int64 array of size list_size(l)
+        and a uint8 array of shape (list_size(l), code_size)
+    """
+    ls = invlists.list_size(l)
+    list_ids = np.zeros(ls, dtype='int64')
+    list_codes = np.zeros((ls, invlists.code_size), dtype='uint8')
+    ids = codes = None
+    try:
+        ids = invlists.get_ids(l)
+        # An empty list has nothing to copy, and its buffer pointer may be
+        # null (assumption: this depends on the InvertedLists subclass).
+        if ls > 0:
+            faiss.memcpy(faiss.swig_ptr(list_ids), ids, list_ids.nbytes)
+        codes = invlists.get_codes(l)
+        if ls > 0:
+            faiss.memcpy(
+                faiss.swig_ptr(list_codes), codes, list_codes.nbytes)
+    finally:
+        # give the buffers back even if a copy raised
+        if ids is not None:
+            invlists.release_ids(l, ids)
+        if codes is not None:
+            invlists.release_codes(l, codes)
+    return list_ids, list_codes
diff --git a/faiss/OnDiskInvertedLists.cpp b/faiss/OnDiskInvertedLists.cpp
index e38cb6c85..69fa5882c 100644
--- a/faiss/OnDiskInvertedLists.cpp
+++ b/faiss/OnDiskInvertedLists.cpp
@@ -678,6 +678,18 @@ void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
     nlist = l1 - l0;
 }
 
+
+void OnDiskInvertedLists::set_all_lists_sizes(const size_t *sizes)
+{
+    size_t ofs = 0;
+    for (size_t i = 0; i < nlist; i++) {
+        // each list starts where the previous one ends: packed storage
+        lists[i].offset = ofs;
+        lists[i].capacity = lists[i].size = sizes[i];
+        ofs += sizes[i] * (sizeof(idx_t) + code_size);
+    }
+}
+
 /*******************************************************
  * I/O support via callbacks
  *******************************************************/
@@ -755,7 +767,9 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
     }
     READ1(od->totsize);
 
-    od->do_mmap();
+    if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
+        od->do_mmap();
+    }
     return od;
 }
 
diff --git a/faiss/OnDiskInvertedLists.h b/faiss/OnDiskInvertedLists.h
index 8edc2a6ab..bb143e387 100644
--- a/faiss/OnDiskInvertedLists.h
+++ b/faiss/OnDiskInvertedLists.h
@@ -126,6 +126,9 @@ struct OnDiskInvertedLists: InvertedLists {
     size_t allocate_slot (size_t capacity);
     void free_slot (size_t offset, size_t capacity);
 
+    /// override all list sizes and make a packed storage
+    void set_all_lists_sizes(const size_t *sizes);
+
     // empty constructor for the I/O functions
     OnDiskInvertedLists ();
 };
diff --git a/faiss/index_io.h b/faiss/index_io.h
index 5019a9461..a8a9eed07 100644
--- a/faiss/index_io.h
+++ b/faiss/index_io.h
@@ -51,7 +51,7 @@ const int IO_FLAG_READ_ONLY = 2;
 const int IO_FLAG_ONDISK_SAME_DIR = 4;
 // don't load IVF data to RAM, only list sizes
 const int IO_FLAG_SKIP_IVF_DATA = 8;
-// try to memmap data (useful for OnDiskInvertedLists)
+// try to memmap data (useful to load an ArrayInvertedLists as an OnDiskInvertedLists)
 const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;
 
 
diff --git a/faiss/python/swigfaiss.swig b/faiss/python/swigfaiss.swig
index f3822e082..aff45087c 100644
--- a/faiss/python/swigfaiss.swig
+++ b/faiss/python/swigfaiss.swig
@@ -174,7 +174,9 @@ namespace std {
         T * data();
         size_t size();
         T at (size_t n) const;
+        T & operator [] (size_t n);
         void resize (size_t n);
+        void swap (vector & other);
     };
 };
 