335 lines
10 KiB
C++
335 lines
10 KiB
C++
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
|
|
|
|
// -*- c++ -*-
|
|
|
|
#ifndef FAISS_INVERTEDLISTS_IVF_H
|
|
#define FAISS_INVERTEDLISTS_IVF_H
|
|
|
|
/**
 * Definition of inverted lists + a few common classes that implement
 * the interface.
 */
|
|
|
|
#include <vector>
|
|
#include "Index.h"
|
|
|
|
|
|
namespace faiss {
|
|
|
|
/** Table of inverted lists
|
|
* multithreading rules:
|
|
* - concurrent read accesses are allowed
|
|
* - concurrent update accesses are allowed
|
|
* - for resize and add_entries, only concurrent access to different lists
|
|
* are allowed
|
|
*/
|
|
struct InvertedLists {
|
|
typedef Index::idx_t idx_t;
|
|
|
|
size_t nlist; ///< number of possible key values
|
|
size_t code_size; ///< code size per vector in bytes
|
|
|
|
InvertedLists (size_t nlist, size_t code_size);
|
|
|
|
/*************************
|
|
* Read only functions */
|
|
|
|
/// get the size of a list
|
|
virtual size_t list_size(size_t list_no) const = 0;
|
|
|
|
/** get the codes for an inverted list
|
|
* must be released by release_codes
|
|
*
|
|
* @return codes size list_size * code_size
|
|
*/
|
|
virtual const uint8_t * get_codes (size_t list_no) const = 0;
|
|
|
|
/** get the ids for an inverted list
|
|
* must be released by release_ids
|
|
*
|
|
* @return ids size list_size
|
|
*/
|
|
virtual const idx_t * get_ids (size_t list_no) const = 0;
|
|
|
|
/// release codes returned by get_codes (default implementation is nop
|
|
virtual void release_codes (size_t list_no, const uint8_t *codes) const;
|
|
|
|
/// release ids returned by get_ids
|
|
virtual void release_ids (size_t list_no, const idx_t *ids) const;
|
|
|
|
/// @return a single id in an inverted list
|
|
virtual idx_t get_single_id (size_t list_no, size_t offset) const;
|
|
|
|
/// @return a single code in an inverted list
|
|
/// (should be deallocated with release_codes)
|
|
virtual const uint8_t * get_single_code (
|
|
size_t list_no, size_t offset) const;
|
|
|
|
/// prepare the following lists (default does nothing)
|
|
/// a list can be -1 hence the signed long
|
|
virtual void prefetch_lists (const idx_t *list_nos, int nlist) const;
|
|
|
|
/*************************
|
|
* writing functions */
|
|
|
|
/// add one entry to an inverted list
|
|
virtual size_t add_entry (size_t list_no, idx_t theid,
|
|
const uint8_t *code);
|
|
|
|
virtual size_t add_entries (
|
|
size_t list_no, size_t n_entry,
|
|
const idx_t* ids, const uint8_t *code) = 0;
|
|
|
|
virtual void update_entry (size_t list_no, size_t offset,
|
|
idx_t id, const uint8_t *code);
|
|
|
|
virtual void update_entries (size_t list_no, size_t offset, size_t n_entry,
|
|
const idx_t *ids, const uint8_t *code) = 0;
|
|
|
|
virtual void resize (size_t list_no, size_t new_size) = 0;
|
|
|
|
virtual void reset ();
|
|
|
|
/// move all entries from oivf (empty on output)
|
|
void merge_from (InvertedLists *oivf, size_t add_id);
|
|
|
|
virtual ~InvertedLists ();
|
|
|
|
/*************************
|
|
* statistics */
|
|
|
|
/// 1= perfectly balanced, >1: imbalanced
|
|
double imbalance_factor () const;
|
|
|
|
/// display some stats about the inverted lists
|
|
void print_stats () const;
|
|
|
|
/// sum up list sizes
|
|
size_t compute_ntotal () const;
|
|
|
|
/**************************************
|
|
* Scoped inverted lists (for automatic deallocation)
|
|
*
|
|
* instead of writing:
|
|
*
|
|
* uint8_t * codes = invlists->get_codes (10);
|
|
* ... use codes
|
|
* invlists->release_codes(10, codes)
|
|
*
|
|
* write:
|
|
*
|
|
* ScopedCodes codes (invlists, 10);
|
|
* ... use codes.get()
|
|
* // release called automatically when codes goes out of scope
|
|
*
|
|
* the following function call also works:
|
|
*
|
|
* foo (123, ScopedCodes (invlists, 10).get(), 456);
|
|
*
|
|
*/
|
|
|
|
struct ScopedIds {
|
|
const InvertedLists *il;
|
|
const idx_t *ids;
|
|
size_t list_no;
|
|
|
|
ScopedIds (const InvertedLists *il, size_t list_no):
|
|
il (il), ids (il->get_ids (list_no)), list_no (list_no)
|
|
{}
|
|
|
|
const idx_t *get() {return ids; }
|
|
|
|
idx_t operator [] (size_t i) const {
|
|
return ids[i];
|
|
}
|
|
|
|
~ScopedIds () {
|
|
il->release_ids (list_no, ids);
|
|
}
|
|
};
|
|
|
|
struct ScopedCodes {
|
|
const InvertedLists *il;
|
|
const uint8_t *codes;
|
|
size_t list_no;
|
|
|
|
ScopedCodes (const InvertedLists *il, size_t list_no):
|
|
il (il), codes (il->get_codes (list_no)), list_no (list_no)
|
|
{}
|
|
|
|
ScopedCodes (const InvertedLists *il, size_t list_no, size_t offset):
|
|
il (il), codes (il->get_single_code (list_no, offset)),
|
|
list_no (list_no)
|
|
{}
|
|
|
|
const uint8_t *get() {return codes; }
|
|
|
|
~ScopedCodes () {
|
|
il->release_codes (list_no, codes);
|
|
}
|
|
};
|
|
|
|
|
|
};
|
|
|
|
|
|
/// simple (default) implementation as an array of inverted lists
|
|
struct ArrayInvertedLists: InvertedLists {
|
|
std::vector < std::vector<uint8_t> > codes; // binary codes, size nlist
|
|
std::vector < std::vector<idx_t> > ids; ///< Inverted lists for indexes
|
|
|
|
ArrayInvertedLists (size_t nlist, size_t code_size);
|
|
|
|
size_t list_size(size_t list_no) const override;
|
|
const uint8_t * get_codes (size_t list_no) const override;
|
|
const idx_t * get_ids (size_t list_no) const override;
|
|
|
|
size_t add_entries (
|
|
size_t list_no, size_t n_entry,
|
|
const idx_t* ids, const uint8_t *code) override;
|
|
|
|
void update_entries (size_t list_no, size_t offset, size_t n_entry,
|
|
const idx_t *ids, const uint8_t *code) override;
|
|
|
|
void resize (size_t list_no, size_t new_size) override;
|
|
|
|
virtual ~ArrayInvertedLists ();
|
|
};
|
|
|
|
/*****************************************************************
 * Meta-inverted lists
 *
 * About terminology: the inverted lists are seen as a sparse matrix
 * that can be stacked horizontally, stacked vertically, and sliced.
 *****************************************************************/
|
|
|
|
struct ReadOnlyInvertedLists: InvertedLists {
|
|
|
|
ReadOnlyInvertedLists (size_t nlist, size_t code_size):
|
|
InvertedLists (nlist, code_size) {}
|
|
|
|
size_t add_entries (
|
|
size_t list_no, size_t n_entry,
|
|
const idx_t* ids, const uint8_t *code) override;
|
|
|
|
void update_entries (size_t list_no, size_t offset, size_t n_entry,
|
|
const idx_t *ids, const uint8_t *code) override;
|
|
|
|
void resize (size_t list_no, size_t new_size) override;
|
|
|
|
};
|
|
|
|
|
|
/// Horizontal stack of inverted lists
|
|
struct HStackInvertedLists: ReadOnlyInvertedLists {
|
|
|
|
std::vector<const InvertedLists *>ils;
|
|
|
|
/// build InvertedLists by concatenating nil of them
|
|
HStackInvertedLists (int nil, const InvertedLists **ils);
|
|
|
|
size_t list_size(size_t list_no) const override;
|
|
const uint8_t * get_codes (size_t list_no) const override;
|
|
const idx_t * get_ids (size_t list_no) const override;
|
|
|
|
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
|
|
|
void release_codes (size_t list_no, const uint8_t *codes) const override;
|
|
void release_ids (size_t list_no, const idx_t *ids) const override;
|
|
|
|
idx_t get_single_id (size_t list_no, size_t offset) const override;
|
|
|
|
const uint8_t * get_single_code (
|
|
size_t list_no, size_t offset) const override;
|
|
|
|
};
|
|
|
|
using ConcatenatedInvertedLists = HStackInvertedLists;
|
|
|
|
|
|
/// vertical slice of indexes in another InvertedLists
|
|
struct SliceInvertedLists: ReadOnlyInvertedLists {
|
|
const InvertedLists *il;
|
|
idx_t i0, i1;
|
|
|
|
SliceInvertedLists(const InvertedLists *il, idx_t i0, idx_t i1);
|
|
|
|
size_t list_size(size_t list_no) const override;
|
|
const uint8_t * get_codes (size_t list_no) const override;
|
|
const idx_t * get_ids (size_t list_no) const override;
|
|
|
|
void release_codes (size_t list_no, const uint8_t *codes) const override;
|
|
void release_ids (size_t list_no, const idx_t *ids) const override;
|
|
|
|
idx_t get_single_id (size_t list_no, size_t offset) const override;
|
|
|
|
const uint8_t * get_single_code (
|
|
size_t list_no, size_t offset) const override;
|
|
|
|
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
|
};
|
|
|
|
|
|
struct VStackInvertedLists: ReadOnlyInvertedLists {
|
|
std::vector<const InvertedLists *>ils;
|
|
std::vector<idx_t> cumsz;
|
|
|
|
/// build InvertedLists by concatenating nil of them
|
|
VStackInvertedLists (int nil, const InvertedLists **ils);
|
|
|
|
size_t list_size(size_t list_no) const override;
|
|
const uint8_t * get_codes (size_t list_no) const override;
|
|
const idx_t * get_ids (size_t list_no) const override;
|
|
|
|
void release_codes (size_t list_no, const uint8_t *codes) const override;
|
|
void release_ids (size_t list_no, const idx_t *ids) const override;
|
|
|
|
idx_t get_single_id (size_t list_no, size_t offset) const override;
|
|
|
|
const uint8_t * get_single_code (
|
|
size_t list_no, size_t offset) const override;
|
|
|
|
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
|
|
|
};
|
|
|
|
|
|
/** use the first inverted lists if they are non-empty otherwise use the second
|
|
*
|
|
* This is useful if il1 has a few inverted lists that are too long,
|
|
* and that il0 has replacement lists for those, with empty lists for
|
|
* the others. */
|
|
struct MaskedInvertedLists: ReadOnlyInvertedLists {
|
|
|
|
const InvertedLists *il0;
|
|
const InvertedLists *il1;
|
|
|
|
MaskedInvertedLists (const InvertedLists *il0,
|
|
const InvertedLists *il1);
|
|
|
|
size_t list_size(size_t list_no) const override;
|
|
const uint8_t * get_codes (size_t list_no) const override;
|
|
const idx_t * get_ids (size_t list_no) const override;
|
|
|
|
void release_codes (size_t list_no, const uint8_t *codes) const override;
|
|
void release_ids (size_t list_no, const idx_t *ids) const override;
|
|
|
|
idx_t get_single_id (size_t list_no, size_t offset) const override;
|
|
|
|
const uint8_t * get_single_code (
|
|
size_t list_no, size_t offset) const override;
|
|
|
|
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
|
|
|
};
|
|
|
|
} // namespace faiss
|
|
|
|
|
|
#endif
|