#include <IndexIVFPQ.h>

Inheritance diagram for faiss::IndexIVFPQ:

Public Member Functions
	IndexIVFPQ (Index *quantizer, size_t d, size_t nlist, size_t M, size_t nbits_per_idx)

virtual void	set_typename () override

virtual void	add_with_ids (idx_t n, const float *x, const long *xids=nullptr) override

void	add_core_o (idx_t n, const float *x, const long *xids, float *residuals_2, const long *precomputed_idx=nullptr)

virtual void	search (idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override

virtual void	reset () override
	removes all elements from the database.

virtual long	remove_ids (const IDSelector &sel) override

virtual void	train_residual (idx_t n, const float *x) override
	trains the product quantizer

void	train_residual_o (idx_t n, const float *x, float *residuals_2)
	same as train_residual, also output 2nd level residuals

virtual void	reconstruct_n (idx_t i0, idx_t ni, float *recons) const override

virtual void	reconstruct (idx_t key, float *recons) const override

size_t	find_duplicates (idx_t *ids, size_t *lims) const

void	encode (long key, const float *x, uint8_t *code) const

void	encode_multiple (size_t n, const long *keys, const float *x, uint8_t *codes) const
	same as encode, for multiple points at once

virtual void	search_knn_with_key (size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const

void	precompute_table ()
	build precomputed table

virtual void	merge_from_residuals (IndexIVF &other) override
	used to implement merging

void	copy_subset_to (IndexIVFPQ &other, int subset_type, long a1, long a2) const

Public Member Functions inherited from faiss::IndexIVF
	IndexIVF (Index *quantizer, size_t d, size_t nlist, MetricType metric=METRIC_INNER_PRODUCT)

virtual void	train (idx_t n, const float *x) override
	Trains the quantizer and calls train_residual to train sub-quantizers.

virtual void	add (idx_t n, const float *x) override
	Quantizes x and calls add_with_key.

virtual void	merge_from (IndexIVF &other, idx_t add_id)

void	make_direct_map ()
	initialize a direct map

double	imbalance_factor () const
	1= perfectly balanced, >1: imbalanced

void	print_stats () const
	display some stats about the inverted lists

Public Member Functions inherited from faiss::Index
	Index (idx_t d=0, MetricType metric=METRIC_INNER_PRODUCT)

virtual void	range_search (idx_t n, const float *x, float radius, RangeSearchResult *result) const

void	assign (idx_t n, const float *x, idx_t *labels, idx_t k=1)

void	compute_residual (const float *x, float *residual, idx_t key) const

void	display () const

virtual std::string	get_typename () const

Public Attributes
bool	by_residual
	Encode residual or plain vector?

int	use_precomputed_table
	if by_residual, build precompute tables

size_t	code_size
	code size per vector in bytes

ProductQuantizer	pq
	produces the codes

bool	do_polysemous_training
	reorder PQ centroids after training?

PolysemousTraining *	polysemous_training
	if NULL, use default

size_t	scan_table_threshold
	use table computation or on-the-fly?

size_t	max_codes
	max nb of codes to visit to do a query

int	polysemous_ht
	Hamming thresh for polysemous filtering.

std::vector< std::vector < uint8_t > >	codes

std::vector< float >	precomputed_table

Public Attributes inherited from faiss::IndexIVF
size_t	nlist
	number of possible key values

size_t	nprobe
	number of probes at query time

Index *	quantizer
	quantizer that maps vectors to inverted lists

bool	quantizer_trains_alone
	just pass over the trainset to quantizer

bool	own_fields
	whether object owns the quantizer

ClusteringParameters	cp
	to override default clustering params

std::vector< std::vector< long > >	ids
	Inverted lists for indexes.

bool	maintain_direct_map
	map for direct access to the elements. Enables reconstruct().

std::vector< long >	direct_map

Public Attributes inherited from faiss::Index
std::string	index_typename

int	d
	vector dimension

idx_t	ntotal
	total nb of indexed vectors

bool	verbose
	verbosity level

bool	is_trained
	set if the Index does not require training, or if training is done already

MetricType	metric_type
	type of metric this index uses for search

Additional Inherited Members
Public Types inherited from faiss::Index
typedef long	idx_t
	all indices are this type

Detailed Description

Inverted file with Product Quantizer encoding. Each residual vector is encoded as a product quantizer code.

Definition at line 30 of file IndexIVFPQ.h.

Member Function Documentation

void faiss::IndexIVFPQ::add_core_o	(	idx_t	n,
		const float *	x,
		const long *	xids,
		float *	residuals_2,
		const long *	precomputed_idx = `nullptr`
	)

same as add_core, also:

output 2nd level residuals if residuals_2 != NULL
use precomputed list numbers if precomputed_idx != NULL

Definition at line 176 of file IndexIVFPQ.cpp.

void faiss::IndexIVFPQ::add_with_ids	(	idx_t	n,
		const float *	x,
		const long *	xids = `nullptr`
	)

override virtual

Same as add, but stores xids instead of sequential ids.

The default implementation fails with an assertion, as it is not supported by all indexes.

Parameters

xids	if non-null, ids to store for the vectors (size n)

Reimplemented from faiss::Index.

Reimplemented in faiss::IndexIVFPQR.

Definition at line 170 of file IndexIVFPQ.cpp.

void faiss::IndexIVFPQ::copy_subset_to	(	IndexIVFPQ &	other,
		int	subset_type,
		long	a1,
		long	a2
	)		const

copy a subset of the entries index to the other index

if subset_type == 0: copies ids in [a1, a2); if subset_type == 1: copies ids if id % a1 == a2

Definition at line 317 of file IndexIVFPQ.cpp.

size_t faiss::IndexIVFPQ::find_duplicates	(	idx_t *	ids,
		size_t *	lims
	)		const

Find exact duplicates in the dataset.

the duplicates are returned in pre-allocated arrays (see the max sizes).

lims: limits between groups of duplicates (max size ntotal / 2 + 1). ids: ids[lims[i]] : ids[lims[i+1]-1] is a group of duplicates (max size ntotal).

Returns: n number of groups found

Definition at line 1130 of file IndexIVFPQ.cpp.

void faiss::IndexIVFPQ::precompute_table ( )

build precomputed table

Precomputed tables for residuals

During IVFPQ search with by_residual, we compute

d = || x - y_C - y_R ||^2

where x is the query vector, y_C the coarse centroid, y_R the refined PQ centroid. The expression can be decomposed as:

d = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C|y_R) - 2 * (x|y_R)

with term 1 = || x - y_C ||^2, term 2 = || y_R ||^2 + 2 * (y_C|y_R), and term 3 = - 2 * (x|y_R)

When using multiprobe, we use the following decomposition:

term 1 is the distance to the coarse centroid, that is computed during the 1st stage search.
term 2 can be precomputed, as it does not involve x. However, because of the PQ, it needs nlist * M * ksub storage. This is why use_precomputed_table is off by default
term 3 is the classical non-residual distance table.

Since y_R is defined by a product quantizer, it is split across subvectors and stored separately for each subvector. If the coarse quantizer is a MultiIndexQuantizer then the table can be stored more compactly.

At search time, the tables for term 2 and term 3 are added up. This is faster when the length of the lists is > ksub * M.

Definition at line 376 of file IndexIVFPQ.cpp.

void faiss::IndexIVFPQ::reconstruct	(	idx_t	key,
		float *	recons
	)		const

override virtual

Reconstruct a stored vector (or an approximation if lossy coding)

this function may not be defined for some indexes

Parameters

key	id of the vector to reconstruct
recons	reconstructed vector (size d)

Reimplemented from faiss::Index.

Definition at line 287 of file IndexIVFPQ.cpp.

void faiss::IndexIVFPQ::reconstruct_n	(	idx_t	i0,
		idx_t	ni,
		float *	recons
	)		const

override virtual

Reconstruct a subset of the indexed vectors

Parameters

i0	first vector to reconstruct
ni	nb of vectors to reconstruct
recons	output array of reconstructed vectors, size ni * d

Reimplemented from faiss::Index.

Reimplemented in faiss::IndexIVFPQR.

Definition at line 258 of file IndexIVFPQ.cpp.

long faiss::IndexIVFPQ::remove_ids ( const IDSelector & sel )

override virtual

removes IDs from the index. Not supported by all indexes

Reimplemented from faiss::Index.

Reimplemented in faiss::IndexIVFPQR.

Definition at line 1073 of file IndexIVFPQ.cpp.

void faiss::IndexIVFPQ::search	(	idx_t	n,
		const float *	x,
		idx_t	k,
		float *	distances,
		idx_t *	labels
	)		const

override virtual

query n vectors of dimension d to the index.

return at most k vectors. If there are not enough results for a query, the result array is padded with -1s.

Parameters

x	input vectors to search, size n * d
labels	output labels of the NNs, size n*k
distances	output pairwise distances, size n*k

Implements faiss::Index.

Reimplemented in faiss::IndexIVFPQR.

Definition at line 1045 of file IndexIVFPQ.cpp.

void faiss::IndexIVFPQ::search_knn_with_key	(	size_t	nx,
		const float *	qx,
		const long *	keys,
		const float *	coarse_dis,
		float_maxheap_array_t *	res,
		bool	store_pairs = `false`
	)		const

virtual

search a set of vectors, that are pre-quantized by the IVF quantizer. Fill in the corresponding heaps with the query results.

Parameters

nx	nb of vectors to query
qx	query vectors, size nx * d
keys	coarse quantization indices, size nx * nprobe
coarse_dis	distances to coarse centroids, size nx * nprobe
res	heaps for all the results, gives the nprobe
store_pairs	store inv list index + inv list offset in upper/lower 32 bits of result, instead of ids (used for reranking).

Reimplemented in faiss::IndexIVFPQCompact.

Definition at line 946 of file IndexIVFPQ.cpp.

Member Data Documentation

std::vector<float> faiss::IndexIVFPQ::precomputed_table

if use_precomputed_table: size nlist * pq.M * pq.ksub

Definition at line 48 of file IndexIVFPQ.h.

The documentation for this struct was generated from the following files:

Public Member Functions

Public Attributes

Additional Inherited Members

Detailed Description

Member Function Documentation

Member Data Documentation