Classes
struct	AutoTuneCriterion

struct	OneRecallAtRCriterion

struct	IntersectionCriterion

struct	OperatingPoint

struct	OperatingPoints

struct	ParameterRange
	possible values of a parameter, sorted from least to most expensive/accurate More...

struct	ParameterSpace

struct	RangeSearchResult

struct	IDSelector

struct	IDSelectorRange

struct	IDSelectorBatch

struct	BufferList

struct	RangeSearchPartialResult
	the entries in the buffers are split per query More...

struct	ClusteringParameters

struct	Clustering

struct	HammingComputer4

struct	HammingComputer8

struct	HammingComputer16

struct	HammingComputer20

struct	HammingComputer32

struct	HammingComputer64

struct	HammingComputerM8

struct	HammingComputerM4

struct	HammingComputer

struct	GenHammingComputer8

struct	GenHammingComputer16

struct	GenHammingComputer32

struct	GenHammingComputerM8

struct	CMax

struct	CMin

struct	HeapArray

struct	Index

struct	Cloner

struct	IndexFlat

struct	IndexFlatIP

struct	IndexFlatL2

struct	IndexFlatL2BaseShift

struct	IndexRefineFlat

struct	IndexFlat1D
	optimized version for 1D "vectors" More...

struct	IndexIVF

struct	IndexIVFFlat

struct	CodeCmp

struct	IndexIVFPQ

struct	IndexIVFPQStats

struct	IndexIVFPQR

struct	IndexIVFPQCompact

struct	IndexLSH

struct	ArgSort

struct	SortedArray

struct	SemiSortedArray

struct	MinSumK

struct	IndexPQ

struct	IndexPQStats

struct	MultiIndexQuantizer

struct	IndexIDMap

struct	IndexShards

struct	IndexSplitVectors

struct	Score3Computer

struct	IndirectSort

struct	RankingScore2

struct	SimulatedAnnealingParameters
	parameters used for the simulated annealing method More...

struct	PermutationObjective
	abstract class for the loss function More...

struct	ReproduceDistancesObjective

struct	SimulatedAnnealingOptimizer
	Simulated annealing optimization algorithm for permutations. More...

struct	PolysemousTraining
	optimizes the order of indices in a ProductQuantizer More...

struct	ProductQuantizer

struct	NopDistanceCorrection

struct	BaseShiftDistanceCorrection

struct	RandomGenerator
	random generator that can be used in multithreaded contexts More...

struct	VectorTransform

struct	LinearTransform

struct	RandomRotationMatrix
	Randomly rotate a set of vectors. More...

struct	PCAMatrix

struct	OPQMatrix

struct	RemapDimensionsTransform

struct	IndexPreTransform

Typedefs
typedef HeapArray< CMin< float, long > >	float_minheap_array_t

typedef HeapArray< CMin< int, long > >	int_minheap_array_t

typedef HeapArray< CMax< float, long > >	float_maxheap_array_t

typedef HeapArray< CMax< int, long > >	int_maxheap_array_t

Enumerations
enum	MetricType { METRIC_INNER_PRODUCT = 0, METRIC_L2 = 1 }
	Some algorithms support both an inner product version and a L2 search version.

Functions
Index *	index_factory (int d, const char *description_in, MetricType metric)

float	kmeans_clustering (size_t d, size_t n, size_t k, const float *x, float *centroids)

template<size_t nbits, typename T >
T	hamming (const uint8_t bs1, const uint8_t bs2)

template<size_t nbits>
hamdis_t	hamming (const uint64_t bs1, const uint64_t bs2)

template<>
hamdis_t	hamming< 64 > (const uint64_t pa, const uint64_t pb)

template<>
hamdis_t	hamming< 128 > (const uint64_t pa, const uint64_t pb)

template<>
hamdis_t	hamming< 256 > (const uint64_t pa, const uint64_t pb)

hamdis_t	hamming (const uint64_t bs1, const uint64_t bs2, size_t nwords)

template<size_t nbits>
void	hammings (const uint64_t bs1, const uint64_t bs2, size_t n1, size_t n2, hamdis_t *dis)

void	hammings (const uint64_t bs1, const uint64_t bs2, size_t n1, size_t n2, size_t nwords, hamdis_t *__restrict dis)

template<size_t nbits>
void	hamming_count_thres (const uint64_t bs1, const uint64_t bs2, size_t n1, size_t n2, hamdis_t ht, size_t *nptr)

template<size_t nbits>
void	crosshamming_count_thres (const uint64_t dbs, size_t n, int ht, size_t nptr)

template<size_t nbits>
size_t	match_hamming_thres (const uint64_t bs1, const uint64_t bs2, size_t n1, size_t n2, int ht, long idx, hamdis_t hams)

void	fvec2bitvec (const float x, uint8_t b, size_t d)

void	fvecs2bitvecs (const float x, uint8_t b, size_t d, size_t n)

void	bitvec_print (const uint8_t *b, size_t d)

void	hammings (const uint8_t a, const uint8_t b, size_t na, size_t nb, size_t ncodes, hamdis_t *__restrict dis)

void	hammings_knn_core (int_maxheap_array_t ha, const uint8_t a, const uint8_t *b, size_t nb, size_t ncodes)

void	hammings_knn (int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)

void	hamming_count_thres (const uint8_t bs1, const uint8_t bs2, size_t n1, size_t n2, hamdis_t ht, size_t ncodes, size_t *nptr)

void	crosshamming_count_thres (const uint8_t dbs, size_t n, hamdis_t ht, size_t ncodes, size_t nptr)

size_t	match_hamming_thres (const uint8_t bs1, const uint8_t bs2, size_t n1, size_t n2, hamdis_t ht, size_t ncodes, long idx, hamdis_t dis)

void	generalized_hammings_knn (int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)

int	popcount64 (uint64_t x)

void	hammings (const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)

	SPECIALIZED_HC (4)

	SPECIALIZED_HC (8)

	SPECIALIZED_HC (16)

	SPECIALIZED_HC (20)

	SPECIALIZED_HC (32)

	SPECIALIZED_HC (64)

int	generalized_hamming_64 (uint64_t a)

template<class C >
void	heap_pop (size_t k, typename C::T *bh_val, typename C::TI *bh_ids)

template<class C >
void	heap_push (size_t k, typename C::T *bh_val, typename C::TI *bh_ids, typename C::T val, typename C::TI ids)

template<typename T >
void	minheap_pop (size_t k, T bh_val, long bh_ids)

template<typename T >
void	minheap_push (size_t k, T bh_val, long bh_ids, T val, long ids)

template<typename T >
void	maxheap_pop (size_t k, T bh_val, long bh_ids)

template<typename T >
void	maxheap_push (size_t k, T bh_val, long bh_ids, T val, long ids)

template<class C >
void	heap_heapify (size_t k, typename C::T bh_val, typename C::TI bh_ids, const typename C::T x=nullptr, const typename C::TI ids=nullptr, size_t k0=0)

template<typename T >
void	minheap_heapify (size_t k, T bh_val, long bh_ids, const T x=nullptr, const long ids=nullptr, size_t k0=0)

template<typename T >
void	maxheap_heapify (size_t k, T bh_val, long bh_ids, const T x=nullptr, const long ids=nullptr, size_t k0=0)

template<class C >
void	heap_addn (size_t k, typename C::T bh_val, typename C::TI bh_ids, const typename C::T x, const typename C::TI ids, size_t n)

template<typename T >
void	minheap_addn (size_t k, T bh_val, long bh_ids, const T x, const long ids, size_t n)

template<typename T >
void	maxheap_addn (size_t k, T bh_val, long bh_ids, const T x, const long ids, size_t n)

template<typename C >
size_t	heap_reorder (size_t k, typename C::T bh_val, typename C::TI bh_ids)

template<typename T >
size_t	minheap_reorder (size_t k, T bh_val, long bh_ids)

template<typename T >
size_t	maxheap_reorder (size_t k, T bh_val, long bh_ids)

template<class C >
void	indirect_heap_pop (size_t k, const typename C::T bh_val, typename C::TI bh_ids)

template<class C >
void	indirect_heap_push (size_t k, const typename C::T bh_val, typename C::TI bh_ids, typename C::TI id)

void	write_VectorTransform (const VectorTransform vt, FILE f)

void	write_ProductQuantizer (const ProductQuantizer pq, const char fname)

void	write_index (const Index idx, FILE f)

void	write_index (const Index idx, const char fname)

void	write_VectorTransform (const VectorTransform vt, const char fname)

VectorTransform *	read_VectorTransform (FILE *f)

ProductQuantizer *	read_ProductQuantizer (const char *fname)

Index *	read_index (FILE *f, bool try_mmap)

Index *	read_index (const char *fname, bool try_mmap)

VectorTransform *	read_VectorTransform (const char *fname)

Index *	clone_index (const Index *index)

template<class C >
void	partial_sort (int k, int n, const typename C::T *vals, typename C::TI *perm)

template<typename CT , class C >
void	pq_estimators_from_tables_Mmul4 (int M, const CT codes, size_t ncodes, const float __restrict dis_table, size_t ksub, size_t k, float heap_dis, long heap_ids)

template<typename CT , class C >
void	pq_estimators_from_tables_M4 (const CT codes, size_t ncodes, const float __restrict dis_table, size_t ksub, size_t k, float heap_dis, long heap_ids)

double	getmillisecs ()
	ms elapsed since some arbitrary epoch

void	float_rand (float *x, size_t n, long seed)

void	float_randn (float *x, size_t n, long seed)

void	long_rand (long *x, size_t n, long seed)

void	rand_perm (int *perm, size_t n, long seed)

void	byte_rand (uint8_t *x, size_t n, long seed)

void	reflection (const float __restrict u, float __restrict x, size_t n, size_t d, size_t nu)

void	reflection_ref (const float u, float x, size_t n, size_t d, size_t nu)

float	fvec_L2sqr (const float x, const float y, size_t d)
	Squared L2 distance between two vectors.

float	fvec_L2sqr_ref (const float x, const float y, size_t d)

float	fvec_inner_product (const float x, const float y, size_t d)

float	fvec_inner_product_ref (const float x, const float y, size_t d)

float	fvec_norm_L2sqr (const float *x, size_t d)

float	fvec_norm_L2sqr_ref (const float *__restrict x, size_t d)

void	fvec_inner_products_ny (float __restrict ip, const float x, const float *y, size_t d, size_t ny)

void	fvec_L2sqr_ny (float __restrict dis, const float x, const float *y, size_t d, size_t ny)

void	fvec_norms_L2 (float __restrict nr, const float __restrict x, size_t d, size_t nx)

void	fvec_norms_L2sqr (float __restrict nr, const float __restrict x, size_t d, size_t nx)

void	fvec_renorm_L2 (size_t d, size_t nx, float *__restrict x)

void	knn_inner_product (const float *x, const float *y, size_t d, size_t nx, size_t ny, float_minheap_array_t *res)

void	knn_L2sqr (const float *x, const float *y, size_t d, size_t nx, size_t ny, float_maxheap_array_t *res)

void	knn_L2sqr_base_shift (const float *x, const float *y, size_t d, size_t nx, size_t ny, float_maxheap_array_t *res, const float *base_shift)

void	fvec_inner_products_by_idx (float __restrict ip, const float x, const float y, const long __restrict ids, size_t d, size_t nx, size_t ny)

void	fvec_L2sqr_by_idx (float __restrict dis, const float x, const float y, const long __restrict ids, size_t d, size_t nx, size_t ny)

void	knn_inner_products_by_idx (const float x, const float y, const long __restrict ids, size_t d, size_t nx, size_t ny, float_minheap_array_t res)

void	knn_L2sqr_by_idx (const float x, const float y, const long __restrict ids, size_t d, size_t nx, size_t ny, float_maxheap_array_t res)

void	range_search_L2sqr (const float *x, const float *y, size_t d, size_t nx, size_t ny, float radius, RangeSearchResult *res)

void	range_search_inner_product (const float x, const float y, size_t d, size_t nx, size_t ny, float radius, RangeSearchResult *result)
	same as range_search_L2sqr for the inner product similarity

void	inner_product_to_L2sqr (float __restrict dis, const float nr1, const float *nr2, size_t n1, size_t n2)

void	matrix_qr (int m, int n, float *a)

void	pairwise_L2sqr (long d, long nq, const float *xq, long nb, const float *xb, float *dis, long ldq, long ldb, long ldd)

int	km_update_centroids (const float *x, float *centroids, long *assign, size_t d, size_t k, size_t n)

void	ranklist_handle_ties (int k, long *idx, const float *dis)

size_t	ranklist_intersection_size (size_t k1, const long *v1, size_t k2, const long *v2_in)

double	imbalance_factor (int k, const int *hist)
	same, takes a histogram as input

double	imbalance_factor (int n, int k, const long *assign)
	a balanced assignment has a IF of 1

int	ivec_hist (size_t n, const int v, int vmax, int hist)
	compute histogram on v

void	bincode_hist (size_t n, size_t nbits, const uint8_t *codes, int *hist)

size_t	ivec_checksum (size_t n, const int *a)
	compute a checksum on a table.

void	fvec_argsort (size_t n, const float vals, size_t perm)

void	fvec_argsort_parallel (size_t n, const float vals, size_t perm)

void	fvec_madd (size_t n, const float *a, float bf, const float *b, float *c)

int	fvec_madd_and_argmin (size_t n, const float *a, float bf, const float *b, float *c)

size_t	get_mem_usage_kb ()
	get current RSS usage in kB

void	fvec_inner_products_ny (float ip, const float x, const float *y, size_t d, size_t ny)

void	fvec_norms_L2 (float *ip, const float *x, size_t d, size_t nx)

void	fvec_norms_L2sqr (float ip, const float x, size_t d, size_t nx)
	same as fvec_norms_L2, but computes square norms

void	fvec_renorm_L2 (size_t d, size_t nx, float *x)

void	reflection (const float u, float x, size_t n, size_t d, size_t nu)

Variables
int	read_old_fmt_hack = 0

IndexIVFPQStats	indexIVFPQ_stats

IndexPQStats	indexPQ_stats

Detailed Description

This source code is licensed under the CC-by-NC license found in the LICENSE file in the root directory of this source tree.

This source code is licensed under the CC-by-NC license found in the LICENSE file in the root directory of this source tree. Multi-dimensional array class for CUDA device and host usage. Originally from Facebook's fbcunn, since added to the Torch GPU library cutorch as well.

This source code is licensed under the CC-by-NC license found in the LICENSE file in the root directory of this source tree.

Throughout the library, vectors are provided as float * pointers. Most algorithms can be optimized when several vectors are processed (added/searched) together in a batch. In this case, they are passed in as a matrix. When n vectors of size d are provided as float * x, component j of vector i is

x[ i * d + j ]

where 0 <= i < n and 0 <= j < d. In other words, matrices are always compact. When specifying the size of the matrix, we call it an n*d matrix, which implies a row-major storage.

A few utility functions for similarity search:

random generators
optimized exhaustive distance and knn search functions
some functions reimplemented from torch for speed

This source code is licensed under the CC-by-NC license found in the LICENSE file in the root directory of this source tree. Defines a few objects that apply transformations to a set of vectors. Often these are pre-processing steps.

Function Documentation

void faiss::bincode_hist	(	size_t	n,
		size_t	nbits,
		const uint8_t *	codes,
		int *	hist
	)

Compute histogram of bits on a code array

Parameters

codes	size(n, nbits / 8)
hist	size(nbits): nb of 1s in the array of codes

Definition at line 1467 of file utils.cpp.

void faiss::fvec_madd	(	size_t	n,
		const float *	a,
		float	bf,
		const float *	b,
		float *	c
	)

compute c := a + bf * b for a, b and c tables

Parameters

n	size of the tables
a	size n
b	size n
c	result table, size n

Definition at line 1707 of file utils.cpp.

int faiss::fvec_madd_and_argmin	(	size_t	n,
		const float *	a,
		float	bf,
		const float *	b,
		float *	c
	)

same as fvec_madd, also return index of the min of the result table

Returns: index of the min of table c

Definition at line 1781 of file utils.cpp.

float faiss::fvec_norm_L2sqr	(	const float *	x,
		size_t	d
	)

squared norm of a vector

Definition at line 511 of file utils.cpp.

void faiss::fvec_norms_L2	(	float *	ip,
		const float *	x,
		size_t	d,
		size_t	nx
	)

compute the L2 norms for a set of vectors

Parameters

ip	output norms, size nx
x	set of vectors, size nx * d

void faiss::generalized_hammings_knn	(	int_maxheap_array_t *	ha,
		const uint8_t *	a,
		const uint8_t *	b,
		size_t	nb,
		size_t	code_size,
		int	ordered = `true`
	)

generalized Hamming distances (= count number of code bytes that are the same)

Definition at line 635 of file hamming.cpp.

void faiss::hammings	(	const uint8_t *	a,
		const uint8_t *	b,
		size_t	na,
		size_t	nb,
		size_t	nbytespercode,
		hamdis_t *	dis
	)

Compute a set of Hamming distances between na and nb binary vectors

Parameters

a	size na * nbytespercode
b	size nb * nbytespercode
nbytespercode	should be a multiple of 8
dis	output distances, size na * nb

void faiss::hammings_knn	(	int_maxheap_array_t *	ha,
		const uint8_t *	a,
		const uint8_t *	b,
		size_t	nb,
		size_t	ncodes,
		int	ordered
	)

Return the k smallest Hamming distances for a set of binary query vectors

Parameters

a	queries, size ha->nh * ncodes
b	database, size nb * ncodes
nb	number of database vectors
ncodes	size of the binary codes (bytes)
ordered	if != 0: order the results by decreasing distance (may be bottleneck for k/n > 0.01)

Definition at line 475 of file hamming.cpp.

template<class C >

void faiss::heap_pop	(	size_t	k,
		typename C::T *	bh_val,
		typename C::TI *	bh_ids
	)

inline

Pops the top element from the heap defined by bh_val[0..k-1] and bh_ids[0..k-1]. on output the element at k-1 is undefined.

Definition at line 91 of file Heap.h.

template<class C >

void faiss::heap_push	(	size_t	k,
		typename C::T *	bh_val,
		typename C::TI *	bh_ids,
		typename C::T	val,
		typename C::TI	ids
	)

inline

Pushes the element (val, ids) into the heap bh_val[0..k-2] and bh_ids[0..k-2]. on output the element at k-1 is defined.

Definition at line 127 of file Heap.h.

Index * faiss::index_factory	(	int	d,
		const char *	description,
		MetricType	metric = `METRIC_L2`
	)

Build an index with the sequence of processing steps described in the string.

Definition at line 623 of file AutoTune.cpp.

int faiss::km_update_centroids	(	const float *	x,
		float *	centroids,
		long *	assign,
		size_t	d,
		size_t	k,
		size_t	n
	)

For k-means: update stage. Returns nb of split clusters.

Definition at line 1286 of file utils.cpp.

float faiss::kmeans_clustering	(	size_t	d,
		size_t	n,
		size_t	k,
		const float *	x,
		float *	centroids
	)

simplified interface

Parameters

d	dimension of the data
n	nb of training vectors
k	nb of output centroids
x	training set (size n * d)
centroids	output centroids (size k * d)

Returns: final quantization error

Definition at line 204 of file Clustering.cpp.

void faiss::knn_inner_product	(	const float *	x,
		const float *	y,
		size_t	d,
		size_t	nx,
		size_t	ny,
		float_minheap_array_t *	res
	)

Return the k nearest neighbors of each of the nx vectors x among the ny vectors y, w.r.t. max inner product

Parameters

x	query vectors, size nx * d
y	database vectors, size ny * d
res	result array, which also provides k. Sorted on output

Definition at line 830 of file utils.cpp.

void faiss::knn_L2sqr	(	const float *	x,
		const float *	y,
		size_t	d,
		size_t	nx,
		size_t	ny,
		float_maxheap_array_t *	res
	)

Same as knn_inner_product, for the L2 distance

Definition at line 850 of file utils.cpp.

void faiss::knn_L2sqr_base_shift	(	const float *	x,
		const float *	y,
		size_t	d,
		size_t	nx,
		size_t	ny,
		float_maxheap_array_t *	res,
		const float *	base_shift
	)

same as knn_L2sqr, but base_shift[bno] is subtracted from all computed distances.

Parameters

base_shift size ny

Definition at line 870 of file utils.cpp.

void faiss::matrix_qr	(	int	m,
		int	n,
		float *	a
	)

compute the Q of the QR decomposition for m > n

Parameters

a	size n * m: input matrix and output Q

Definition at line 1206 of file utils.cpp.

void faiss::pairwise_L2sqr	(	long	d,
		long	nq,
		const float *	xq,
		long	nb,
		const float *	xb,
		float *	dis,
		long	ldq = `-1`,
		long	ldb = `-1`,
		long	ldd = `-1`
	)

Compute pairwise distances between sets of vectors

Parameters

d	dimension of the vectors
nq	nb of query vectors
nb	nb of database vectors
xq	query vectors (size nq * d)
xb	database vectors (size nb * d)
dis	output distances (size nq * nb)
ldq,ldb,ldd	strides for the matrices

Definition at line 1228 of file utils.cpp.

template<class C >

void faiss::partial_sort	(	int	k,
		int	n,
		const typename C::T *	vals,
		typename C::TI *	perm
	)

Array has n values. Sort the k first ones and copy the other ones into elements k..n-1

Definition at line 528 of file IndexPQ.cpp.

void faiss::range_search_L2sqr	(	const float *	x,
		const float *	y,
		size_t	d,
		size_t	nx,
		size_t	ny,
		float	radius,
		RangeSearchResult *	result
	)

Return, for each of the nx vectors x, the vectors among the ny vectors y that lie within the given search radius, w.r.t. the squared L2 distance

Parameters

x	query vectors, size nx * d
y	database vectors, size ny * d
radius	search radius around the x vectors
result	result structure

Definition at line 1151 of file utils.cpp.

void faiss::ranklist_handle_ties	(	int	k,
		long *	idx,
		const float *	dis
	)

distances are supposed to be sorted. Sorts indices with same distance

Definition at line 1378 of file utils.cpp.

size_t faiss::ranklist_intersection_size	(	size_t	k1,
		const long *	v1,
		size_t	k2,
		const long *	v2
	)

count the number of common elements between v1 and v2. Algorithm = sorting + bisection to avoid double-counting duplicates

Definition at line 1394 of file utils.cpp.

Index * faiss::read_index	(	FILE *	f,
		bool	try_mmap = `false`
	)

mmap'ing currently works only for IndexIVFPQCompact, the IndexIVFPQCompact destructor will unmap the file.

Definition at line 476 of file index_io.cpp.

Classes

Typedefs

Enumerations

Functions

Variables

Detailed Description

Function Documentation