Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
Classes | Typedefs | Enumerations | Functions | Variables
faiss Namespace Reference

Classes

struct  AutoTuneCriterion
 
struct  OneRecallAtRCriterion
 
struct  IntersectionCriterion
 
struct  OperatingPoint
 
struct  OperatingPoints
 
struct  ParameterRange
 possible values of a parameter, sorted from least to most expensive/accurate More...
 
struct  ParameterSpace
 
struct  RangeSearchResult
 
struct  IDSelector
 
struct  IDSelectorRange
 
struct  IDSelectorBatch
 
struct  BufferList
 
struct  RangeSearchPartialResult
 the entries in the buffers are split per query More...
 
struct  ClusteringParameters
 
struct  Clustering
 
struct  HammingComputer4
 
struct  HammingComputer8
 
struct  HammingComputer16
 
struct  HammingComputer20
 
struct  HammingComputer32
 
struct  HammingComputer64
 
struct  HammingComputerM8
 
struct  HammingComputerM4
 
struct  HammingComputer
 
struct  GenHammingComputer8
 
struct  GenHammingComputer16
 
struct  GenHammingComputer32
 
struct  GenHammingComputerM8
 
struct  CMax
 
struct  CMin
 
struct  HeapArray
 
struct  Index
 
struct  Cloner
 
struct  IndexFlat
 
struct  IndexFlatIP
 
struct  IndexFlatL2
 
struct  IndexFlatL2BaseShift
 
struct  IndexRefineFlat
 
struct  IndexFlat1D
 optimized version for 1D "vectors" More...
 
struct  IndexIVF
 
struct  IndexIVFFlat
 
struct  CodeCmp
 
struct  IndexIVFPQ
 
struct  IndexIVFPQStats
 
struct  IndexIVFPQR
 
struct  IndexIVFPQCompact
 
struct  IndexLSH
 
struct  ArgSort
 
struct  SortedArray
 
struct  SemiSortedArray
 
struct  MinSumK
 
struct  IndexPQ
 
struct  IndexPQStats
 
struct  MultiIndexQuantizer
 
struct  IndexIDMap
 
struct  IndexShards
 
struct  IndexSplitVectors
 
struct  Score3Computer
 
struct  IndirectSort
 
struct  RankingScore2
 
struct  SimulatedAnnealingParameters
 parameters used for the simulated annealing method More...
 
struct  PermutationObjective
 abstract class for the loss function More...
 
struct  ReproduceDistancesObjective
 
struct  SimulatedAnnealingOptimizer
 Simulated annealing optimization algorithm for permutations. More...
 
struct  PolysemousTraining
 optimizes the order of indices in a ProductQuantizer More...
 
struct  ProductQuantizer
 
struct  NopDistanceCorrection
 
struct  BaseShiftDistanceCorrection
 
struct  RandomGenerator
 random generator that can be used in multithreaded contexts More...
 
struct  VectorTransform
 
struct  LinearTransform
 
struct  RandomRotationMatrix
 Randomly rotate a set of vectors. More...
 
struct  PCAMatrix
 
struct  OPQMatrix
 
struct  RemapDimensionsTransform
 
struct  IndexPreTransform
 

Typedefs

typedef HeapArray< CMin< float,
long > > 
float_minheap_array_t
 
typedef HeapArray< CMin< int,
long > > 
int_minheap_array_t
 
typedef HeapArray< CMax< float,
long > > 
float_maxheap_array_t
 
typedef HeapArray< CMax< int,
long > > 
int_maxheap_array_t
 

Enumerations

enum  MetricType { METRIC_INNER_PRODUCT = 0, METRIC_L2 = 1 }
 Some algorithms support both an inner product version and an L2 search version.
 

Functions

Index * index_factory (int d, const char *description_in, MetricType metric)
 
float kmeans_clustering (size_t d, size_t n, size_t k, const float *x, float *centroids)
 
template<size_t nbits, typename T >
T hamming (const uint8_t *bs1, const uint8_t *bs2)
 
template<size_t nbits>
hamdis_t hamming (const uint64_t *bs1, const uint64_t *bs2)
 
template<>
hamdis_t hamming< 64 > (const uint64_t *pa, const uint64_t *pb)
 
template<>
hamdis_t hamming< 128 > (const uint64_t *pa, const uint64_t *pb)
 
template<>
hamdis_t hamming< 256 > (const uint64_t *pa, const uint64_t *pb)
 
hamdis_t hamming (const uint64_t *bs1, const uint64_t *bs2, size_t nwords)
 
template<size_t nbits>
void hammings (const uint64_t *bs1, const uint64_t *bs2, size_t n1, size_t n2, hamdis_t *dis)
 
void hammings (const uint64_t *bs1, const uint64_t *bs2, size_t n1, size_t n2, size_t nwords, hamdis_t *__restrict dis)
 
template<size_t nbits>
void hamming_count_thres (const uint64_t *bs1, const uint64_t *bs2, size_t n1, size_t n2, hamdis_t ht, size_t *nptr)
 
template<size_t nbits>
void crosshamming_count_thres (const uint64_t *dbs, size_t n, int ht, size_t *nptr)
 
template<size_t nbits>
size_t match_hamming_thres (const uint64_t *bs1, const uint64_t *bs2, size_t n1, size_t n2, int ht, long *idx, hamdis_t *hams)
 
void fvec2bitvec (const float *x, uint8_t *b, size_t d)
 
void fvecs2bitvecs (const float *x, uint8_t *b, size_t d, size_t n)
 
void bitvec_print (const uint8_t *b, size_t d)
 
void hammings (const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t ncodes, hamdis_t *__restrict dis)
 
void hammings_knn_core (int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes)
 
void hammings_knn (int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
 
void hamming_count_thres (const uint8_t *bs1, const uint8_t *bs2, size_t n1, size_t n2, hamdis_t ht, size_t ncodes, size_t *nptr)
 
void crosshamming_count_thres (const uint8_t *dbs, size_t n, hamdis_t ht, size_t ncodes, size_t *nptr)
 
size_t match_hamming_thres (const uint8_t *bs1, const uint8_t *bs2, size_t n1, size_t n2, hamdis_t ht, size_t ncodes, long *idx, hamdis_t *dis)
 
void generalized_hammings_knn (int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)
 
int popcount64 (uint64_t x)
 
void hammings (const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)
 
 SPECIALIZED_HC (4)
 
 SPECIALIZED_HC (8)
 
 SPECIALIZED_HC (16)
 
 SPECIALIZED_HC (20)
 
 SPECIALIZED_HC (32)
 
 SPECIALIZED_HC (64)
 
int generalized_hamming_64 (uint64_t a)
 
template<class C >
void heap_pop (size_t k, typename C::T *bh_val, typename C::TI *bh_ids)
 
template<class C >
void heap_push (size_t k, typename C::T *bh_val, typename C::TI *bh_ids, typename C::T val, typename C::TI ids)
 
template<typename T >
void minheap_pop (size_t k, T *bh_val, long *bh_ids)
 
template<typename T >
void minheap_push (size_t k, T *bh_val, long *bh_ids, T val, long ids)
 
template<typename T >
void maxheap_pop (size_t k, T *bh_val, long *bh_ids)
 
template<typename T >
void maxheap_push (size_t k, T *bh_val, long *bh_ids, T val, long ids)
 
template<class C >
void heap_heapify (size_t k, typename C::T *bh_val, typename C::TI *bh_ids, const typename C::T *x=nullptr, const typename C::TI *ids=nullptr, size_t k0=0)
 
template<typename T >
void minheap_heapify (size_t k, T *bh_val, long *bh_ids, const T *x=nullptr, const long *ids=nullptr, size_t k0=0)
 
template<typename T >
void maxheap_heapify (size_t k, T *bh_val, long *bh_ids, const T *x=nullptr, const long *ids=nullptr, size_t k0=0)
 
template<class C >
void heap_addn (size_t k, typename C::T *bh_val, typename C::TI *bh_ids, const typename C::T *x, const typename C::TI *ids, size_t n)
 
template<typename T >
void minheap_addn (size_t k, T *bh_val, long *bh_ids, const T *x, const long *ids, size_t n)
 
template<typename T >
void maxheap_addn (size_t k, T *bh_val, long *bh_ids, const T *x, const long *ids, size_t n)
 
template<typename C >
size_t heap_reorder (size_t k, typename C::T *bh_val, typename C::TI *bh_ids)
 
template<typename T >
size_t minheap_reorder (size_t k, T *bh_val, long *bh_ids)
 
template<typename T >
size_t maxheap_reorder (size_t k, T *bh_val, long *bh_ids)
 
template<class C >
void indirect_heap_pop (size_t k, const typename C::T *bh_val, typename C::TI *bh_ids)
 
template<class C >
void indirect_heap_push (size_t k, const typename C::T *bh_val, typename C::TI *bh_ids, typename C::TI id)
 
void write_VectorTransform (const VectorTransform *vt, FILE *f)
 
void write_ProductQuantizer (const ProductQuantizer *pq, const char *fname)
 
void write_index (const Index *idx, FILE *f)
 
void write_index (const Index *idx, const char *fname)
 
void write_VectorTransform (const VectorTransform *vt, const char *fname)
 
VectorTransform * read_VectorTransform (FILE *f)
 
ProductQuantizer * read_ProductQuantizer (const char *fname)
 
Index * read_index (FILE *f, bool try_mmap)
 
Index * read_index (const char *fname, bool try_mmap)
 
VectorTransform * read_VectorTransform (const char *fname)
 
Index * clone_index (const Index *index)
 
template<class C >
void partial_sort (int k, int n, const typename C::T *vals, typename C::TI *perm)
 
template<typename CT , class C >
void pq_estimators_from_tables_Mmul4 (int M, const CT *codes, size_t ncodes, const float *__restrict dis_table, size_t ksub, size_t k, float *heap_dis, long *heap_ids)
 
template<typename CT , class C >
void pq_estimators_from_tables_M4 (const CT *codes, size_t ncodes, const float *__restrict dis_table, size_t ksub, size_t k, float *heap_dis, long *heap_ids)
 
double getmillisecs ()
 ms elapsed since some arbitrary epoch
 
void float_rand (float *x, size_t n, long seed)
 
void float_randn (float *x, size_t n, long seed)
 
void long_rand (long *x, size_t n, long seed)
 
void rand_perm (int *perm, size_t n, long seed)
 
void byte_rand (uint8_t *x, size_t n, long seed)
 
void reflection (const float *__restrict u, float *__restrict x, size_t n, size_t d, size_t nu)
 
void reflection_ref (const float *u, float *x, size_t n, size_t d, size_t nu)
 
float fvec_L2sqr (const float *x, const float *y, size_t d)
 Squared L2 distance between two vectors.
 
float fvec_L2sqr_ref (const float *x, const float *y, size_t d)
 
float fvec_inner_product (const float *x, const float *y, size_t d)
 
float fvec_inner_product_ref (const float *x, const float *y, size_t d)
 
float fvec_norm_L2sqr (const float *x, size_t d)
 
float fvec_norm_L2sqr_ref (const float *__restrict x, size_t d)
 
void fvec_inner_products_ny (float *__restrict ip, const float *x, const float *y, size_t d, size_t ny)
 
void fvec_L2sqr_ny (float *__restrict dis, const float *x, const float *y, size_t d, size_t ny)
 
void fvec_norms_L2 (float *__restrict nr, const float *__restrict x, size_t d, size_t nx)
 
void fvec_norms_L2sqr (float *__restrict nr, const float *__restrict x, size_t d, size_t nx)
 
void fvec_renorm_L2 (size_t d, size_t nx, float *__restrict x)
 
void knn_inner_product (const float *x, const float *y, size_t d, size_t nx, size_t ny, float_minheap_array_t *res)
 
void knn_L2sqr (const float *x, const float *y, size_t d, size_t nx, size_t ny, float_maxheap_array_t *res)
 
void knn_L2sqr_base_shift (const float *x, const float *y, size_t d, size_t nx, size_t ny, float_maxheap_array_t *res, const float *base_shift)
 
void fvec_inner_products_by_idx (float *__restrict ip, const float *x, const float *y, const long *__restrict ids, size_t d, size_t nx, size_t ny)
 
void fvec_L2sqr_by_idx (float *__restrict dis, const float *x, const float *y, const long *__restrict ids, size_t d, size_t nx, size_t ny)
 
void knn_inner_products_by_idx (const float *x, const float *y, const long *__restrict ids, size_t d, size_t nx, size_t ny, float_minheap_array_t *res)
 
void knn_L2sqr_by_idx (const float *x, const float *y, const long *__restrict ids, size_t d, size_t nx, size_t ny, float_maxheap_array_t *res)
 
void range_search_L2sqr (const float *x, const float *y, size_t d, size_t nx, size_t ny, float radius, RangeSearchResult *res)
 
void range_search_inner_product (const float *x, const float *y, size_t d, size_t nx, size_t ny, float radius, RangeSearchResult *result)
 same as range_search_L2sqr for the inner product similarity
 
void inner_product_to_L2sqr (float *__restrict dis, const float *nr1, const float *nr2, size_t n1, size_t n2)
 
void matrix_qr (int m, int n, float *a)
 
void pairwise_L2sqr (long d, long nq, const float *xq, long nb, const float *xb, float *dis, long ldq, long ldb, long ldd)
 
int km_update_centroids (const float *x, float *centroids, long *assign, size_t d, size_t k, size_t n)
 
void ranklist_handle_ties (int k, long *idx, const float *dis)
 
size_t ranklist_intersection_size (size_t k1, const long *v1, size_t k2, const long *v2_in)
 
double imbalance_factor (int k, const int *hist)
 same, takes a histogram as input
 
double imbalance_factor (int n, int k, const long *assign)
 a balanced assignment has an imbalance factor (IF) of 1
 
int ivec_hist (size_t n, const int *v, int vmax, int *hist)
 compute histogram on v
 
void bincode_hist (size_t n, size_t nbits, const uint8_t *codes, int *hist)
 
size_t ivec_checksum (size_t n, const int *a)
 compute a checksum on a table.
 
void fvec_argsort (size_t n, const float *vals, size_t *perm)
 
void fvec_argsort_parallel (size_t n, const float *vals, size_t *perm)
 
void fvec_madd (size_t n, const float *a, float bf, const float *b, float *c)
 
int fvec_madd_and_argmin (size_t n, const float *a, float bf, const float *b, float *c)
 
size_t get_mem_usage_kb ()
 get current RSS usage in kB
 
void fvec_inner_products_ny (float *ip, const float *x, const float *y, size_t d, size_t ny)
 
void fvec_norms_L2 (float *ip, const float *x, size_t d, size_t nx)
 
void fvec_norms_L2sqr (float *ip, const float *x, size_t d, size_t nx)
 same as fvec_norms_L2, but computes square norms
 
void fvec_renorm_L2 (size_t d, size_t nx, float *x)
 
void reflection (const float *u, float *x, size_t n, size_t d, size_t nu)
 

Variables

int read_old_fmt_hack = 0
 
IndexIVFPQStats indexIVFPQ_stats
 
IndexPQStats indexPQ_stats
 

Detailed Description

Copyright (c) 2015-present, Facebook, Inc. All rights reserved.

This source code is licensed under the CC-by-NC license found in the LICENSE file in the root directory of this source tree.

Copyright (c) 2015-present, Facebook, Inc. All rights reserved.

This source code is licensed under the CC-by-NC license found in the LICENSE file in the root directory of this source tree. Multi-dimensional array class for CUDA device and host usage. Originally from Facebook's fbcunn, since added to the Torch GPU library cutorch as well.

Copyright (c) 2015-present, Facebook, Inc. All rights reserved.

This source code is licensed under the CC-by-NC license found in the LICENSE file in the root directory of this source tree.

Throughout the library, vectors are provided as float * pointers. Most algorithms can be optimized when several vectors are processed (added/searched) together in a batch. In this case, they are passed in as a matrix. When n vectors of size d are provided as float * x, component j of vector i is

x[ i * d + j ]

where 0 <= i < n and 0 <= j < d. In other words, matrices are always compact. When specifying the size of the matrix, we call it an n*d matrix, which implies a row-major storage.

Copyright (c) 2015-present, Facebook, Inc. All rights reserved.

This source code is licensed under the CC-by-NC license found in the LICENSE file in the root directory of this source tree. Copyright 2004-present Facebook. All Rights Reserved -*- c++ -*-

A few utility functions for similarity search:

Copyright (c) 2015-present, Facebook, Inc. All rights reserved.

This source code is licensed under the CC-by-NC license found in the LICENSE file in the root directory of this source tree. Defines a few objects that apply transformations to a set of vectors. Often these are pre-processing steps.

Function Documentation

void faiss::bincode_hist ( size_t  n,
size_t  nbits,
const uint8_t *  codes,
int *  hist 
)

Compute histogram of bits on a code array

Parameters
codes  size(n, nbits / 8)
hist  size(nbits): nb of 1s in the array of codes

Definition at line 1467 of file utils.cpp.

void faiss::fvec_madd ( size_t  n,
const float *  a,
float  bf,
const float *  b,
float *  c 
)

compute c := a + bf * b for a, b and c tables

Parameters
n  size of the tables
a  size n
b  size n
c  result table, size n

Definition at line 1707 of file utils.cpp.

int faiss::fvec_madd_and_argmin ( size_t  n,
const float *  a,
float  bf,
const float *  b,
float *  c 
)

same as fvec_madd, also return index of the min of the result table

Returns
index of the min of table c

Definition at line 1781 of file utils.cpp.

float faiss::fvec_norm_L2sqr ( const float *  x,
size_t  d 
)

squared norm of a vector

Definition at line 511 of file utils.cpp.

void faiss::fvec_norms_L2 ( float *  ip,
const float *  x,
size_t  d,
size_t  nx 
)

compute the L2 norms for a set of vectors

Parameters
ip  output norms, size nx
x  set of vectors, size nx * d
void faiss::generalized_hammings_knn ( int_maxheap_array_t *  ha,
const uint8_t *  a,
const uint8_t *  b,
size_t  nb,
size_t  code_size,
int  ordered = true 
)

generalized Hamming distances (= count number of code bytes that are the same)

Definition at line 635 of file hamming.cpp.

void faiss::hammings ( const uint8_t *  a,
const uint8_t *  b,
size_t  na,
size_t  nb,
size_t  nbytespercode,
hamdis_t *  dis 
)

Compute a set of Hamming distances between na and nb binary vectors

Parameters
a  size na * nbytespercode
b  size nb * nbytespercode
nbytespercode  should be multiple of 8
dis  output distances, size na * nb
void faiss::hammings_knn ( int_maxheap_array_t *  ha,
const uint8_t *  a,
const uint8_t *  b,
size_t  nb,
size_t  ncodes,
int  ordered 
)

Return the k smallest Hamming distances for a set of binary query vectors

Parameters
a  queries, size ha->nh * ncodes
b  database, size nb * ncodes
nb  number of database vectors
ncodes  size of the binary codes (bytes)
ordered  if != 0: order the results by decreasing distance (may be bottleneck for k/n > 0.01)

Definition at line 475 of file hamming.cpp.

template<class C >
void faiss::heap_pop ( size_t  k,
typename C::T *  bh_val,
typename C::TI *  bh_ids 
)
inline

Pops the top element from the heap defined by bh_val[0..k-1] and bh_ids[0..k-1]. on output the element at k-1 is undefined.

Definition at line 91 of file Heap.h.

template<class C >
void faiss::heap_push ( size_t  k,
typename C::T *  bh_val,
typename C::TI *  bh_ids,
typename C::T  val,
typename C::TI  ids 
)
inline

Pushes the element (val, ids) into the heap bh_val[0..k-2] and bh_ids[0..k-2]. on output the element at k-1 is defined.

Definition at line 127 of file Heap.h.

Index * faiss::index_factory ( int  d,
const char *  description,
MetricType  metric = METRIC_L2 
)

Build an index with the sequence of processing steps described in the string.

Definition at line 623 of file AutoTune.cpp.

int faiss::km_update_centroids ( const float *  x,
float *  centroids,
long *  assign,
size_t  d,
size_t  k,
size_t  n 
)

For k-means: update stage. Returns nb of split clusters.

Definition at line 1286 of file utils.cpp.

float faiss::kmeans_clustering ( size_t  d,
size_t  n,
size_t  k,
const float *  x,
float *  centroids 
)

simplified interface

Parameters
d  dimension of the data
n  nb of training vectors
k  nb of output centroids
x  training set (size n * d)
centroids  output centroids (size k * d)
Returns
final quantization error

Definition at line 204 of file Clustering.cpp.

void faiss::knn_inner_product ( const float *  x,
const float *  y,
size_t  d,
size_t  nx,
size_t  ny,
float_minheap_array_t *  res 
)

Return the k nearest neighbors of each of the nx vectors x among the ny vectors y, w.r.t. max inner product

Parameters
x  query vectors, size nx * d
y  database vectors, size ny * d
res  result array, which also provides k. Sorted on output

Definition at line 830 of file utils.cpp.

void faiss::knn_L2sqr ( const float *  x,
const float *  y,
size_t  d,
size_t  nx,
size_t  ny,
float_maxheap_array_t *  res 
)

Same as knn_inner_product, for the L2 distance

Definition at line 850 of file utils.cpp.

void faiss::knn_L2sqr_base_shift ( const float *  x,
const float *  y,
size_t  d,
size_t  nx,
size_t  ny,
float_maxheap_array_t *  res,
const float *  base_shift 
)

same as knn_L2sqr, but base_shift[bno] is subtracted from all computed distances.

Parameters
base_shift  size ny

Definition at line 870 of file utils.cpp.

void faiss::matrix_qr ( int  m,
int  n,
float *  a 
)

compute the Q of the QR decomposition for m > n

Parameters
a  size n * m: input matrix and output Q

Definition at line 1206 of file utils.cpp.

void faiss::pairwise_L2sqr ( long  d,
long  nq,
const float *  xq,
long  nb,
const float *  xb,
float *  dis,
long  ldq = -1,
long  ldb = -1,
long  ldd = -1 
)

Compute pairwise distances between sets of vectors

Parameters
d  dimension of the vectors
nq  nb of query vectors
nb  nb of database vectors
xq  query vectors (size nq * d)
xb  database vectors (size nb * d)
dis  output distances (size nq * nb)
ldq,ldb,ldd  strides for the matrices

Definition at line 1228 of file utils.cpp.

template<class C >
void faiss::partial_sort ( int  k,
int  n,
const typename C::T *  vals,
typename C::TI *  perm 
)

Array has n values. Sort the k first ones and copy the other ones into elements k..n-1

Definition at line 528 of file IndexPQ.cpp.

void faiss::range_search_L2sqr ( const float *  x,
const float *  y,
size_t  d,
size_t  nx,
size_t  ny,
float  radius,
RangeSearchResult *  result 
)

Return, for each of the nx vectors x, the neighbors among the ny vectors y that fall within the given search radius, w.r.t. the squared L2 distance

Parameters
x  query vectors, size nx * d
y  database vectors, size ny * d
radius  search radius around the x vectors
result  result structure

Definition at line 1151 of file utils.cpp.

void faiss::ranklist_handle_ties ( int  k,
long *  idx,
const float *  dis 
)

distances are supposed to be sorted. Sorts indices with same distance

Definition at line 1378 of file utils.cpp.

size_t faiss::ranklist_intersection_size ( size_t  k1,
const long *  v1,
size_t  k2,
const long *  v2 
)

count the number of common elements between v1 and v2. Algorithm = sorting + bisection to avoid double-counting duplicates

Definition at line 1394 of file utils.cpp.

Index * faiss::read_index ( FILE *  f,
bool  try_mmap = false 
)

mmap'ing currently works only for IndexIVFPQCompact, the IndexIVFPQCompact destructor will unmap the file.

Definition at line 476 of file index_io.cpp.