#include <IndexIVFPQ.h>
Public Member Functions | |
IndexIVFPQ (Index *quantizer, size_t d, size_t nlist, size_t M, size_t nbits_per_idx) | |
virtual void | set_typename () override |
virtual void | add_with_ids (idx_t n, const float *x, const long *xids=nullptr) override |
void | add_core_o (idx_t n, const float *x, const long *xids, float *residuals_2, const long *precomputed_idx=nullptr) |
virtual void | search (idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override |
virtual void | reset () override |
removes all elements from the database. | |
virtual long | remove_ids (const IDSelector &sel) override |
virtual void | train_residual (idx_t n, const float *x) override |
trains the product quantizer | |
void | train_residual_o (idx_t n, const float *x, float *residuals_2) |
same as train_residual, also output 2nd level residuals | |
virtual void | reconstruct_n (idx_t i0, idx_t ni, float *recons) const override |
virtual void | reconstruct (idx_t key, float *recons) const override |
size_t | find_duplicates (idx_t *ids, size_t *lims) const |
void | encode (long key, const float *x, uint8_t *code) const |
void | encode_multiple (size_t n, const long *keys, const float *x, uint8_t *codes) const |
same as encode, for multiple points at once | |
virtual void | search_knn_with_key (size_t nx, const float *qx, const long *keys, const float *coarse_dis, float_maxheap_array_t *res, bool store_pairs=false) const |
void | precompute_table () |
build precomputed table More... | |
virtual void | merge_from_residuals (IndexIVF &other) override |
used to implement merging | |
void | copy_subset_to (IndexIVFPQ &other, int subset_type, long a1, long a2) const |
Public Member Functions inherited from faiss::IndexIVF | |
IndexIVF (Index *quantizer, size_t d, size_t nlist, MetricType metric=METRIC_INNER_PRODUCT) | |
virtual void | train (idx_t n, const float *x) override |
Trains the quantizer and calls train_residual to train sub-quantizers. | |
virtual void | add (idx_t n, const float *x) override |
Quantizes x and calls add_with_key. | |
virtual void | merge_from (IndexIVF &other, idx_t add_id) |
void | make_direct_map () |
initialize a direct map | |
double | imbalance_factor () const |
1 = perfectly balanced, >1 = imbalanced | |
void | print_stats () const |
display some stats about the inverted lists | |
Public Member Functions inherited from faiss::Index | |
Index (idx_t d=0, MetricType metric=METRIC_INNER_PRODUCT) | |
virtual void | range_search (idx_t n, const float *x, float radius, RangeSearchResult *result) const |
void | assign (idx_t n, const float *x, idx_t *labels, idx_t k=1) |
void | compute_residual (const float *x, float *residual, idx_t key) const |
void | display () const |
virtual std::string | get_typename () const |
Public Attributes | |
bool | by_residual |
Encode residual or plain vector? | |
int | use_precomputed_table |
if by_residual, build precomputed tables | |
size_t | code_size |
code size per vector in bytes | |
ProductQuantizer | pq |
produces the codes | |
bool | do_polysemous_training |
reorder PQ centroids after training? | |
PolysemousTraining * | polysemous_training |
if NULL, use default | |
size_t | scan_table_threshold |
use table computation or on-the-fly? | |
size_t | max_codes |
max nb of codes to visit to do a query | |
int | polysemous_ht |
Hamming thresh for polysemous filtering. | |
std::vector< std::vector < uint8_t > > | codes |
std::vector< float > | precomputed_table |
Public Attributes inherited from faiss::IndexIVF | |
size_t | nlist |
number of possible key values | |
size_t | nprobe |
number of probes at query time | |
Index * | quantizer |
quantizer that maps vectors to inverted lists | |
bool | quantizer_trains_alone |
just pass over the trainset to quantizer | |
bool | own_fields |
whether object owns the quantizer | |
ClusteringParameters | cp |
to override default clustering params | |
std::vector< std::vector< long > > | ids |
Inverted lists for indexes. | |
bool | maintain_direct_map |
map for direct access to the elements. Enables reconstruct(). | |
std::vector< long > | direct_map |
Public Attributes inherited from faiss::Index | |
std::string | index_typename |
int | d |
vector dimension | |
idx_t | ntotal |
total nb of indexed vectors | |
bool | verbose |
verbosity level | |
bool | is_trained |
set if the Index does not require training, or if training is done already | |
MetricType | metric_type |
type of metric this index uses for search | |
Additional Inherited Members | |
Public Types inherited from faiss::Index | |
typedef long | idx_t |
all indices are this type | |
Inverted file with Product Quantizer encoding. Each residual vector is encoded as a product quantizer code.
Definition at line 30 of file IndexIVFPQ.h.
void faiss::IndexIVFPQ::add_core_o | ( | idx_t | n, |
const float * | x, | ||
const long * | xids, | ||
float * | residuals_2, | ||
const long * | precomputed_idx = nullptr |
||
) |
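same as add_core, also: outputs the 2nd level residuals if residuals_2 is non-null, and uses precomputed list numbers if precomputed_idx is non-null.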
Definition at line 176 of file IndexIVFPQ.cpp.
|
override virtual |
Same as add, but stores xids instead of sequential ids.
The default implementation fails with an assertion, as it is not supported by all indexes.
xids | if non-null, ids to store for the vectors (size n) |
Reimplemented from faiss::Index.
Reimplemented in faiss::IndexIVFPQR.
Definition at line 170 of file IndexIVFPQ.cpp.
void faiss::IndexIVFPQ::copy_subset_to | ( | IndexIVFPQ & | other, |
int | subset_type, | ||
long | a1, | ||
long | a2 | ||
) | const |
copy a subset of the index entries to the other index
if subset_type == 0: copies ids in [a1, a2); if subset_type == 1: copies ids for which id % a1 == a2
Definition at line 317 of file IndexIVFPQ.cpp.
size_t faiss::IndexIVFPQ::find_duplicates | ( | idx_t * | ids, |
size_t * | lims | ||
) | const |
Find exact duplicates in the dataset.
the duplicates are returned in pre-allocated arrays (see the max sizes).
lims | limits between groups of duplicates (max size ntotal / 2 + 1) |
ids | ids[lims[i]] : ids[lims[i+1]-1] is a group of duplicates (max size ntotal) |
Definition at line 1130 of file IndexIVFPQ.cpp.
void faiss::IndexIVFPQ::precompute_table | ( | ) |
build precomputed table
Precomputed tables for residuals
During IVFPQ search with by_residual, we compute
d = || x - y_C - y_R ||^2
where x is the query vector, y_C the coarse centroid, y_R the refined PQ centroid. The expression can be decomposed as:
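d = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C|y_R) - 2 * (x|y_R)
where term 1 = || x - y_C ||^2, term 2 = || y_R ||^2 + 2 * (y_C|y_R), term 3 = - 2 * (x|y_R), and (a|b) denotes the inner product of a and b.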
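When using multiprobe, we use the following decomposition:
- term 1 (|| x - y_C ||^2) is the distance to the coarse centroid, which is already available from the coarse quantization stage;
- term 2 (|| y_R ||^2 + 2 * (y_C|y_R)) does not involve x and can therefore be precomputed, at the cost of a table of size nlist * M * ksub;
- term 3 (- 2 * (x|y_R)) involves only the query and the PQ centroids, so it is computed per query as an ordinary (non-residual) distance table.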
Since y_R is defined by a product quantizer, it is split across subvectors and stored separately for each subvector. If the coarse quantizer is a MultiIndexQuantizer then the table can be stored more compactly.
At search time, the tables for term 2 and term 3 are added up. This is faster when the length of the lists is > ksub * M.
Definition at line 376 of file IndexIVFPQ.cpp.
|
override virtual |
Reconstruct a stored vector (or an approximation if lossy coding)
this function may not be defined for some indexes
key | id of the vector to reconstruct |
recons | reconstructed vector (size d) |
Reimplemented from faiss::Index.
Definition at line 287 of file IndexIVFPQ.cpp.
Reconstruct a subset of the indexed vectors
i0 | first vector to reconstruct |
ni | nb of vectors to reconstruct |
recons | output array of reconstructed vectors, size ni * d |
Reimplemented from faiss::Index.
Reimplemented in faiss::IndexIVFPQR.
Definition at line 258 of file IndexIVFPQ.cpp.
|
override virtual |
removes IDs from the index. Not supported by all indexes
Reimplemented from faiss::Index.
Reimplemented in faiss::IndexIVFPQR.
Definition at line 1073 of file IndexIVFPQ.cpp.
|
override virtual |
query n vectors of dimension d to the index.
return at most k vectors. If there are not enough results for a query, the result array is padded with -1s.
x | input vectors to search, size n * d |
labels | output labels of the NNs, size n*k |
distances | output pairwise distances, size n*k |
Implements faiss::Index.
Reimplemented in faiss::IndexIVFPQR.
Definition at line 1045 of file IndexIVFPQ.cpp.
|
virtual |
search a set of vectors, that are pre-quantized by the IVF quantizer. Fill in the corresponding heaps with the query results.
nx | nb of vectors to query |
qx | query vectors, size nx * d |
keys | coarse quantization indices, size nx * nprobe |
coarse_dis | distances to coarse centroids, size nx * nprobe |
res | heaps for all the results, gives the nprobe |
store_pairs | store inv list index + inv list offset in the upper/lower 32 bits of the result, instead of ids (used for reranking). |
Reimplemented in faiss::IndexIVFPQCompact.
Definition at line 946 of file IndexIVFPQ.cpp.
std::vector<float> faiss::IndexIVFPQ::precomputed_table |
if use_precomputed_table: size nlist * pq.M * pq.ksub
Definition at line 48 of file IndexIVFPQ.h.