faiss/IndexIVF.h

188 lines
5.4 KiB
C++

/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the CC-by-NC license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_INDEX_IVF_H
#define FAISS_INDEX_IVF_H
#include <vector>
#include "Index.h"
#include "Clustering.h"
#include "Heap.h"
namespace faiss {
/** Index based on a inverted file (IVF)
*
* In the inverted file, the quantizer (an Index instance) provides a
* quantization index for each vector to be added. The quantization
* index maps to a list (aka inverted list or posting list), where the
* id of the vector is then stored.
*
* At search time, the vector to be searched is also quantized, and
* only the list corresponding to the quantization index is
* searched. This speeds up the search by making it
* non-exhaustive. This can be relaxed using multi-probe search: a few
* (nprobe) quantization indices are selected and several inverted
* lists are visited.
*
* Sub-classes implement a post-filtering of the index that refines
* the distance estimation from the query to databse vectors.
*/
struct IndexIVF: Index {
size_t nlist; ///< number of possible key values
size_t nprobe; ///< number of probes at query time
Index * quantizer; ///< quantizer that maps vectors to inverted lists
bool quantizer_trains_alone; ///< just pass over the trainset to quantizer
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
std::vector < std::vector<long> > ids; ///< Inverted lists for indexes
/// map for direct access to the elements. Enables reconstruct().
bool maintain_direct_map;
std::vector <long> direct_map;
/** The Inverted file takes a quantizer (an Index) on input,
* which implements the function mapping a vector to a list
* identifier. The pointer is borrowed: the quantizer should not
* be deleted while the IndexIVF is in use.
*/
IndexIVF (Index * quantizer, size_t d, size_t nlist,
MetricType metric = METRIC_INNER_PRODUCT);
virtual void reset () override;
/// Trains the quantizer and calls train_residual to train sub-quantizers
virtual void train (idx_t n, const float *x) override;
/// Quantizes x and calls add_with_key
virtual void add (idx_t n, const float *x) override;
/// Sub-classes that encode the residuals can train their encoders here
/// does nothing by default
virtual void train_residual (idx_t n, const float *x);
/** moves the entries from another dataset to self. On output,
* other is empty. add_id is added to all moved ids (for
* sequential ids, this would be this->ntotal */
virtual void merge_from (IndexIVF &other, idx_t add_id);
/** implemented by sub-classes */
virtual void merge_from_residuals (IndexIVF &other) = 0;
virtual ~IndexIVF();
/// intialize a direct map
void make_direct_map ();
/// 1= perfectly balanced, >1: imbalanced
double imbalance_factor () const;
/// display some stats about the inverted lists
void print_stats () const;
IndexIVF ();
};
/** Inverted file with stored vectors. Here the inverted file
* pre-selects the vectors to be searched, but they are not otherwise
* encoded.
*/
struct IndexIVFFlat: IndexIVF {
/** Inverted list of original vectors. Each list is a nl * d
* matrix, where nl is the nb of vectors stored in the list. */
std::vector < std::vector<float> > vecs;
IndexIVFFlat (
Index * quantizer, size_t d, size_t nlist_,
MetricType = METRIC_INNER_PRODUCT);
/** Return the typeName of the index (which includes main parameters */
virtual void set_typename () override;
/// same as add_with_ids, with precomputed coarse quantizer
void add_core (idx_t n, const float * x, const long *xids,
const long *precomputed_idx);
/// implemented for all IndexIVF* classes
virtual void add_with_ids (idx_t n, const float * x, const long *xids)
override;
virtual void search (
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
virtual void range_search (
idx_t n, const float *x, float radius,
RangeSearchResult *result) const override;
/** copy a subset of the entries index to the other index
*
* if subset_type == 0: copies ids in [a1, a2)
* if subset_type == 1: copies ids if id % a1 == a2
*/
void copy_subset_to (IndexIVFFlat & other, int subset_type,
long a1, long a2) const;
virtual void reset() override;
virtual long remove_ids (const IDSelector & sel) override;
/// Implementation of the search for the inner product metric
void search_knn_inner_product (
size_t nx, const float * x,
const long * keys,
float_minheap_array_t * res) const;
/// Implementation of the search for the L2 metric
void search_knn_L2sqr (
size_t nx, const float * x,
const long * keys,
float_maxheap_array_t * res) const;
virtual void reconstruct (idx_t key, float * recons) const override;
virtual void merge_from_residuals (IndexIVF &other) override;
IndexIVFFlat () {}
};
} // namespace faiss
#endif