2017-02-23 06:26:44 +08:00
|
|
|
/**
|
|
|
|
* Copyright (c) 2015-present, Facebook, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
2017-07-30 15:18:45 +08:00
|
|
|
* This source code is licensed under the BSD+Patents license found in the
|
2017-02-23 06:26:44 +08:00
|
|
|
* LICENSE file in the root directory of this source tree.
|
|
|
|
*/
|
|
|
|
|
|
|
|
// -*- c++ -*-
|
|
|
|
|
|
|
|
#ifndef FAISS_INDEX_H
|
|
|
|
#define FAISS_INDEX_H
|
|
|
|
|
|
|
|
|
|
|
|
#include <cstdio>
|
|
|
|
#include <typeinfo>
|
|
|
|
#include <string>
|
|
|
|
#include <sstream>
|
|
|
|
|
2018-12-20 00:48:35 +08:00
|
|
|
/// Library version, split into major / minor / patch components.
#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 5
#define FAISS_VERSION_PATCH 1
|
2017-02-23 06:26:44 +08:00
|
|
|
|
|
|
|
/**
 * @namespace faiss
 *
 * Throughout the library, vectors are provided as float * pointers.
 * Most algorithms can be optimized when several vectors are processed
 * (added/searched) together in a batch. In this case, they are passed
 * in as a matrix. When n vectors of size d are provided as float * x,
 * component j of vector i is
 *
 *   x[ i * d + j ]
 *
 * where 0 <= i < n and 0 <= j < d. In other words, matrices are
 * always compact. When specifying the size of the matrix, we call it
 * an n*d matrix, which implies a row-major storage.
 */
|
|
|
|
|
|
|
|
|
|
|
|
namespace faiss {
|
|
|
|
|
|
|
|
|
2017-09-18 21:46:18 +08:00
|
|
|
/// Some algorithms support both an inner product version and a L2 search
/// version; this enum selects which one an index uses.
enum MetricType {
    METRIC_INNER_PRODUCT = 0,  ///< inner product variant
    METRIC_L2 = 1,             ///< L2 variant
};
|
|
|
|
|
|
|
|
|
|
|
|
/// Forward declarations; see AuxIndexStructures.h
struct IDSelector;
struct RangeSearchResult;
|
|
|
|
|
|
|
|
/** Abstract structure for an index
|
|
|
|
*
|
|
|
|
* Supports adding vertices and searching them.
|
|
|
|
*
|
|
|
|
* Currently only asymmetric queries are supported:
|
|
|
|
* database-to-database queries are not implemented.
|
|
|
|
*/
|
|
|
|
struct Index {
|
2019-03-29 23:32:28 +08:00
|
|
|
using idx_t = long; ///< all indices are this type
|
|
|
|
using component_t = float;
|
|
|
|
using distance_t = float;
|
2017-02-23 06:26:44 +08:00
|
|
|
|
|
|
|
int d; ///< vector dimension
|
|
|
|
idx_t ntotal; ///< total nb of indexed vectors
|
|
|
|
bool verbose; ///< verbosity level
|
|
|
|
|
|
|
|
/// set if the Index does not require training, or if training is done already
|
|
|
|
bool is_trained;
|
|
|
|
|
|
|
|
/// type of metric this index uses for search
|
|
|
|
MetricType metric_type;
|
|
|
|
|
2018-01-09 22:42:06 +08:00
|
|
|
explicit Index (idx_t d = 0, MetricType metric = METRIC_L2):
|
2017-02-23 06:26:44 +08:00
|
|
|
d(d),
|
|
|
|
ntotal(0),
|
|
|
|
verbose(false),
|
|
|
|
is_trained(true),
|
|
|
|
metric_type (metric) {}
|
|
|
|
|
2018-01-09 22:42:06 +08:00
|
|
|
virtual ~Index ();
|
2017-02-23 06:26:44 +08:00
|
|
|
|
|
|
|
|
|
|
|
/** Perform training on a representative set of vectors
|
|
|
|
*
|
|
|
|
* @param n nb of training vectors
|
|
|
|
* @param x training vecors, size n * d
|
|
|
|
*/
|
2018-01-09 22:42:06 +08:00
|
|
|
virtual void train(idx_t n, const float* x);
|
2017-02-23 06:26:44 +08:00
|
|
|
|
|
|
|
/** Add n vectors of dimension d to the index.
|
|
|
|
*
|
|
|
|
* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
|
|
|
|
* This function slices the input vectors in chuncks smaller than
|
|
|
|
* blocksize_add and calls add_core.
|
|
|
|
* @param x input matrix, size n * d
|
|
|
|
*/
|
|
|
|
virtual void add (idx_t n, const float *x) = 0;
|
|
|
|
|
|
|
|
/** Same as add, but stores xids instead of sequential ids.
|
|
|
|
*
|
|
|
|
* The default implementation fails with an assertion, as it is
|
|
|
|
* not supported by all indexes.
|
|
|
|
*
|
|
|
|
* @param xids if non-null, ids to store for the vectors (size n)
|
|
|
|
*/
|
|
|
|
virtual void add_with_ids (idx_t n, const float * x, const long *xids);
|
|
|
|
|
|
|
|
/** query n vectors of dimension d to the index.
|
|
|
|
*
|
|
|
|
* return at most k vectors. If there are not enough results for a
|
|
|
|
* query, the result array is padded with -1s.
|
|
|
|
*
|
|
|
|
* @param x input vectors to search, size n * d
|
|
|
|
* @param labels output labels of the NNs, size n*k
|
|
|
|
* @param distances output pairwise distances, size n*k
|
|
|
|
*/
|
|
|
|
virtual void search (idx_t n, const float *x, idx_t k,
|
|
|
|
float *distances, idx_t *labels) const = 0;
|
|
|
|
|
|
|
|
/** query n vectors of dimension d to the index.
|
|
|
|
*
|
|
|
|
* return all vectors with distance < radius. Note that many
|
|
|
|
* indexes do not implement the range_search (only the k-NN search
|
|
|
|
* is mandatory).
|
|
|
|
*
|
|
|
|
* @param x input vectors to search, size n * d
|
|
|
|
* @param radius search radius
|
|
|
|
* @param result result table
|
|
|
|
*/
|
|
|
|
virtual void range_search (idx_t n, const float *x, float radius,
|
|
|
|
RangeSearchResult *result) const;
|
|
|
|
|
|
|
|
/** return the indexes of the k vectors closest to the query x.
|
|
|
|
*
|
|
|
|
* This function is identical as search but only return labels of neighbors.
|
|
|
|
* @param x input vectors to search, size n * d
|
|
|
|
* @param labels output labels of the NNs, size n*k
|
|
|
|
*/
|
|
|
|
void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);
|
|
|
|
|
|
|
|
/// removes all elements from the database.
|
|
|
|
virtual void reset() = 0;
|
|
|
|
|
|
|
|
/** removes IDs from the index. Not supported by all indexes
|
|
|
|
*/
|
|
|
|
virtual long remove_ids (const IDSelector & sel);
|
|
|
|
|
|
|
|
/** Reconstruct a stored vector (or an approximation if lossy coding)
|
|
|
|
*
|
|
|
|
* this function may not be defined for some indexes
|
|
|
|
* @param key id of the vector to reconstruct
|
|
|
|
* @param recons reconstucted vector (size d)
|
|
|
|
*/
|
|
|
|
virtual void reconstruct (idx_t key, float * recons) const;
|
|
|
|
|
|
|
|
|
|
|
|
/** Reconstruct vectors i0 to i0 + ni - 1
|
|
|
|
*
|
|
|
|
* this function may not be defined for some indexes
|
|
|
|
* @param recons reconstucted vector (size ni * d)
|
|
|
|
*/
|
|
|
|
virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;
|
|
|
|
|
2018-01-09 22:42:06 +08:00
|
|
|
/** Similar to search, but also reconstructs the stored vectors (or an
|
|
|
|
* approximation in the case of lossy coding) for the search results.
|
|
|
|
*
|
|
|
|
* If there are not enough results for a query, the resulting arrays
|
|
|
|
* is padded with -1s.
|
|
|
|
*
|
|
|
|
* @param recons reconstructed vectors size (n, k, d)
|
|
|
|
**/
|
|
|
|
virtual void search_and_reconstruct (idx_t n, const float *x, idx_t k,
|
|
|
|
float *distances, idx_t *labels,
|
|
|
|
float *recons) const;
|
2017-02-23 06:26:44 +08:00
|
|
|
|
|
|
|
/** Computes a residual vector after indexing encoding.
|
|
|
|
*
|
|
|
|
* The residual vector is the difference between a vector and the
|
|
|
|
* reconstruction that can be decoded from its representation in
|
|
|
|
* the index. The residual can be used for multiple-stage indexing
|
|
|
|
* methods, like IndexIVF's methods.
|
|
|
|
*
|
|
|
|
* @param x input vector, size d
|
|
|
|
* @param residual output residual vector, size d
|
|
|
|
* @param key encoded index, as returned by search and assign
|
|
|
|
*/
|
|
|
|
void compute_residual (const float * x, float * residual, idx_t key) const;
|
|
|
|
|
|
|
|
/** Display the actual class name and some more info */
|
|
|
|
void display () const;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif
|