faiss/Index.h

/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */

// -*- c++ -*-

#ifndef FAISS_INDEX_H
#define FAISS_INDEX_H


#include <cstdio>
#include <typeinfo>
#include <string>
#include <sstream>

/// Library version number, split into its major / minor / patch components.
#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 5
#define FAISS_VERSION_PATCH 1

/**
 * @namespace faiss
 *
 * Throughout the library, vectors are provided as float * pointers.
 * Most algorithms can be optimized when several vectors are processed
 * (added/searched) together in a batch. In this case, they are passed
 * in as a matrix. When n vectors of size d are provided as float * x,
 * component j of vector i is
 *
 *   x[ i * d + j ]
 *
 * where 0 <= i < n and 0 <= j < d. In other words, matrices are
 * always compact. When specifying the size of the matrix, we call it
 * an n*d matrix, which implies a row-major storage.
 */


namespace faiss {


/// Tag naming the metric an algorithm works with. Some algorithms exist in
/// both an inner product version and an L2 search version.
enum MetricType {
    METRIC_INNER_PRODUCT = 0,   ///< inner product version
    METRIC_L2 = 1,              ///< L2 search version
};


/// Forward declarations; see AuxIndexStructures.h. This header only uses
/// these types through a reference or a pointer, so incomplete types suffice.
struct IDSelector;
struct RangeSearchResult;

/** Abstract structure for an index
 *
 * Supports adding vectors and searching them.
 *
 * Concrete indexes must at least implement add(), search() and reset(),
 * which are the pure virtual members of this interface.
 *
 * Currently only asymmetric queries are supported:
 * database-to-database queries are not implemented.
 */
struct Index {
    using idx_t = long;          ///< all indices are this type
    using component_t = float;   ///< scalar type of a vector component
    using distance_t = float;    ///< scalar type of a distance

    int d;                 ///< vector dimension
    idx_t ntotal;          ///< total nb of indexed vectors
    bool verbose;          ///< verbosity flag, off by default

    /// set if the Index does not require training, or if training is done already
    bool is_trained;

    /// type of metric this index uses for search
    MetricType metric_type;

    /** Construct an empty index (ntotal == 0, verbose off, is_trained set).
     *
     * @param d       vector dimension; accepted as idx_t but stored in the
     *                int member d, hence the explicit narrowing cast below
     * @param metric  metric used for search, L2 by default
     */
    explicit Index (idx_t d = 0, MetricType metric = METRIC_L2):
                    d(static_cast<int>(d)),
                    ntotal(0),
                    verbose(false),
                    is_trained(true),
                    metric_type (metric) {}

    virtual ~Index ();


    /** Perform training on a representative set of vectors
     *
     * @param n      nb of training vectors
     * @param x      training vectors, size n * d
     */
    virtual void train(idx_t n, const float* x);

    /** Add n vectors of dimension d to the index.
     *
     * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
     *
     * NOTE(review): this comment used to say that the input is sliced in
     * chunks smaller than blocksize_add and passed to add_core; neither name
     * is declared in this struct, so that description looks stale.
     *
     * @param n      nb of vectors to add
     * @param x      input matrix, size n * d
     */
    virtual void add (idx_t n, const float *x) = 0;

    /** Same as add, but stores xids instead of sequential ids.
     *
     * The default implementation fails with an assertion, as it is
     * not supported by all indexes.
     *
     * @param n    nb of vectors to add
     * @param x    input matrix, size n * d
     * @param xids if non-null, ids to store for the vectors (size n)
     */
    virtual void add_with_ids (idx_t n, const float * x, const idx_t *xids);

    /** query n vectors of dimension d to the index.
     *
     * return at most k vectors. If there are not enough results for a
     * query, the result array is padded with -1s.
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d
     * @param k           max nb of results returned per query
     * @param distances   output pairwise distances, size n*k
     * @param labels      output labels of the NNs, size n*k
     */
    virtual void search (idx_t n, const float *x, idx_t k,
                         float *distances, idx_t *labels) const = 0;

    /** query n vectors of dimension d to the index.
     *
     * return all vectors with distance < radius. Note that many
     * indexes do not implement the range_search (only the k-NN search
     * is mandatory).
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d
     * @param radius      search radius
     * @param result      result table
     */
    virtual void range_search (idx_t n, const float *x, float radius,
                               RangeSearchResult *result) const;

    /** return the indexes of the k vectors closest to the query x.
     *
     * This function is identical to search but only returns the labels
     * of the neighbors.
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d
     * @param labels      output labels of the NNs, size n*k
     * @param k           nb of neighbors per query, 1 by default
     */
    void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);

    /// removes all elements from the database.
    virtual void reset() = 0;

    /** removes IDs from the index. Not supported by all indexes
     *
     * @param sel   selects the ids to remove
     * @return      presumably the nb of removed elements -- TODO confirm
     *              against the implementation, which is not in this header
     */
    virtual idx_t remove_ids (const IDSelector & sel);

    /** Reconstruct a stored vector (or an approximation if lossy coding)
     *
     * this function may not be defined for some indexes
     * @param key         id of the vector to reconstruct
     * @param recons      reconstructed vector (size d)
     */
    virtual void reconstruct (idx_t key, float * recons) const;


    /** Reconstruct vectors i0 to i0 + ni - 1
     *
     * this function may not be defined for some indexes
     * @param i0          id of the first vector to reconstruct
     * @param ni          nb of vectors to reconstruct
     * @param recons      reconstructed vectors (size ni * d)
     */
    virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;

    /** Similar to search, but also reconstructs the stored vectors (or an
     * approximation in the case of lossy coding) for the search results.
     *
     * If there are not enough results for a query, the resulting arrays
     * are padded with -1s.
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d
     * @param k           max nb of results returned per query
     * @param distances   output pairwise distances, size n*k
     * @param labels      output labels of the NNs, size n*k
     * @param recons      reconstructed vectors size (n, k, d)
     **/
    virtual void search_and_reconstruct (idx_t n, const float *x, idx_t k,
                                         float *distances, idx_t *labels,
                                         float *recons) const;

    /** Computes a residual vector after indexing encoding.
     *
     * The residual vector is the difference between a vector and the
     * reconstruction that can be decoded from its representation in
     * the index. The residual can be used for multiple-stage indexing
     * methods, like IndexIVF's methods.
     *
     * @param x           input vector, size d
     * @param residual    output residual vector, size d
     * @param key         encoded index, as returned by search and assign
     */
    void compute_residual (const float * x, float * residual, idx_t key) const;

    /** Display the actual class name and some more info */
    void display () const;


};

}


#endif
Initial commit 2017-02-22 23:26:44 +01:00			`/**`
			`* Copyright (c) 2015-present, Facebook, Inc.`
			`* All rights reserved.`
			`*`
changed license 2017-07-30 00:18:45 -07:00			`* This source code is licensed under the BSD+Patents license found in the`
Initial commit 2017-02-22 23:26:44 +01:00			`* LICENSE file in the root directory of this source tree.`
			`*/`

			`// -*- c++ -*-`

			`#ifndef FAISS_INDEX_H`
			`#define FAISS_INDEX_H`


			`#include <cstdio>`
			`#include <typeinfo>`
			`#include <string>`
			`#include <sstream>`

Facebook sync (Dec 2018). (#660) * Add GpuIndexBinaryFlat * Add IndexBinaryHNSW 2018-12-19 17:48:35 +01:00			`#define FAISS_VERSION_MAJOR 1`
Add conda packages metadata + tests. (#769) + Add conda packages metadata (now building Faiss using conda's toolchain); + add Dockerfile for building conda packages (for all CUDA versions); + add working Dockerfile building faiss on Centos7; + simplify GPU build; + avoid falling back to CPU-only version (python); + simplify TravisCI config; + update INSTALL.md; + add configure flag for specifying target architectures (--with-cuda-arch); + fix Makefile for gpu tests; + fix various Makefile issues; + remove stale file (gpu/utils/DeviceUtils.cpp). 2019-04-05 11:50:39 +02:00			`#define FAISS_VERSION_MINOR 5`
			`#define FAISS_VERSION_PATCH 1`
Initial commit 2017-02-22 23:26:44 +01:00
			`/**`
			`* @namespace faiss`
			`*`
			`* Throughout the library, vectors are provided as float * pointers.`
			`* Most algorithms can be optimized when several vectors are processed`
			`* (added/searched) together in a batch. In this case, they are passed`
			`* in as a matrix. When n vectors of size d are provided as float * x,`
			`* component j of vector i is`
			`*`
			`* x[ i * d + j ]`
			`*`
			`* where 0 <= i < n and 0 <= j < d. In other words, matrices are`
			`* always compact. When specifying the size of the matrix, we call it`
			`* an n*d matrix, which implies a row-major storage.`
			`*/`


			`namespace faiss {`


Update Index.h 2017-09-18 15:46:18 +02:00			`/// Some algorithms support both an inner product version and a L2 search version.`
Initial commit 2017-02-22 23:26:44 +01:00			`enum MetricType {`
			`METRIC_INNER_PRODUCT = 0,`
			`METRIC_L2 = 1,`
			`};`


			`/// Forward declarations see AuxIndexStructures.h`
			`struct IDSelector;`
			`struct RangeSearchResult;`

			`/** Abstract structure for an index`
			`*`
			`* Supports adding vertices and searching them.`
			`*`
			`* Currently only asymmetric queries are supported:`
			`* database-to-database queries are not implemented.`
			`*/`
			`struct Index {`
Facebook sync (Mar 2019) (#756) Facebook sync (Mar 2019) - MatrixStats object - option to round coordinates during k-means optimization - alternative option for search in HNSW - moved stats and imbalance_factor of IndexIVF to InvertedLists object - range search for IVFScalarQuantizer - direct unit8 codec in ScalarQuantizer - renamed IndexProxy to IndexReplicas and moved to main Faiss - better support for PQ code assignment with external index - support for IMI2x16 (4B virtual centroids!) - support for k = 2048 search on GPU (instead of 1024) - most CUDA mem alloc failures throw exceptions instead of terminating on an assertion - support for renaming an ondisk invertedlists - interrupt computations with ctrl-C in python 2019-03-29 16:32:28 +01:00			`using idx_t = long; ///< all indices are this type`
			`using component_t = float;`
			`using distance_t = float;`
Initial commit 2017-02-22 23:26:44 +01:00
			`int d; ///< vector dimension`
			`idx_t ntotal; ///< total nb of indexed vectors`
			`bool verbose; ///< verbosity level`

			`/// set if the Index does not require training, or if training is done already`
			`bool is_trained;`

			`/// type of metric this index uses for search`
			`MetricType metric_type;`

sync with FB version 2017-01-09 - adding HNSW indexing method - simultaneous search and reconstruction for IndexIVFPQ 2018-01-09 06:42:06 -08:00			`explicit Index (idx_t d = 0, MetricType metric = METRIC_L2):`
Initial commit 2017-02-22 23:26:44 +01:00			`d(d),`
			`ntotal(0),`
			`verbose(false),`
			`is_trained(true),`
			`metric_type (metric) {}`

sync with FB version 2017-01-09 - adding HNSW indexing method - simultaneous search and reconstruction for IndexIVFPQ 2018-01-09 06:42:06 -08:00			`virtual ~Index ();`
Initial commit 2017-02-22 23:26:44 +01:00

			`/** Perform training on a representative set of vectors`
			`*`
			`* @param n nb of training vectors`
			`* @param x training vecors, size n * d`
			`*/`
sync with FB version 2017-01-09 - adding HNSW indexing method - simultaneous search and reconstruction for IndexIVFPQ 2018-01-09 06:42:06 -08:00			`virtual void train(idx_t n, const float* x);`
Initial commit 2017-02-22 23:26:44 +01:00
			`/** Add n vectors of dimension d to the index.`
			`*`
			`* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1`
			`* This function slices the input vectors in chuncks smaller than`
			`* blocksize_add and calls add_core.`
			`* @param x input matrix, size n * d`
			`*/`
			`virtual void add (idx_t n, const float *x) = 0;`

			`/** Same as add, but stores xids instead of sequential ids.`
			`*`
			`* The default implementation fails with an assertion, as it is`
			`* not supported by all indexes.`
			`*`
			`* @param xids if non-null, ids to store for the vectors (size n)`
			`*/`
			`virtual void add_with_ids (idx_t n, const float * x, const long *xids);`

			`/** query n vectors of dimension d to the index.`
			`*`
			`* return at most k vectors. If there are not enough results for a`
			`* query, the result array is padded with -1s.`
			`*`
			`* @param x input vectors to search, size n * d`
			`* @param labels output labels of the NNs, size n*k`
			`* @param distances output pairwise distances, size n*k`
			`*/`
			`virtual void search (idx_t n, const float *x, idx_t k,`
			`float *distances, idx_t *labels) const = 0;`

			`/** query n vectors of dimension d to the index.`
			`*`
			`* return all vectors with distance < radius. Note that many`
			`* indexes do not implement the range_search (only the k-NN search`
			`* is mandatory).`
			`*`
			`* @param x input vectors to search, size n * d`
			`* @param radius search radius`
			`* @param result result table`
			`*/`
			`virtual void range_search (idx_t n, const float *x, float radius,`
			`RangeSearchResult *result) const;`

			`/** return the indexes of the k vectors closest to the query x.`
			`*`
			`* This function is identical as search but only return labels of neighbors.`
			`* @param x input vectors to search, size n * d`
			`* @param labels output labels of the NNs, size n*k`
			`*/`
			`void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);`

			`/// removes all elements from the database.`
			`virtual void reset() = 0;`

			`/** removes IDs from the index. Not supported by all indexes`
			`*/`
			`virtual long remove_ids (const IDSelector & sel);`

			`/** Reconstruct a stored vector (or an approximation if lossy coding)`
			`*`
			`* this function may not be defined for some indexes`
			`* @param key id of the vector to reconstruct`
			`* @param recons reconstucted vector (size d)`
			`*/`
			`virtual void reconstruct (idx_t key, float * recons) const;`


			`/** Reconstruct vectors i0 to i0 + ni - 1`
			`*`
			`* this function may not be defined for some indexes`
			`* @param recons reconstucted vector (size ni * d)`
			`*/`
			`virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;`

sync with FB version 2017-01-09 - adding HNSW indexing method - simultaneous search and reconstruction for IndexIVFPQ 2018-01-09 06:42:06 -08:00			`/** Similar to search, but also reconstructs the stored vectors (or an`
			`* approximation in the case of lossy coding) for the search results.`
			`*`
			`* If there are not enough results for a query, the resulting arrays`
			`* is padded with -1s.`
			`*`
			`* @param recons reconstructed vectors size (n, k, d)`
			`**/`
			`virtual void search_and_reconstruct (idx_t n, const float *x, idx_t k,`
			`float *distances, idx_t *labels,`
			`float *recons) const;`
Initial commit 2017-02-22 23:26:44 +01:00
			`/** Computes a residual vector after indexing encoding.`
			`*`
			`* The residual vector is the difference between a vector and the`
			`* reconstruction that can be decoded from its representation in`
			`* the index. The residual can be used for multiple-stage indexing`
			`* methods, like IndexIVF's methods.`
			`*`
			`* @param x input vector, size d`
			`* @param residual output residual vector, size d`
			`* @param key encoded index, as returned by search and assign`
			`*/`
			`void compute_residual (const float * x, float * residual, idx_t key) const;`

			`/** Display the actual class name and some more info */`
			`void display () const;`



			`};`

			`}`


			`#endif`