Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/Index.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #ifndef FAISS_INDEX_H
12 #define FAISS_INDEX_H
13 
14 
15 #include <cstdio>
16 #include <typeinfo>
17 #include <string>
18 #include <sstream>
19 
20 #define FAISS_VERSION_MAJOR 1
21 #define FAISS_VERSION_MINOR 4
22 #define FAISS_VERSION_PATCH 0
23 
24 /**
25  * @namespace faiss
26  *
27  * Throughout the library, vectors are provided as float * pointers.
28  * Most algorithms can be optimized when several vectors are processed
29  * (added/searched) together in a batch. In this case, they are passed
30  * in as a matrix. When n vectors of size d are provided as float * x,
31  * component j of vector i is
32  *
33  * x[ i * d + j ]
34  *
35  * where 0 <= i < n and 0 <= j < d. In other words, matrices are
36  * always compact. When specifying the size of the matrix, we call it
37  * an n*d matrix, which implies a row-major storage.
38  */
39 
40 
namespace faiss {


/// Some algorithms support both an inner product version and an L2 search
/// version; this enum selects which one an index uses.
enum MetricType {
    METRIC_INNER_PRODUCT = 0,  ///< inner product version
    METRIC_L2 = 1              ///< L2 version
};


/// Forward declarations, see AuxIndexStructures.h
struct IDSelector;
struct RangeSearchResult;

/** Abstract structure for an index
 *
 * Supports adding vectors and searching them.
 *
 * Currently only asymmetric queries are supported:
 * database-to-database queries are not implemented.
 */
struct Index {

    typedef long idx_t;      ///< all indices are this type

    int d;                   ///< vector dimension
    idx_t ntotal;            ///< total nb of indexed vectors
    bool verbose;            ///< verbosity level

    /// set if the Index does not require training, or if training is done already
    bool is_trained;

    /// type of metric this index uses for search
    MetricType metric_type;

    /** Build an empty index description.
     *
     * The index starts with no vectors (ntotal == 0), verbosity off and
     * is_trained set to true.
     *
     * @param d       vector dimension (stored in the int member d)
     * @param metric  metric used for search, METRIC_L2 by default
     */
    explicit Index (idx_t d = 0, MetricType metric = METRIC_L2):
        d(static_cast<int>(d)),   // the member is an int: make the narrowing explicit
        ntotal(0),
        verbose(false),
        is_trained(true),
        metric_type (metric) {}

    virtual ~Index ();


    /** Perform training on a representative set of vectors
     *
     * @param n      nb of training vectors
     * @param x      training vectors, size n * d
     */
    virtual void train(idx_t n, const float* x);

    /** Add n vectors of dimension d to the index.
     *
     * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
     *
     * NOTE(review): earlier documentation stated that this function slices
     * the input vectors in chunks smaller than blocksize_add and calls
     * add_core; neither name is declared in this interface, so confirm
     * against the concrete implementations.
     *
     * @param n      nb of vectors to add
     * @param x      input matrix, size n * d
     */
    virtual void add (idx_t n, const float *x) = 0;

    /** Same as add, but stores xids instead of sequential ids.
     *
     * The default implementation fails with an assertion, as it is
     * not supported by all indexes.
     *
     * @param n      nb of vectors to add
     * @param x      input matrix, size n * d
     * @param xids   if non-null, ids to store for the vectors (size n)
     */
    virtual void add_with_ids (idx_t n, const float * x, const idx_t *xids);

    /** Query n vectors of dimension d to the index.
     *
     * Return at most k vectors. If there are not enough results for a
     * query, the result array is padded with -1s.
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d
     * @param k           max nb of results returned per query
     * @param distances   output pairwise distances, size n*k
     * @param labels      output labels of the NNs, size n*k
     */
    virtual void search (idx_t n, const float *x, idx_t k,
                         float *distances, idx_t *labels) const = 0;

    /** Query n vectors of dimension d to the index.
     *
     * Return all vectors with distance < radius. Note that many
     * indexes do not implement the range_search (only the k-NN search
     * is mandatory).
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d
     * @param radius      search radius
     * @param result      result table
     */
    virtual void range_search (idx_t n, const float *x, float radius,
                               RangeSearchResult *result) const;

    /** Return the indexes of the k vectors closest to the query x.
     *
     * This function is identical to search but only returns the labels
     * of the neighbors.
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d
     * @param labels      output labels of the NNs, size n*k
     * @param k           nb of neighbors returned per query (default 1)
     */
    void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);

    /// Removes all elements from the database.
    virtual void reset() = 0;

    /** Removes IDs from the index. Not supported by all indexes.
     *
     * @param sel    selector for the IDs to remove
     */
    virtual long remove_ids (const IDSelector & sel);

    /** Reconstruct a stored vector (or an approximation if lossy coding)
     *
     * This function may not be defined for some indexes.
     *
     * @param key         id of the vector to reconstruct
     * @param recons      reconstructed vector (size d)
     */
    virtual void reconstruct (idx_t key, float * recons) const;


    /** Reconstruct vectors i0 to i0 + ni - 1
     *
     * This function may not be defined for some indexes.
     *
     * @param i0          id of the first vector to reconstruct
     * @param ni          nb of vectors to reconstruct
     * @param recons      reconstructed vectors (size ni * d)
     */
    virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;

    /** Similar to search, but also reconstructs the stored vectors (or an
     * approximation in the case of lossy coding) for the search results.
     *
     * If there are not enough results for a query, the resulting arrays
     * are padded with -1s.
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d
     * @param k           max nb of results returned per query
     * @param distances   output pairwise distances, size n*k
     * @param labels      output labels of the NNs, size n*k
     * @param recons      reconstructed vectors size (n, k, d)
     **/
    virtual void search_and_reconstruct (idx_t n, const float *x, idx_t k,
                                         float *distances, idx_t *labels,
                                         float *recons) const;

    /** Computes a residual vector after indexing encoding.
     *
     * The residual vector is the difference between a vector and the
     * reconstruction that can be decoded from its representation in
     * the index. The residual can be used for multiple-stage indexing
     * methods, like IndexIVF's methods.
     *
     * @param x           input vector, size d
     * @param residual    output residual vector, size d
     * @param key         encoded index, as returned by search and assign
     */
    void compute_residual (const float * x, float * residual, idx_t key) const;

    /** Display the actual class name and some more info */
    void display () const;

};

} // namespace faiss
200 
201 
202 #endif
virtual void reset()=0
removes all elements from the database.
virtual void search_and_reconstruct(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, float *recons) const
Definition: Index.cpp:67
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:35
virtual void train(idx_t n, const float *x)
Definition: Index.cpp:24
virtual void add_with_ids(idx_t n, const float *x, const long *xids)
Definition: Index.cpp:42
int d
vector dimension
Definition: Index.h:66
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const
Definition: Index.cpp:60
virtual void add(idx_t n, const float *x)=0
long idx_t
all indices are this type
Definition: Index.h:64
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
bool verbose
verbosity level
Definition: Index.h:68
virtual long remove_ids(const IDSelector &sel)
Definition: Index.cpp:49
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void display() const
Definition: Index.cpp:95
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const
Definition: Index.cpp:29
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
void compute_residual(const float *x, float *residual, idx_t key) const
Definition: Index.cpp:87
virtual void reconstruct(idx_t key, float *recons) const
Definition: Index.cpp:55
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:45