Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/Index.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved
10 // -*- c++ -*-
11 
12 #ifndef FAISS_INDEX_H
13 #define FAISS_INDEX_H
14 
15 
16 #include <cstdio>
17 #include <typeinfo>
18 #include <string>
19 #include <sstream>
20 
21 
22 /**
23  * @namespace faiss
24  *
25  * Throughout the library, vectors are provided as float * pointers.
26  * Most algorithms can be optimized when several vectors are processed
27  * (added/searched) together in a batch. In this case, they are passed
28  * in as a matrix. When n vectors of size d are provided as float * x,
29  * component j of vector i is
30  *
31  * x[ i * d + j ]
32  *
33  * where 0 <= i < n and 0 <= j < d. In other words, matrices are
34  * always compact. When specifying the size of the matrix, we call it
35  * an n*d matrix, which implies a row-major storage.
36  */
37 
38 
39 namespace faiss {
40 
41 
42 /// Some algorithms support both an inner product version and an L2 search version.
43 enum MetricType {
44  METRIC_INNER_PRODUCT = 0,
45  METRIC_L2 = 1,
46 };
47 
48 
49 /// Forward declarations; see AuxIndexStructures.h
50 struct IDSelector;
51 struct RangeSearchResult;
52 
53 /** Abstract structure for an index
54  *
55  * Supports adding vectors and searching them.
56  *
57  * Currently only asymmetric queries are supported:
58  * database-to-database queries are not implemented.
59  */
60 struct Index {
61 
62  typedef long idx_t; ///< all indices are this type
63 
64  int d; ///< vector dimension
65  idx_t ntotal; ///< total nb of indexed vectors
66  bool verbose; ///< verbosity flag
67 
68  /// set if the Index does not require training, or if training is done already
69  bool is_trained;
70 
71  /// type of metric this index uses for search (NOTE(review): the `MetricType metric_type;` declaration from Index.h:72 is missing in this listing)
73 
74  explicit Index (idx_t d = 0, MetricType metric = METRIC_INNER_PRODUCT):
75  d(d), // NOTE(review): the idx_t argument is implicitly narrowed to the int member d
76  ntotal(0), // a new index holds no vectors
77  verbose(false),
78  is_trained(true), // trained by default
79  metric_type (metric) {}
80 
81  virtual ~Index () { }
82 
83 
84  /** Perform training on a representative set of vectors
85  *
86  * @param n nb of training vectors
87  * @param x training vectors, size n * d
88  */
89  virtual void train(idx_t /*n*/, const float* /*x*/) {
90  // does nothing by default
91  }
92 
93  /** Add n vectors of dimension d to the index.
94  *
95  * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
96  * This function slices the input vectors in chunks smaller than
97  * blocksize_add and calls add_core (NOTE(review): neither name is declared in this header; confirm this still applies).
98  * @param x input matrix, size n * d
99  */
100  virtual void add (idx_t n, const float *x) = 0;
101 
102  /** Same as add, but stores xids instead of sequential ids.
103  *
104  * The default implementation fails with an assertion, as it is
105  * not supported by all indexes.
106  *
107  * @param xids if non-null, ids to store for the vectors (size n)
108  */
109  virtual void add_with_ids (idx_t n, const float * x, const long *xids);
110 
111  /** Query the index with n vectors of dimension d.
112  *
113  * Returns at most k vectors per query. If there are not enough results for a
114  * query, the result array is padded with -1s.
115  *
116  * @param x input vectors to search, size n * d
117  * @param labels output labels of the NNs, size n*k
118  * @param distances output pairwise distances, size n*k
119  */
120  virtual void search (idx_t n, const float *x, idx_t k,
121  float *distances, idx_t *labels) const = 0;
122 
123  /** Query the index with n vectors of dimension d.
124  *
125  * Returns all vectors with distance < radius. Note that many
126  * indexes do not implement the range_search (only the k-NN search
127  * is mandatory).
128  *
129  * @param x input vectors to search, size n * d
130  * @param radius search radius
131  * @param result result table
132  */
133  virtual void range_search (idx_t n, const float *x, float radius,
134  RangeSearchResult *result) const;
135 
136  /** Return the indexes of the k vectors closest to the query x.
137  *
138  * This function is identical to search but only returns the labels of the neighbors.
139  * @param x input vectors to search, size n * d
140  * @param labels output labels of the NNs, size n*k
141  */
142  void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);
143 
144  /// removes all elements from the database.
145  virtual void reset() = 0;
146 
147  /** Removes IDs from the index. Not supported by all indexes.
148  */
149  virtual long remove_ids (const IDSelector & sel);
150 
151  /** Reconstruct a stored vector (or an approximation if lossy coding)
152  *
153  * this function may not be defined for some indexes
154  * @param key id of the vector to reconstruct
155  * @param recons reconstructed vector (size d)
156  */
157  virtual void reconstruct (idx_t key, float * recons) const;
158 
159 
160  /** Reconstruct vectors i0 to i0 + ni - 1
161  *
162  * this function may not be defined for some indexes
163  * @param recons reconstructed vectors (size ni * d)
164  */
165  virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;
166 
167 
168  /** Computes a residual vector after indexing encoding.
169  *
170  * The residual vector is the difference between a vector and the
171  * reconstruction that can be decoded from its representation in
172  * the index. The residual can be used for multiple-stage indexing
173  * methods, like IndexIVF's methods.
174  *
175  * @param x input vector, size d
176  * @param residual output residual vector, size d
177  * @param key encoded index, as returned by search and assign
178  */
179  void compute_residual (const float * x, float * residual, idx_t key) const;
180 
181  /** Display the actual class name and some more info */
182  void display () const;
183 
184 
185 
186 };
187 
188 }
189 
190 
191 #endif
virtual void reset()=0
removes all elements from the database.
virtual void train(idx_t, const float *)
Definition: Index.h:89
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:23
virtual void add_with_ids(idx_t n, const float *x, const long *xids)
Definition: Index.cpp:30
int d
vector dimension
Definition: Index.h:64
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const
Definition: Index.cpp:49
virtual void add(idx_t n, const float *x)=0
long idx_t
all indices are this type
Definition: Index.h:62
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
bool verbose
verbosity level
Definition: Index.h:66
virtual long remove_ids(const IDSelector &sel)
Definition: Index.cpp:37
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void display() const
Definition: Index.cpp:65
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const
Definition: Index.cpp:17
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
void compute_residual(const float *x, float *residual, idx_t key) const
Definition: Index.cpp:57
virtual void reconstruct(idx_t key, float *recons) const
Definition: Index.cpp:43
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:43