Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
Index.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved
11 // -*- c++ -*-
12 
13 #ifndef FAISS_INDEX_H
14 #define FAISS_INDEX_H
15 
16 
17 #include <cstdio>
18 #include <typeinfo>
19 #include <string>
20 #include <sstream>
21 
22 
23 /**
24  * @namespace faiss
25  *
26  * Throughout the library, vectors are provided as float * pointers.
27  * Most algorithms can be optimized when several vectors are processed
28  * (added/searched) together in a batch. In this case, they are passed
29  * in as a matrix. When n vectors of size d are provided as float * x,
30  * component j of vector i is
31  *
32  * x[ i * d + j ]
33  *
34  * where 0 <= i < n and 0 <= j < d. In other words, matrices are
35  * always compact. When specifying the size of the matrix, we call it
36  * an n*d matrix, which implies a row-major storage.
37  */
38 
39 
40 namespace faiss {
41 
42 
43 /// Some algorithms support both an inner product version and an L2 search version.
44 enum MetricType {
45  METRIC_INNER_PRODUCT = 0, ///< inner product variant
46  METRIC_L2 = 1, ///< L2 variant
47 };
48 
49 
50 /// Forward declarations; see AuxIndexStructures.h
51 struct IDSelector;
52 struct RangeSearchResult;
53 
54 /** Abstract structure for an index
55  *
56  * Supports adding vertices and searching them.
57  *
58  * Currently only asymmetric queries are supported:
59  * database-to-database queries are not implemented.
60  */
61 struct Index {
62  std::string index_typename;
63 
64  typedef long idx_t; ///< all indices are this type
65 
66  int d; ///< vector dimension
67  idx_t ntotal; ///< total nb of indexed vectors
68  bool verbose; ///< verbosity level
69 
70  /// set if the Index does not require training, or if training is done already
71  bool is_trained;
72 
73  /// type of metric this index uses for search
75 
76  explicit Index (idx_t d = 0, MetricType metric = METRIC_INNER_PRODUCT):
77  index_typename ("Undefined Index typename"),
78  d(d),
79  ntotal(0),
80  verbose(false),
81  is_trained(true),
82  metric_type (metric) {}
83 
84  virtual ~Index () { }
85 
86 
87  /** Perform training on a representative set of vectors
88  *
89  * @param n nb of training vectors
90  * @param x training vecors, size n * d
91  */
92  virtual void train (idx_t n, const float *x) {
93  // does nothing by default
94  }
95 
96  /** Add n vectors of dimension d to the index.
97  *
98  * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
99  * This function slices the input vectors in chuncks smaller than
100  * blocksize_add and calls add_core.
101  * @param x input matrix, size n * d
102  */
103  virtual void add (idx_t n, const float *x) = 0;
104 
105  /** Same as add, but stores xids instead of sequential ids.
106  *
107  * The default implementation fails with an assertion, as it is
108  * not supported by all indexes.
109  *
110  * @param xids if non-null, ids to store for the vectors (size n)
111  */
112  virtual void add_with_ids (idx_t n, const float * x, const long *xids);
113 
114  /** query n vectors of dimension d to the index.
115  *
116  * return at most k vectors. If there are not enough results for a
117  * query, the result array is padded with -1s.
118  *
119  * @param x input vectors to search, size n * d
120  * @param labels output labels of the NNs, size n*k
121  * @param distances output pairwise distances, size n*k
122  */
123  virtual void search (idx_t n, const float *x, idx_t k,
124  float *distances, idx_t *labels) const = 0;
125 
126  /** query n vectors of dimension d to the index.
127  *
128  * return all vectors with distance < radius. Note that many
129  * indexes do not implement the range_search (only the k-NN search
130  * is mandatory).
131  *
132  * @param x input vectors to search, size n * d
133  * @param radius search radius
134  * @param result result table
135  */
136  virtual void range_search (idx_t n, const float *x, float radius,
137  RangeSearchResult *result) const;
138 
139  /** return the indexes of the k vectors closest to the query x.
140  *
141  * This function is identical as search but only return labels of neighbors.
142  * @param x input vectors to search, size n * d
143  * @param labels output labels of the NNs, size n*k
144  */
145  void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);
146 
147  /// removes all elements from the database.
148  virtual void reset() = 0;
149 
150  /** removes IDs from the index. Not supported by all indexes
151  */
152  virtual long remove_ids (const IDSelector & sel);
153 
154  /** Reconstruct a stored vector (or an approximation if lossy coding)
155  *
156  * this function may not be defined for some indexes
157  * @param key id of the vector to reconstruct
158  * @param recons reconstucted vector (size d)
159  */
160  virtual void reconstruct (idx_t key, float * recons) const;
161 
162 
163  /** Reconstruct vectors i0 to i0 + ni - 1
164  *
165  * this function may not be defined for some indexes
166  * @param recons reconstucted vector (size ni * d)
167  */
168  virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;
169 
170 
171  /** Computes a residual vector after indexing encoding.
172  *
173  * The residual vector is the difference between a vector and the
174  * reconstruction that can be decoded from its representation in
175  * the index. The residual can be used for multiple-stage indexing
176  * methods, like IndexIVF's methods.
177  *
178  * @param x input vector, size d
179  * @param residual output residual vector, size d
180  * @param key encoded index, as returned by search and assign
181  */
182  void compute_residual (const float * x, float * residual, idx_t key) const;
183 
184  /** Display the actual class name and some more info */
185  void display () const;
186 
187  /** Return the typeName of the index (which includes main parameters */
188  virtual std::string get_typename () const {
189  return index_typename; }
190 
191  virtual void set_typename () = 0 ;
192 
193 
194 };
195 
196 }
197 
198 
199 #endif
virtual void reset()=0
removes all elements from the database.
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:24
virtual void add_with_ids(idx_t n, const float *x, const long *xids)
Definition: Index.cpp:32
int d
vector dimension
Definition: Index.h:66
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const
Definition: Index.cpp:50
virtual void add(idx_t n, const float *x)=0
long idx_t
all indices are this type
Definition: Index.h:64
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
virtual std::string get_typename() const
Definition: Index.h:188
bool verbose
verbosity level
Definition: Index.h:68
virtual long remove_ids(const IDSelector &sel)
Definition: Index.cpp:38
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void display() const
Definition: Index.cpp:66
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const
Definition: Index.cpp:18
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
void compute_residual(const float *x, float *residual, idx_t key) const
Definition: Index.cpp:58
virtual void reconstruct(idx_t key, float *recons) const
Definition: Index.cpp:45
virtual void train(idx_t n, const float *x)
Definition: Index.h:92
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:44