Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVF.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 // -*- c++ -*-
12 
13 #ifndef FAISS_INDEX_IVF_H
14 #define FAISS_INDEX_IVF_H
15 
16 
17 #include <vector>
18 
19 
20 #include "Index.h"
21 #include "Clustering.h"
22 #include "Heap.h"
23 
24 
25 namespace faiss {
26 
27 
28 
29 /** Index based on an inverted file (IVF)
30  *
31  * In the inverted file, the quantizer (an Index instance) provides a
32  * quantization index for each vector to be added. The quantization
33  * index maps to a list (aka inverted list or posting list), where the
34  * id of the vector is then stored.
35  *
36  * At search time, the vector to be searched is also quantized, and
37  * only the list corresponding to the quantization index is
38  * searched. This speeds up the search by making it
39  * non-exhaustive. This can be relaxed using multi-probe search: a few
40  * (nprobe) quantization indices are selected and several inverted
41  * lists are visited.
42  *
43  * Sub-classes implement a post-filtering of the index that refines
44  * the distance estimation from the query to database vectors.
45  */
46 struct IndexIVF: Index {
47  size_t nlist; ///< number of inverted lists (= number of possible key values)
48  size_t nprobe; ///< number of inverted lists probed (visited) at query time
49 
50  Index * quantizer; ///< quantizer that maps vectors to inverted lists
51  bool quantizer_trains_alone; ///< just pass over the trainset to quantizer
52  bool own_fields; ///< whether this object owns the quantizer
53 
54  ClusteringParameters cp; ///< to override default clustering params
55 
56  std::vector < std::vector<long> > ids; ///< Inverted lists of vector ids, one list per key
57 
58  /// map for direct access to the elements. Enables reconstruct().
    // NOTE(review): listing line 59 is absent from this extract; the member
    // index further down this page lists `bool maintain_direct_map` at
    // IndexIVF.h:59 with this same description -- confirm against the header.
60  std::vector <long> direct_map;
61 
62  /** The Inverted file takes a quantizer (an Index) on input,
63  * which implements the function mapping a vector to a list
64  * identifier. The pointer is borrowed: the quantizer should not
65  * be deleted while the IndexIVF is in use (own_fields records
   * whether this object owns the quantizer).
66  */
67  IndexIVF (Index * quantizer, size_t d, size_t nlist,
68  MetricType metric = METRIC_INNER_PRODUCT);
69 
70  virtual void reset () override;
71 
72  /// Trains the quantizer and calls train_residual to train sub-quantizers
73  virtual void train (idx_t n, const float *x) override;
74 
75  /// Quantizes x and calls add_with_key
76  virtual void add (idx_t n, const float *x) override;
77 
78 
79 
80  /// Sub-classes that encode the residuals can train their encoders here;
81  /// does nothing by default
82  virtual void train_residual (idx_t n, const float *x);
83 
84  /** Moves the entries from another index to self. On output,
85  * other is empty. add_id is added to all moved ids (for
86  * sequential ids, this would be this->ntotal). */
87  virtual void merge_from (IndexIVF &other, idx_t add_id);
88 
89  /** implemented by sub-classes (pure virtual here) */
90  virtual void merge_from_residuals (IndexIVF &other) = 0;
91 
92  virtual ~IndexIVF();
93 
94  /// initialize a direct map
95  void make_direct_map ();
96 
97  /// 1 = perfectly balanced, >1: imbalanced
98  double imbalance_factor () const;
99 
100  /// display some stats about the inverted lists
101  void print_stats () const;
102 
103  IndexIVF ();
104 };
105 
106 
108  size_t nq; // nb of queries run
109  size_t nlist; // nb of inverted lists scanned
110  size_t ndis; // nb of distances computed
111  size_t npartial; // nb of bound computations (IndexIVFFlatIPBounds)
112 
113  IndexIVFFlatStats () {reset (); }
114  void reset ();
115 };
116 
117 // global var that collects them all
118 extern IndexIVFFlatStats indexIVFFlat_stats;
119 
120 
121 
122 
123 
124 /** Inverted file with stored vectors. Here the inverted file
125  * pre-selects the vectors to be searched, but they are not otherwise
126  * encoded.
127  */
129  /** Inverted list of original vectors. Each list is a nl * d
130  * matrix, where nl is the nb of vectors stored in the list. */
131  std::vector < std::vector<float> > vecs;
132 
133  IndexIVFFlat (
134  Index * quantizer, size_t d, size_t nlist_,
135  MetricType = METRIC_INNER_PRODUCT);
136 
137  /** Return the typeName of the index (which includes main parameters) */
138  virtual void set_typename () override;
139 
140 
141  /// same as add_with_ids, with precomputed coarse quantizer
142  virtual void add_core (idx_t n, const float * x, const long *xids,
143  const long *precomputed_idx);
144 
145 
146  /// implemented for all IndexIVF* classes
147  virtual void add_with_ids (idx_t n, const float * x, const long *xids)
148  override;
149 
150  virtual void search (
151  idx_t n, const float *x, idx_t k,
152  float *distances, idx_t *labels) const override;
153 
154  virtual void range_search (
155  idx_t n, const float *x, float radius,
156  RangeSearchResult *result) const override;
157 
158  /** copy a subset of the entries index to the other index
159  *
160  * if subset_type == 0: copies ids in [a1, a2)
161  * if subset_type == 1: copies ids if id % a1 == a2
162  */
163  void copy_subset_to (IndexIVFFlat & other, int subset_type,
164  long a1, long a2) const;
165 
166 
167  virtual void reset() override;
168 
169  virtual long remove_ids (const IDSelector & sel) override;
170 
171  /// Implementation of the search for the inner product metric
173  size_t nx, const float * x,
174  const long * keys,
175  float_minheap_array_t * res) const;
176 
177  /// Implementation of the search for the L2 metric
178  void search_knn_L2sqr (
179  size_t nx, const float * x,
180  const long * keys,
181  float_maxheap_array_t * res) const;
182 
183  virtual void reconstruct (idx_t key, float * recons) const override;
184 
185  virtual void merge_from_residuals (IndexIVF &other) override;
186 
187 
188  IndexIVFFlat () {}
189 };
190 
192 
193  /// nb of dimensions of pre-filter
194  size_t fsize;
195 
196  /// norm of remainder (dimensions fsize:d)
197  std::vector<std::vector<float> > part_norms;
198 
200  Index * quantizer, size_t d, size_t nlist,
201  size_t fsize);
202 
203  virtual void add_core (idx_t n, const float * x, const long *xids,
204  const long *precomputed_idx) override;
205 
206  virtual void search (
207  idx_t n, const float *x, idx_t k,
208  float *distances, idx_t *labels) const override;
209 
210 
211 
212 };
213 
214 
215 
216 } // namespace faiss
217 
218 
219 
220 
221 
222 #endif
void search_knn_L2sqr(size_t nx, const float *x, const long *keys, float_maxheap_array_t *res) const
Implementation of the search for the L2 metric.
Definition: IndexIVF.cpp:329
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
Definition: IndexIVF.cpp:134
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:48
virtual void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:523
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:51
virtual void set_typename() override
Definition: IndexIVF.cpp:207
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:400
void copy_subset_to(IndexIVFFlat &other, int subset_type, long a1, long a2) const
Definition: IndexIVF.cpp:456
virtual void merge_from_residuals(IndexIVF &other) override
Definition: IndexIVF.cpp:444
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:125
virtual long remove_ids(const IDSelector &sel) override
Definition: IndexIVF.cpp:491
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:56
int d
vector dimension
Definition: Index.h:66
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:50
virtual void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:96
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:54
virtual void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
Definition: IndexIVF.cpp:224
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:52
long idx_t
all indices are this type
Definition: Index.h:64
virtual void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:483
void make_direct_map()
initialize a direct map
Definition: IndexIVF.cpp:69
virtual void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:87
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:644
std::vector< std::vector< float > > part_norms
norm of remainder (dimensions fsize:d)
Definition: IndexIVF.h:197
size_t fsize
nb of dimensions of pre-filter
Definition: IndexIVF.h:194
virtual void merge_from_residuals(IndexIVF &other)=0
void print_stats() const
display some stats about the inverted lists
Definition: IndexIVF.cpp:143
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx) override
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:548
size_t nlist
number of possible key values
Definition: IndexIVF.h:47
virtual void add(idx_t n, const float *x) override
Quantizes x and calls add_with_key.
Definition: IndexIVF.cpp:64
void search_knn_inner_product(size_t nx, const float *x, const long *keys, float_minheap_array_t *res) const
Implementation of the search for the inner product metric.
Definition: IndexIVF.cpp:278
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:59
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:163
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:44
std::vector< std::vector< float > > vecs
Definition: IndexIVF.h:131
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:229
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:379