Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVF.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 // -*- c++ -*-
11 
12 #ifndef FAISS_INDEX_IVF_H
13 #define FAISS_INDEX_IVF_H
14 
15 
16 #include <vector>
17 
18 
19 #include "Index.h"
20 #include "Clustering.h"
21 #include "Heap.h"
22 
23 
24 namespace faiss {
25 
26 
27 
28 /** Index based on an inverted file (IVF)
29  *
30  * In the inverted file, the quantizer (an Index instance) provides a
31  * quantization index for each vector to be added. The quantization
32  * index maps to a list (aka inverted list or posting list), where the
33  * id of the vector is then stored.
34  *
35  * At search time, the vector to be searched is also quantized, and
36  * only the list corresponding to the quantization index is
37  * searched. This speeds up the search by making it
38  * non-exhaustive. This can be relaxed using multi-probe search: a few
39  * (nprobe) quantization indices are selected and several inverted
40  * lists are visited.
41  *
42  * Sub-classes implement a post-filtering of the index that refines
43  * the distance estimation from the query to database vectors.
44  */
45 struct IndexIVF: Index {
46  size_t nlist; ///< number of possible key values
47  size_t nprobe; ///< number of probes at query time
48 
49  Index * quantizer; ///< quantizer that maps vectors to inverted lists
50 
51  /**
52  * = 0: use the quantizer as index in a kmeans training
53  * = 1: just pass on the training set to the train() of the quantizer
54  * = 2: kmeans training on a flat index + add the centroids to the quantizer
55  */
57  bool own_fields; ///< whether object owns the quantizer
58 
59  ClusteringParameters cp; ///< to override default clustering params
60  Index *clustering_index; ///< to override index used during clustering
61 
62  std::vector < std::vector<long> > ids; ///< Inverted lists for indexes
63 
64  size_t code_size; ///< code size per vector in bytes
65  std::vector < std::vector<uint8_t> > codes; // binary codes, size nlist
66 
67  /// map for direct access to the elements. Enables reconstruct().
69  std::vector <long> direct_map;
70 
71  /** The Inverted file takes a quantizer (an Index) on input,
72  * which implements the function mapping a vector to a list
73  * identifier. The pointer is borrowed: the quantizer should not
74  * be deleted while the IndexIVF is in use.
75  */
76  IndexIVF (Index * quantizer, size_t d, size_t nlist,
77  MetricType metric = METRIC_INNER_PRODUCT);
78 
79  void reset() override;
80 
81  /// Trains the quantizer and calls train_residual to train sub-quantizers
82  void train(idx_t n, const float* x) override;
83 
84  /// Quantizes x and calls add_with_key
85  void add(idx_t n, const float* x) override;
86 
87  /// Sub-classes that encode the residuals can train their encoders here
88  /// does nothing by default
89  virtual void train_residual (idx_t n, const float *x);
90 
91 
92  /** search a set of vectors, that are pre-quantized by the IVF
93  * quantizer. Fill in the corresponding heaps with the query
94  * results. search() calls this.
95  *
96  * @param n nb of vectors to query
97  * @param x query vectors, size nx * d
98  * @param assign coarse quantization indices, size nx * nprobe
99  * @param centroid_dis
100  * distances to coarse centroids, size nx * nprobe
101  * @param distance
102  * output distances, size n * k
103  * @param labels output labels, size n * k
104  * @param store_pairs store inv list index + inv list offset
105  * instead in upper/lower 32 bit of result,
106  * instead of ids (used for reranking).
107  */
108  virtual void search_preassigned (idx_t n, const float *x, idx_t k,
109  const idx_t *assign,
110  const float *centroid_dis,
111  float *distances, idx_t *labels,
112  bool store_pairs) const = 0;
113 
114  /** assign the vectors, then call search_preassign */
115  virtual void search (idx_t n, const float *x, idx_t k,
116  float *distances, idx_t *labels) const override;
117 
118 
119  /// Dataset manipulation functions
120 
121  long remove_ids(const IDSelector& sel) override;
122 
123  /** moves the entries from another dataset to self. On output,
124  * other is empty. add_id is added to all moved ids (for
125  * sequential ids, this would be this->ntotal */
126  virtual void merge_from (IndexIVF &other, idx_t add_id);
127 
128  /** copy a subset of the entries index to the other index
129  *
130  * if subset_type == 0: copies ids in [a1, a2)
131  * if subset_type == 1: copies ids if id % a1 == a2
132  * if subset_type == 2: copies inverted lists such that a1
133  * elements are left before and a2 elements are after
134  */
135  virtual void copy_subset_to (IndexIVF & other, int subset_type,
136  long a1, long a2) const;
137 
138  ~IndexIVF() override;
139 
140  size_t get_list_size (size_t list_no) const
141  { return ids[list_no].size(); }
142 
143  /** intialize a direct map
144  *
145  * @param new_maintain_direct_map if true, create a direct map,
146  * else clear it
147  */
148  void make_direct_map (bool new_maintain_direct_map=true);
149 
150  /// 1= perfectly balanced, >1: imbalanced
151  double imbalance_factor () const;
152 
153  /// display some stats about the inverted lists
154  void print_stats () const;
155 
156  IndexIVF ();
157 };
158 
159 
160 struct IndexIVFFlatStats { // search statistics counters
161  size_t nq; // nb of queries run
162  size_t nlist; // nb of inverted lists scanned
163  size_t ndis; // nb of distances computed
164  size_t npartial; // nb of bound computations (IndexIVFFlatIPBounds)
165 
166  IndexIVFFlatStats () {reset (); }
167  void reset (); // invoked by the constructor; only declared here
168 };
169 
170 // global variable that collects all of these statistics (extern: declared here, defined in another translation unit)
171 extern IndexIVFFlatStats indexIVFFlat_stats;
172 
173 
174 
175 
176 
177 /** Inverted file with stored vectors. Here the inverted file
178  * pre-selects the vectors to be searched, but they are not otherwise
179  * encoded: the code array just contains the raw float entries.
180  */
181 struct IndexIVFFlat: IndexIVF {
182 
183  IndexIVFFlat (
184  Index * quantizer, size_t d, size_t nlist_,
185  MetricType = METRIC_INNER_PRODUCT);
186 
187  /// same as add_with_ids, with precomputed coarse quantizer
188  virtual void add_core (idx_t n, const float * x, const long *xids,
189  const long *precomputed_idx);
190 
191  /// implemented for all IndexIVF* classes
192  void add_with_ids(idx_t n, const float* x, const long* xids) override;
193 
194  void search_preassigned (idx_t n, const float *x, idx_t k,
195  const idx_t *assign,
196  const float *centroid_dis,
197  float *distances, idx_t *labels,
198  bool store_pairs) const override;
199 
200  void range_search(
201  idx_t n,
202  const float* x,
203  float radius,
204  RangeSearchResult* result) const override;
205 
206  /** Update a subset of vectors.
207  *
208  * The index must have a direct_map
209  *
210  * @param nv nb of vectors to update
211  * @param idx vector indices to update, size nv
212  * @param v vectors of new values, size nv*d
213  */
214  void update_vectors (int nv, idx_t *idx, const float *v);
215 
216  void reconstruct(idx_t key, float* recons) const override;
217 
218  IndexIVFFlat () {}
219 };
220 
221 
222 
223 } // namespace faiss
224 
225 
226 
227 
228 
229 #endif
virtual void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const =0
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
Definition: IndexIVF.cpp:216
virtual void copy_subset_to(IndexIVF &other, int subset_type, long a1, long a2) const
Definition: IndexIVF.cpp:272
char quantizer_trains_alone
Definition: IndexIVF.h:56
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:47
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:646
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:23
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:548
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:208
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:62
int d
vector dimension
Definition: Index.h:64
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:49
void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:157
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:59
void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
Definition: IndexIVF.cpp:361
Index * clustering_index
to override index used during clustering
Definition: IndexIVF.h:60
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:57
long idx_t
all indices are this type
Definition: Index.h:62
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:112
void update_vectors(int nv, idx_t *idx, const float *v)
Definition: IndexIVF.cpp:598
void make_direct_map(bool new_maintain_direct_map=true)
Definition: IndexIVF.cpp:71
void print_stats() const
display some stats about the inverted lists
Definition: IndexIVF.cpp:225
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
void add(idx_t n, const float *x) override
Quantizes x and calls add_with_key.
Definition: IndexIVF.cpp:66
long remove_ids(const IDSelector &sel) override
Dataset manipulation functions.
Definition: IndexIVF.cpp:123
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:68
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:96
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const override
Definition: IndexIVF.cpp:529
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:245
size_t code_size
code size per vector in bytes
Definition: IndexIVF.h:64
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:43
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:366