Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVF.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 // -*- c++ -*-
11 
12 #ifndef FAISS_INDEX_IVF_H
13 #define FAISS_INDEX_IVF_H
14 
15 
16 #include <vector>
17 
18 
19 #include "Index.h"
20 #include "Clustering.h"
21 #include "Heap.h"
22 
23 
24 namespace faiss {
25 
26 
27 
28 /** Index based on an inverted file (IVF)
29  *
30  * In the inverted file, the quantizer (an Index instance) provides a
31  * quantization index for each vector to be added. The quantization
32  * index maps to a list (aka inverted list or posting list), where the
33  * id of the vector is then stored.
34  *
35  * At search time, the vector to be searched is also quantized, and
36  * only the list corresponding to the quantization index is
37  * searched. This speeds up the search by making it
38  * non-exhaustive. This can be relaxed using multi-probe search: a few
39  * (nprobe) quantization indices are selected and several inverted
40  * lists are visited.
41  *
42  * Sub-classes implement a post-filtering of the index that refines
43  * the distance estimation from the query to database vectors.
44  */
45 struct IndexIVF: Index {
46  size_t nlist; ///< number of possible key values
47  size_t nprobe; ///< number of probes at query time
48 
49  Index * quantizer; ///< quantizer that maps vectors to inverted lists
50  bool quantizer_trains_alone; ///< just pass over the trainset to quantizer
51  bool own_fields; ///< whether object owns the quantizer
52 
53  ClusteringParameters cp; ///< to override default clustering params
54 
55  std::vector < std::vector<long> > ids; ///< Inverted lists: the vector ids stored in each list
56 
57  /// map for direct access to the elements. Enables reconstruct(). NOTE(review): this listing skips line 58, where the cross-reference index places `bool maintain_direct_map`.
59  std::vector <long> direct_map;
60 
61  /** The Inverted file takes a quantizer (an Index) on input,
62  * which implements the function mapping a vector to a list
63  * identifier. The pointer is borrowed: the quantizer should not
64  * be deleted while the IndexIVF is in use.
65  */
66  IndexIVF (Index * quantizer, size_t d, size_t nlist,
67  MetricType metric = METRIC_INNER_PRODUCT);
68 
69  void reset() override;
70 
71  /// Trains the quantizer and calls train_residual to train sub-quantizers
72  void train(idx_t n, const float* x) override;
73 
74  /// Quantizes x and calls add_with_key
75  void add(idx_t n, const float* x) override;
76 
77  /// Sub-classes that encode the residuals can train their encoders here
78  /// does nothing by default
79  virtual void train_residual (idx_t n, const float *x);
80 
81  /** moves the entries from another dataset to self. On output,
82  * other is empty. add_id is added to all moved ids (for
83  * sequential ids, this would be this->ntotal) */
84  virtual void merge_from (IndexIVF &other, idx_t add_id);
85 
86  /** pure virtual: must be implemented by sub-classes */
87  virtual void merge_from_residuals (IndexIVF &other) = 0;
88 
89  ~IndexIVF() override;
90 
91  size_t get_list_size (size_t list_no) const
92  { return ids[list_no].size(); } // nb of ids stored in inverted list list_no; list_no is not bounds-checked
93 
94  /** initialize a direct map
95  *
96  * @param new_maintain_direct_map if true, create a direct map,
97  * else clear it
98  */
99  void make_direct_map (bool new_maintain_direct_map=true);
100 
101  /// 1= perfectly balanced, >1: imbalanced
102  double imbalance_factor () const;
103 
104  /// display some stats about the inverted lists
105  void print_stats () const;
106 
107  IndexIVF ();
108 };
109 
110 
112  size_t nq; // nb of queries run
113  size_t nlist; // nb of inverted lists scanned
114  size_t ndis; // nb of distances computed
115  size_t npartial; // nb of bound computations (IndexIVFFlatIPBounds)
116 
117  IndexIVFFlatStats () {reset (); }
118  void reset ();
119 };
120 
121 // global var that collects them all
122 extern IndexIVFFlatStats indexIVFFlat_stats;
123 
124 
125 
126 
127 
128 /** Inverted file with stored vectors. Here the inverted file
129  * pre-selects the vectors to be searched, but they are not otherwise
130  * encoded.
131  */
133  /** Inverted list of original vectors. Each list is a nl * d
134  * matrix, where nl is the nb of vectors stored in the list. */
135  std::vector < std::vector<float> > vecs;
136 
137  IndexIVFFlat (
138  Index * quantizer, size_t d, size_t nlist_,
139  MetricType = METRIC_INNER_PRODUCT);
140 
141  /// same as add_with_ids, with precomputed coarse quantizer
142  virtual void add_core (idx_t n, const float * x, const long *xids,
143  const long *precomputed_idx);
144 
145  /// implemented for all IndexIVF* classes
146  void add_with_ids(idx_t n, const float* x, const long* xids) override;
147 
148  void search(
149  idx_t n,
150  const float* x,
151  idx_t k,
152  float* distances,
153  idx_t* labels) const override;
154 
155  /// perform search, without computing the assignment to the quantizer
156  void search_preassigned (idx_t n, const float *x, idx_t k,
157  const idx_t *assign,
158  float *distances, idx_t *labels) const;
159 
160  void range_search(
161  idx_t n,
162  const float* x,
163  float radius,
164  RangeSearchResult* result) const override;
165 
166  /** copy a subset of the entries index to the other index
167  *
168  * if subset_type == 0: copies ids in [a1, a2)
169  * if subset_type == 1: copies ids if id % a1 == a2
170  */
171  void copy_subset_to (IndexIVFFlat & other, int subset_type,
172  long a1, long a2) const;
173 
174  void reset() override;
175 
176  long remove_ids(const IDSelector& sel) override;
177 
178  /// Implementation of the search for the inner product metric
180  size_t nx, const float * x,
181  const long * keys,
182  float_minheap_array_t * res) const;
183 
184  /// Implementation of the search for the L2 metric
185  void search_knn_L2sqr (
186  size_t nx, const float * x,
187  const long * keys,
188  float_maxheap_array_t * res) const;
189 
190  /** Update a subset of vectors.
191  *
192  * The index must have a direct_map
193  *
194  * @param nv nb of vectors to update
195  * @param idx vector indices to update, size nv
196  * @param v vectors of new values, size nv*d
197  */
198  void update_vectors (int nv, idx_t *idx, const float *v);
199 
200  void reconstruct(idx_t key, float* recons) const override;
201 
202  void merge_from_residuals(IndexIVF& other) override;
203 
204  IndexIVFFlat () {}
205 };
206 
208 
209  /// nb of dimensions of pre-filter
210  size_t fsize;
211 
212  /// norm of remainder (dimensions fsize:d)
213  std::vector<std::vector<float> > part_norms;
214 
216  Index * quantizer, size_t d, size_t nlist,
217  size_t fsize);
218 
219  void add_core(
220  idx_t n,
221  const float* x,
222  const long* xids,
223  const long* precomputed_idx) override;
224 
225  void search(
226  idx_t n,
227  const float* x,
228  idx_t k,
229  float* distances,
230  idx_t* labels) const override;
231 };
232 
233 
234 
235 } // namespace faiss
236 
237 
238 
239 
240 
241 #endif
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, float *distances, idx_t *labels) const
perform search, without computing the assignment to the quantizer
Definition: IndexIVF.cpp:382
void search_knn_L2sqr(size_t nx, const float *x, const long *keys, float_maxheap_array_t *res) const
Implementation of the search for the L2 metric.
Definition: IndexIVF.cpp:322
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
Definition: IndexIVF.cpp:139
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:47
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:572
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:23
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:50
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:400
void copy_subset_to(IndexIVFFlat &other, int subset_type, long a1, long a2) const
Definition: IndexIVF.cpp:462
void merge_from_residuals(IndexIVF &other) override
Definition: IndexIVF.cpp:450
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:131
long remove_ids(const IDSelector &sel) override
Definition: IndexIVF.cpp:540
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:55
int d
vector dimension
Definition: Index.h:64
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:49
void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:102
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:53
void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
Definition: IndexIVF.cpp:215
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:51
long idx_t
all indices are this type
Definition: Index.h:62
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:532
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:93
void update_vectors(int nv, idx_t *idx, const float *v)
Definition: IndexIVF.cpp:487
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:693
std::vector< std::vector< float > > part_norms
norm of remainder (dimensions fsize:d)
Definition: IndexIVF.h:213
size_t fsize
nb of dimensions of pre-filter
Definition: IndexIVF.h:210
virtual void merge_from_residuals(IndexIVF &other)=0
void make_direct_map(bool new_maintain_direct_map=true)
Definition: IndexIVF.cpp:68
void print_stats() const
display some stats about the inverted lists
Definition: IndexIVF.cpp:148
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx) override
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:598
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
void add(idx_t n, const float *x) override
Quantizes x and calls add_with_key.
Definition: IndexIVF.cpp:63
void search_knn_inner_product(size_t nx, const float *x, const long *keys, float_minheap_array_t *res) const
Implementation of the search for the inner product metric.
Definition: IndexIVF.cpp:271
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:58
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:168
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:43
std::vector< std::vector< float > > vecs
Definition: IndexIVF.h:135
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:220
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:372