Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVF.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the CC-by-NC license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 // -*- c++ -*-
11 
12 #ifndef FAISS_INDEX_IVF_H
13 #define FAISS_INDEX_IVF_H
14 
15 
16 #include <vector>
17 
18 
19 #include "Index.h"
20 #include "Clustering.h"
21 #include "Heap.h"
22 
23 
24 namespace faiss {
25 
26 
27 
28 /** Index based on an inverted file (IVF)
29  *
30  * In the inverted file, the quantizer (an Index instance) provides a
31  * quantization index for each vector to be added. The quantization
32  * index maps to a list (aka inverted list or posting list), where the
33  * id of the vector is then stored.
34  *
35  * At search time, the vector to be searched is also quantized, and
36  * only the list corresponding to the quantization index is
37  * searched. This speeds up the search by making it
38  * non-exhaustive. This can be relaxed using multi-probe search: a few
39  * (nprobe) quantization indices are selected and several inverted
40  * lists are visited.
41  *
42  * Sub-classes implement a post-filtering of the index that refines
43  * the distance estimation from the query to database vectors.
44  */
45 struct IndexIVF: Index {
46  size_t nlist; ///< number of possible key values
47  size_t nprobe; ///< number of probes at query time
48 
49  Index * quantizer; ///< quantizer that maps vectors to inverted lists
50  bool quantizer_trains_alone; ///< just pass over the trainset to quantizer
51  bool own_fields; ///< whether object owns the quantizer
52 
53  ClusteringParameters cp; ///< to override default clustering params
54 
55  std::vector < std::vector<long> > ids; ///< Inverted lists for indexes
56 
57  /// map for direct access to the elements. Enables reconstruct().
58  bool maintain_direct_map;
59  std::vector <long> direct_map;
60 
61  /** The Inverted file takes a quantizer (an Index) on input,
62  * which implements the function mapping a vector to a list
63  * identifier. The pointer is borrowed: the quantizer should not
64  * be deleted while the IndexIVF is in use.
65  */
66  IndexIVF (Index * quantizer, size_t d, size_t nlist,
67  MetricType metric = METRIC_INNER_PRODUCT);
68 
69  void reset() override;
70 
71  /// Trains the quantizer and calls train_residual to train sub-quantizers
72  void train(idx_t n, const float* x) override;
73 
74  /// Quantizes x and calls add_with_key
75  void add(idx_t n, const float* x) override;
76 
77  /// Sub-classes that encode the residuals can train their encoders here
78  /// does nothing by default
79  virtual void train_residual (idx_t n, const float *x);
80 
81  /** moves the entries from another dataset to self. On output,
82  * other is empty. add_id is added to all moved ids (for
83  * sequential ids, this would be this->ntotal) */
84  virtual void merge_from (IndexIVF &other, idx_t add_id);
85 
86  /** implemented by sub-classes */
87  virtual void merge_from_residuals (IndexIVF &other) = 0;
88 
89  ~IndexIVF() override;
90 
91  size_t get_list_size (size_t list_no) const
92  { return ids[list_no].size(); }
93 
94 
95  /// initialize a direct map
96  void make_direct_map ();
97 
98  /// 1= perfectly balanced, >1: imbalanced
99  double imbalance_factor () const;
100 
101  /// display some stats about the inverted lists
102  void print_stats () const;
103 
104  IndexIVF ();
105 };
106 
107 
108 struct IndexIVFFlatStats {
109  size_t nq; // nb of queries run
110  size_t nlist; // nb of inverted lists scanned
111  size_t ndis; // nb of distances computed
112  size_t npartial; // nb of bound computations (IndexIVFFlatIPBounds)
113 
114  IndexIVFFlatStats () {reset (); }
115  void reset ();
116 };
117 
118 // global var that collects them all
119 extern IndexIVFFlatStats indexIVFFlat_stats;
120 
121 
122 
123 
124 
125 /** Inverted file with stored vectors. Here the inverted file
126  * pre-selects the vectors to be searched, but they are not otherwise
127  * encoded.
128  */
129 struct IndexIVFFlat: IndexIVF {
130  /** Inverted list of original vectors. Each list is a nl * d
131  * matrix, where nl is the nb of vectors stored in the list. */
132  std::vector < std::vector<float> > vecs;
133 
134  IndexIVFFlat (
135  Index * quantizer, size_t d, size_t nlist_,
136  MetricType = METRIC_INNER_PRODUCT);
137 
138  /// same as add_with_ids, with precomputed coarse quantizer
139  virtual void add_core (idx_t n, const float * x, const long *xids,
140  const long *precomputed_idx);
141 
142  /// implemented for all IndexIVF* classes
143  void add_with_ids(idx_t n, const float* x, const long* xids) override;
144 
145  void search(
146  idx_t n,
147  const float* x,
148  idx_t k,
149  float* distances,
150  idx_t* labels) const override;
151 
152  /// perform search, without computing the assignment to the quantizer
153  void search_preassigned (idx_t n, const float *x, idx_t k,
154  const idx_t *assign,
155  float *distances, idx_t *labels) const;
156 
157  void range_search(
158  idx_t n,
159  const float* x,
160  float radius,
161  RangeSearchResult* result) const override;
162 
163  /** copy a subset of the entries index to the other index
164  *
165  * if subset_type == 0: copies ids in [a1, a2)
166  * if subset_type == 1: copies ids if id % a1 == a2
167  */
168  void copy_subset_to (IndexIVFFlat & other, int subset_type,
169  long a1, long a2) const;
170 
171  void reset() override;
172 
173  long remove_ids(const IDSelector& sel) override;
174 
175  /// Implementation of the search for the inner product metric
176  void search_knn_inner_product (
177  size_t nx, const float * x,
178  const long * keys,
179  float_minheap_array_t * res) const;
180 
181  /// Implementation of the search for the L2 metric
182  void search_knn_L2sqr (
183  size_t nx, const float * x,
184  const long * keys,
185  float_maxheap_array_t * res) const;
186 
187  void reconstruct(idx_t key, float* recons) const override;
188 
189  void merge_from_residuals(IndexIVF& other) override;
190 
191  IndexIVFFlat () {}
192 };
193 
194 struct IndexIVFFlatIPBounds: IndexIVFFlat {
195 
196  /// nb of dimensions of pre-filter
197  size_t fsize;
198 
199  /// norm of remainder (dimensions fsize:d)
200  std::vector<std::vector<float> > part_norms;
201 
202  IndexIVFFlatIPBounds (
203  Index * quantizer, size_t d, size_t nlist,
204  size_t fsize);
205 
206  void add_core(
207  idx_t n,
208  const float* x,
209  const long* xids,
210  const long* precomputed_idx) override;
211 
212  void search(
213  idx_t n,
214  const float* x,
215  idx_t k,
216  float* distances,
217  idx_t* labels) const override;
218 };
219 
220 
221 
222 } // namespace faiss
223 
224 
225 
226 
227 
228 #endif
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, float *distances, idx_t *labels) const
perform search, without computing the assignment to the quantizer
Definition: IndexIVF.cpp:375
void search_knn_L2sqr(size_t nx, const float *x, const long *keys, float_maxheap_array_t *res) const
Implementation of the search for the L2 metric.
Definition: IndexIVF.cpp:315
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
Definition: IndexIVF.cpp:133
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:47
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:522
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:23
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:50
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:393
void copy_subset_to(IndexIVFFlat &other, int subset_type, long a1, long a2) const
Definition: IndexIVF.cpp:455
void merge_from_residuals(IndexIVF &other) override
Definition: IndexIVF.cpp:443
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:124
long remove_ids(const IDSelector &sel) override
Definition: IndexIVF.cpp:490
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:55
int d
vector dimension
Definition: Index.h:64
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:49
void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:95
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:53
void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
Definition: IndexIVF.cpp:210
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:51
long idx_t
all indices are this type
Definition: Index.h:62
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:482
void make_direct_map()
initialize a direct map
Definition: IndexIVF.cpp:68
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:86
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:643
std::vector< std::vector< float > > part_norms
norm of remainder (dimensions fsize:d)
Definition: IndexIVF.h:200
size_t fsize
nb of dimensions of pre-filter
Definition: IndexIVF.h:197
virtual void merge_from_residuals(IndexIVF &other)=0
void print_stats() const
display some stats about the inverted lists
Definition: IndexIVF.cpp:142
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx) override
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:548
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
void add(idx_t n, const float *x) override
Quantizes x and calls add_with_key.
Definition: IndexIVF.cpp:63
void search_knn_inner_product(size_t nx, const float *x, const long *keys, float_minheap_array_t *res) const
Implementation of the search for the inner product metric.
Definition: IndexIVF.cpp:264
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:58
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:162
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:43
std::vector< std::vector< float > > vecs
Definition: IndexIVF.h:132
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:215
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:365