Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/IndexIVF.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 // -*- c++ -*-
11 
12 #ifndef FAISS_INDEX_IVF_H
13 #define FAISS_INDEX_IVF_H
14 
15 
16 #include <vector>
17 
18 
19 #include "Index.h"
20 #include "Clustering.h"
21 #include "Heap.h"
22 
23 
24 namespace faiss {
25 
26 
27 
28 /** Index based on an inverted file (IVF)
29  *
30  * In the inverted file, the quantizer (an Index instance) provides a
31  * quantization index for each vector to be added. The quantization
32  * index maps to a list (aka inverted list or posting list), where the
33  * id of the vector is then stored.
34  *
35  * At search time, the vector to be searched is also quantized, and
36  * only the list corresponding to the quantization index is
37  * searched. This speeds up the search by making it
38  * non-exhaustive. This can be relaxed using multi-probe search: a few
39  * (nprobe) quantization indices are selected and several inverted
40  * lists are visited.
41  *
42  * Sub-classes implement a post-filtering of the index that refines
43  * the distance estimation from the query to database vectors.
44  */
45 struct IndexIVF: Index {
46  size_t nlist; ///< number of possible key values
47  size_t nprobe; ///< number of probes at query time
48 
49  Index * quantizer; ///< quantizer that maps vectors to inverted lists
50  bool quantizer_trains_alone; ///< just pass over the trainset to quantizer
51  bool own_fields; ///< whether object owns the quantizer
52 
53  ClusteringParameters cp; ///< to override default clustering params
54 
55  std::vector < std::vector<long> > ids; ///< Inverted lists for indexes
56 
57  size_t code_size; ///< code size per vector in bytes
58  std::vector < std::vector<uint8_t> > codes; // binary codes, size nlist
59 
60  /// map for direct access to the elements. Enables reconstruct().
62  std::vector <long> direct_map;
63 
64  /** The Inverted file takes a quantizer (an Index) on input,
65  * which implements the function mapping a vector to a list
66  * identifier. The pointer is borrowed: the quantizer should not
67  * be deleted while the IndexIVF is in use.
68  */
69  IndexIVF (Index * quantizer, size_t d, size_t nlist,
70  MetricType metric = METRIC_INNER_PRODUCT);
71 
72  void reset() override;
73 
74  /// Trains the quantizer and calls train_residual to train sub-quantizers
75  void train(idx_t n, const float* x) override;
76 
77  /// Quantizes x and calls add_with_key
78  void add(idx_t n, const float* x) override;
79 
80  /// Sub-classes that encode the residuals can train their encoders here
81  /// does nothing by default
82  virtual void train_residual (idx_t n, const float *x);
83 
84 
85  /** search a set of vectors, that are pre-quantized by the IVF
86  * quantizer. Fill in the corresponding heaps with the query
87  * results. search() calls this.
88  *
89  * @param n nb of vectors to query
90  * @param x query vectors, size nx * d
91  * @param assign coarse quantization indices, size nx * nprobe
92  * @param centroid_dis
93  * distances to coarse centroids, size nx * nprobe
94  * @param distance
95  * output distances, size n * k
96  * @param labels output labels, size n * k
97  * @param store_pairs store inv list index + inv list offset
98  * instead in upper/lower 32 bit of result,
99  * instead of ids (used for reranking).
100  */
101  virtual void search_preassigned (idx_t n, const float *x, idx_t k,
102  const idx_t *assign,
103  const float *centroid_dis,
104  float *distances, idx_t *labels,
105  bool store_pairs) const = 0;
106 
107  /** assign the vectors, then call search_preassign */
108  virtual void search (idx_t n, const float *x, idx_t k,
109  float *distances, idx_t *labels) const override;
110 
111 
112  /// Dataset manipulation functions
113 
114  long remove_ids(const IDSelector& sel) override;
115 
116  /** moves the entries from another dataset to self. On output,
117  * other is empty. add_id is added to all moved ids (for
118  * sequential ids, this would be this->ntotal */
119  virtual void merge_from (IndexIVF &other, idx_t add_id);
120 
121  /** copy a subset of the entries index to the other index
122  *
123  * if subset_type == 0: copies ids in [a1, a2)
124  * if subset_type == 1: copies ids if id % a1 == a2
125  * if subset_type == 2: copies inverted lists such that a1
126  * elements are left before and a2 elements are after
127  */
128  virtual void copy_subset_to (IndexIVF & other, int subset_type,
129  long a1, long a2) const;
130 
131  ~IndexIVF() override;
132 
133  size_t get_list_size (size_t list_no) const
134  { return ids[list_no].size(); }
135 
136  /** intialize a direct map
137  *
138  * @param new_maintain_direct_map if true, create a direct map,
139  * else clear it
140  */
141  void make_direct_map (bool new_maintain_direct_map=true);
142 
143  /// 1= perfectly balanced, >1: imbalanced
144  double imbalance_factor () const;
145 
146  /// display some stats about the inverted lists
147  void print_stats () const;
148 
149  IndexIVF ();
150 };
151 
152 
153 struct IndexIVFFlatStats { // counters describing search activity
154  size_t nq; // nb of queries run
155  size_t nlist; // nb of inverted lists scanned
156  size_t ndis; // nb of distances computed
157  size_t npartial; // nb of bound computations (IndexIVFFlatIPBounds)
158 
159  IndexIVFFlatStats () {reset (); } // construction delegates to reset()
160  void reset ();
161 };
162 
163 // global var that collects them all
164 extern IndexIVFFlatStats indexIVFFlat_stats;
165 
166 
167 
168 
169 
170 /** Inverted file with stored vectors. Here the inverted file
171  * pre-selects the vectors to be searched, but they are not otherwise
172  * encoded, the code array just contains the raw float entries.
173  */
174 struct IndexIVFFlat: IndexIVF {
175 
176  IndexIVFFlat (
177  Index * quantizer, size_t d, size_t nlist_,
178  MetricType = METRIC_INNER_PRODUCT);
179 
180  /// same as add_with_ids, with precomputed coarse quantizer
181  virtual void add_core (idx_t n, const float * x, const long *xids,
182  const long *precomputed_idx);
183 
184  /// implemented for all IndexIVF* classes
185  void add_with_ids(idx_t n, const float* x, const long* xids) override;
186 
187  void search_preassigned (idx_t n, const float *x, idx_t k,
188  const idx_t *assign,
189  const float *centroid_dis,
190  float *distances, idx_t *labels,
191  bool store_pairs) const override;
192 
193  void range_search(
194  idx_t n,
195  const float* x,
196  float radius,
197  RangeSearchResult* result) const override;
198 
199  /** Update a subset of vectors.
200  *
201  * The index must have a direct_map
202  *
203  * @param nv nb of vectors to update
204  * @param idx vector indices to update, size nv
205  * @param v vectors of new values, size nv*d
206  */
207  void update_vectors (int nv, idx_t *idx, const float *v);
208 
209  void reconstruct(idx_t key, float* recons) const override;
210 
211  IndexIVFFlat () {}
212 };
213 
214 
215 
216 } // namespace faiss
217 
218 
219 
220 
221 
222 #endif
virtual void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const =0
double imbalance_factor() const
1 = perfectly balanced, >1: imbalanced
Definition: IndexIVF.cpp:192
virtual void copy_subset_to(IndexIVF &other, int subset_type, long a1, long a2) const
Definition: IndexIVF.cpp:248
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:47
void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:610
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
Definition: Index.cpp:23
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:50
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:512
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:184
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:55
int d
vector dimension
Definition: Index.h:64
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:49
void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:155
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:53
void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
Definition: IndexIVF.cpp:325
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:51
long idx_t
all indices are this type
Definition: Index.h:62
void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:110
void update_vectors(int nv, idx_t *idx, const float *v)
Definition: IndexIVF.cpp:562
void make_direct_map(bool new_maintain_direct_map=true)
Definition: IndexIVF.cpp:69
void print_stats() const
display some stats about the inverted lists
Definition: IndexIVF.cpp:201
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
void add(idx_t n, const float *x) override
Quantizes x and calls add_with_key.
Definition: IndexIVF.cpp:64
long remove_ids(const IDSelector &sel) override
Dataset manipulation functions.
Definition: IndexIVF.cpp:121
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:61
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:94
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const override
Definition: IndexIVF.cpp:493
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:221
size_t code_size
code size per vector in bytes
Definition: IndexIVF.h:57
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:43
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:330