Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
IndexIVF.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 // -*- c++ -*-
12 
13 #ifndef FAISS_INDEX_IVF_H
14 #define FAISS_INDEX_IVF_H
15 
16 
17 #include <vector>
18 
19 
20 #include "Index.h"
21 #include "Clustering.h"
22 #include "Heap.h"
23 
24 
25 namespace faiss {
26 
27 
28 
29 /** Index based on an inverted file (IVF)
30  *
31  * In the inverted file, the quantizer (an Index instance) provides a
32  * quantization index for each vector to be added. The quantization
33  * index maps to a list (aka inverted list or posting list), where the
34  * id of the vector is then stored.
35  *
36  * At search time, the vector to be searched is also quantized, and
37  * only the list corresponding to the quantization index is
38  * searched. This speeds up the search by making it
39  * non-exhaustive. This can be relaxed using multi-probe search: a few
40  * (nprobe) quantization indices are selected and several inverted
41  * lists are visited.
42  *
43  * Sub-classes implement a post-filtering of the index that refines
44  * the distance estimation from the query to database vectors.
45  */
46 struct IndexIVF: Index {
47  size_t nlist; ///< number of possible key values
48  size_t nprobe; ///< number of probes at query time
49 
50  Index * quantizer; ///< quantizer that maps vectors to inverted lists
51  bool quantizer_trains_alone; ///< just pass over the trainset to quantizer
52  bool own_fields; ///< whether object owns the quantizer
53 
54  ClusteringParameters cp; ///< to override default clustering params
55 
56  std::vector < std::vector<long> > ids; ///< Inverted lists for indexes
57 
58  /// map for direct access to the elements. Enables reconstruct().
60  std::vector <long> direct_map;
61 
62  /** The Inverted file takes a quantizer (an Index) on input,
63  * which implements the function mapping a vector to a list
64  * identifier. The pointer is borrowed: the quantizer should not
65  * be deleted while the IndexIVF is in use.
66  */
67  IndexIVF (Index * quantizer, size_t d, size_t nlist,
68  MetricType metric = METRIC_INNER_PRODUCT);
69 
70  virtual void reset () override;
71 
72  /// Trains the quantizer and calls train_residual to train sub-quantizers
73  virtual void train (idx_t n, const float *x) override;
74 
75  /// Quantizes x and calls add_with_key
76  virtual void add (idx_t n, const float *x) override;
77 
78 
79 
80  /// Sub-classes that encode the residuals can train their encoders here
81  /// does nothing by default
82  virtual void train_residual (idx_t n, const float *x);
83 
84  /** moves the entries from another dataset to self. On output,
85  * other is empty. add_id is added to all moved ids (for
86  * sequential ids, this would be this->ntotal) */
87  virtual void merge_from (IndexIVF &other, idx_t add_id);
88 
89  /** implemented by sub-classes */
90  virtual void merge_from_residuals (IndexIVF &other) = 0;
91 
92  virtual ~IndexIVF();
93 
94  /// initialize a direct map
95  void make_direct_map ();
96 
97  /// 1= perfectly balanced, >1: imbalanced
98  double imbalance_factor () const;
99 
100  /// display some stats about the inverted lists
101  void print_stats () const;
102 
103  IndexIVF ();
104 };
105 
106 
107 
108 
109 /** Inverted file with stored vectors. Here the inverted file
110  * pre-selects the vectors to be searched, but they are not otherwise
111  * encoded.
112  */
114  /** Inverted list of original vectors. Each list is a nl * d
115  * matrix, where nl is the nb of vectors stored in the list. */
116  std::vector < std::vector<float> > vecs;
117 
118  IndexIVFFlat (
119  Index * quantizer, size_t d, size_t nlist_,
120  MetricType = METRIC_INNER_PRODUCT);
121 
122  /** Return the typeName of the index (which includes main parameters) */
123  virtual void set_typename () override;
124 
125 
126  /// same as add_with_ids, with precomputed coarse quantizer
127  void add_core (idx_t n, const float * x, const long *xids,
128  const long *precomputed_idx);
129 
130 
131  /// implemented for all IndexIVF* classes
132  virtual void add_with_ids (idx_t n, const float * x, const long *xids)
133  override;
134 
135  virtual void search (
136  idx_t n, const float *x, idx_t k,
137  float *distances, idx_t *labels) const override;
138 
139  virtual void range_search (
140  idx_t n, const float *x, float radius,
141  RangeSearchResult *result) const override;
142 
143  /** copy a subset of the index entries to the other index
144  *
145  * if subset_type == 0: copies ids in [a1, a2)
146  * if subset_type == 1: copies ids if id % a1 == a2
147  */
148  void copy_subset_to (IndexIVFFlat & other, int subset_type,
149  long a1, long a2) const;
150 
151 
152  virtual void reset() override;
153 
154  virtual long remove_ids (const IDSelector & sel) override;
155 
156  /// Implementation of the search for the inner product metric
158  size_t nx, const float * x,
159  const long * keys,
160  float_minheap_array_t * res) const;
161 
162  /// Implementation of the search for the L2 metric
163  void search_knn_L2sqr (
164  size_t nx, const float * x,
165  const long * keys,
166  float_maxheap_array_t * res) const;
167 
168  virtual void reconstruct (idx_t key, float * recons) const override;
169 
170  virtual void merge_from_residuals (IndexIVF &other) override;
171 
172 
173  IndexIVFFlat () {}
174 };
175 
176 
177 
178 
179 
180 
181 } // namespace faiss
182 
183 
184 
185 
186 
187 #endif
void search_knn_L2sqr(size_t nx, const float *x, const long *keys, float_maxheap_array_t *res) const
Implementation of the search for the L2 metric.
Definition: IndexIVF.cpp:319
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
Definition: IndexIVF.cpp:134
size_t nprobe
number of probes at query time
Definition: IndexIVF.h:48
virtual void reconstruct(idx_t key, float *recons) const override
Definition: IndexIVF.cpp:508
bool quantizer_trains_alone
just pass over the trainset to quantizer
Definition: IndexIVF.h:51
virtual void set_typename() override
Definition: IndexIVF.cpp:207
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
Definition: IndexIVF.cpp:385
void copy_subset_to(IndexIVFFlat &other, int subset_type, long a1, long a2) const
Definition: IndexIVF.cpp:441
virtual void merge_from_residuals(IndexIVF &other) override
Definition: IndexIVF.cpp:429
virtual void train_residual(idx_t n, const float *x)
Definition: IndexIVF.cpp:125
virtual long remove_ids(const IDSelector &sel) override
Definition: IndexIVF.cpp:476
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Definition: IndexIVF.h:56
int d
vector dimension
Definition: Index.h:66
Index * quantizer
quantizer that maps vectors to inverted lists
Definition: IndexIVF.h:50
virtual void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
Definition: IndexIVF.cpp:96
ClusteringParameters cp
to override default clustering params
Definition: IndexIVF.h:54
virtual void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
Definition: IndexIVF.cpp:224
bool own_fields
whether object owns the quantizer
Definition: IndexIVF.h:52
long idx_t
all indices are this type
Definition: Index.h:64
virtual void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:468
void make_direct_map()
initialize a direct map
Definition: IndexIVF.cpp:69
virtual void reset() override
removes all elements from the database.
Definition: IndexIVF.cpp:87
virtual void merge_from_residuals(IndexIVF &other)=0
void print_stats() const
display some stats about the inverted lists
Definition: IndexIVF.cpp:143
size_t nlist
number of possible key values
Definition: IndexIVF.h:47
virtual void add(idx_t n, const float *x) override
Quantizes x and calls add_with_key.
Definition: IndexIVF.cpp:64
void search_knn_inner_product(size_t nx, const float *x, const long *keys, float_minheap_array_t *res) const
Implementation of the search for the inner product metric.
Definition: IndexIVF.cpp:273
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
Definition: IndexIVF.h:59
virtual void merge_from(IndexIVF &other, idx_t add_id)
Definition: IndexIVF.cpp:163
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:44
std::vector< std::vector< float > > vecs
Definition: IndexIVF.h:116
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
Definition: IndexIVF.cpp:229
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
Definition: IndexIVF.cpp:364