Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
IndexIVF_c.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 // -*- c -*-
11 
12 #ifndef FAISS_INDEX_IVF_C_H
13 #define FAISS_INDEX_IVF_C_H
14 
15 #include "faiss_c.h"
16 #include "Index_c.h"
17 #include "Clustering_c.h"
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 
23 /** Index based on an inverted file (IVF)
24  *
25  * In the inverted file, the quantizer (an Index instance) provides a
26  * quantization index for each vector to be added. The quantization
27  * index maps to a list (aka inverted list or posting list), where the
28  * id of the vector is then stored.
29  *
30  * At search time, the vector to be searched is also quantized, and
31  * only the list corresponding to the quantization index is
32  * searched. This speeds up the search by making it
33  * non-exhaustive. This can be relaxed using multi-probe search: a few
34  * (nprobe) quantization indices are selected and several inverted
35  * lists are visited.
36  *
37  * Sub-classes implement a post-filtering of the index that refines
38  * the distance estimation from the query to database vectors.
39  */
40 FAISS_DECLARE_CLASS_INHERITED(IndexIVF, Index)
41 FAISS_DECLARE_DESTRUCTOR(IndexIVF)
42 FAISS_DECLARE_INDEX_DOWNCAST(IndexIVF)
43 
44 /// number of possible key values
45 FAISS_DECLARE_GETTER(IndexIVF, size_t, nlist)
46 /// number of probes at query time
47 FAISS_DECLARE_GETTER(IndexIVF, size_t, nprobe)
48 /// quantizer that maps vectors to inverted lists
49 FAISS_DECLARE_GETTER(IndexIVF, FaissIndex*, quantizer)
50 /**
51  * = 0: use the quantizer as index in a kmeans training
52  * = 1: just pass on the training set to the train() of the quantizer
53  * = 2: kmeans training on a flat index + add the centroids to the quantizer
54  */
55 FAISS_DECLARE_GETTER(IndexIVF, char, quantizer_trains_alone)
56 
57 /// whether object owns the quantizer
58 FAISS_DECLARE_GETTER(IndexIVF, int, own_fields)
59 
60 /** moves the entries from another index to self. On output,
61  * other is empty. add_id is added to all moved ids (for
62  * sequential ids, this would be this->ntotal). */
63 int faiss_IndexIVF_merge_from(
64  FaissIndexIVF* index, FaissIndexIVF* other, idx_t add_id);
65 
66 /** copy a subset of the entries of index to the other index
67  *
68  * if subset_type == 0: copies ids in [a1, a2)
69  * if subset_type == 1: copies ids if id % a1 == a2
70  * if subset_type == 2: copies inverted lists such that a1
71  * elements are left before and a2 elements are after
72  */
73 int faiss_IndexIVF_copy_subset_to(
74  const FaissIndexIVF* index, FaissIndexIVF* other, int subset_type, long a1,
75  long a2);
76 
77 /** search a set of vectors, that are pre-quantized by the IVF
78  * quantizer. Fill in the corresponding heaps with the query
79  * results. search() calls this.
80  *
81  * @param n nb of vectors to query
82  * @param x query vectors, size n * d
83  * @param k nb of results per query (outputs are size n * k)
84  * @param assign coarse quantization indices, size n * nprobe
85  * @param centroid_dis distances to coarse centroids, size n * nprobe
86  * @param distances
87  * output distances, size n * k
88  * @param labels output labels, size n * k
89  * @param store_pairs store inv list index + inv list offset
90  * in upper/lower 32 bits of result,
91  * instead of ids (used for reranking).
92  */
93 int faiss_IndexIVF_search_preassigned (const FaissIndexIVF* index,
94  idx_t n, const float *x, idx_t k, const idx_t *assign,
95  const float *centroid_dis, float *distances, idx_t *labels,
96  int store_pairs);
97 
98 size_t faiss_IndexIVF_get_list_size(const FaissIndexIVF* index,
99  size_t list_no);
100 
101 /** initialize a direct map
102  *
103  * @param new_maintain_direct_map if true, create a direct map,
104  * else clear it
105  */
106 int faiss_IndexIVF_make_direct_map(FaissIndexIVF* index,
107  int new_maintain_direct_map);
108 
109 /// 1= perfectly balanced, >1: imbalanced
110 double faiss_IndexIVF_imbalance_factor (const FaissIndexIVF* index);
111 
112 /// display some stats about the inverted lists
113 void faiss_IndexIVF_print_stats (const FaissIndexIVF* index);
114 
115 
/** Counters describing IVF search activity. */
typedef struct FaissIndexIVFStats {
    size_t nq;    // nb of queries run
    size_t nlist; // nb of inverted lists scanned
    size_t ndis;  // nb of distances computed
} FaissIndexIVFStats;
121 
122 void faiss_IndexIVFStats_reset(FaissIndexIVFStats* stats);
123 
124 inline void faiss_IndexIVFStats_init(FaissIndexIVFStats* stats) {
125  faiss_IndexIVFStats_reset(stats);
126 }
127 
128 /** Inverted file with stored vectors. Here the inverted file
129  * pre-selects the vectors to be searched, but they are not otherwise
130  * encoded, the code array just contains the raw float entries.
131  */
132 FAISS_DECLARE_CLASS(IndexIVFFlat)
133 FAISS_DECLARE_DESTRUCTOR(IndexIVFFlat)
134 
135 int faiss_IndexIVFFlat_new(FaissIndexIVFFlat** p_index);
136 
137 int faiss_IndexIVFFlat_new_with(FaissIndexIVFFlat** p_index,
138  FaissIndex* quantizer, size_t d, size_t nlist);
139 
140 int faiss_IndexIVFFlat_new_with_metric(
141  FaissIndexIVFFlat** p_index, FaissIndex* quantizer, size_t d, size_t nlist,
142  FaissMetricType metric);
143 
144 int faiss_IndexIVFFlat_add_core(FaissIndexIVFFlat* index, idx_t n,
145  const float * x, const long *xids, const long *precomputed_idx);
146 
147 /** Update a subset of vectors.
148  *
149  * The index must have a direct_map
150  *
151  * @param nv nb of vectors to update
152  * @param idx vector indices to update, size nv
153  * @param v vectors of new values, size nv*d
154  */
155 int faiss_IndexIVFFlat_update_vectors(FaissIndexIVFFlat* index, int nv,
156  idx_t *idx, const float *v);
157 
158 #ifdef __cplusplus
159 }
160 #endif
161 
162 
163 #endif