13 #include "IndexIVFFlat.h"
19 #include "FaissAssert.h"
20 #include "IndexFlat.h"
21 #include "AuxIndexStructures.h"
30 IndexIVFFlat::IndexIVFFlat (Index * quantizer,
32 IndexIVF (quantizer, d, nlist, sizeof(float) * d, metric)
34 code_size =
sizeof(float) * d;
46 const long *precomputed_idx)
52 "cannot have direct map and add with ids");
56 if (precomputed_idx) {
57 idx = precomputed_idx;
59 long * idx0 =
new long [n];
65 for (
size_t i = 0; i < n; i++) {
66 long id = xids ? xids[i] :
ntotal + i;
67 long list_no = idx [i];
71 const float *xi = x + i *
d;
73 list_no,
id, (
const uint8_t*) xi);
76 direct_map.push_back (list_no << 32 | offset);
80 printf(
"IndexIVFFlat::add_core: added %ld / %ld vectors\n",
97 const size_t k = res->
k;
98 size_t nlistv = 0, ndis = 0;
101 #pragma omp parallel for reduction(+: nlistv, ndis)
102 for (
size_t i = 0; i < nx; i++) {
103 const float * xi = x + i * d;
104 const long * keysi = keys + i * ivf.
nprobe;
105 float * __restrict simi = res->
get_val (i);
106 long * __restrict idxi = res->
get_ids (i);
107 minheap_heapify (k, simi, idxi);
110 for (
size_t ik = 0; ik < ivf.
nprobe; ik++) {
111 long key = keysi[ik];
116 FAISS_THROW_IF_NOT_FMT (
117 key < (
long) ivf.
nlist,
118 "Invalid key=%ld at ik=%ld nlist=%ld\n",
123 const float * list_vecs =
128 for (
size_t j = 0; j < list_size; j++) {
129 const float * yj = list_vecs + d * j;
130 float ip = fvec_inner_product (xi, yj, d);
132 minheap_pop (k, simi, idxi);
133 long id = store_pairs ? (key << 32 | j) : ids[j];
134 minheap_push (k, simi, idxi, ip,
id);
142 minheap_reorder (k, simi, idxi);
144 indexIVF_stats.nq += nx;
145 indexIVF_stats.nlist += nlistv;
146 indexIVF_stats.ndis += ndis;
150 void search_knn_L2sqr (
const IndexIVFFlat &ivf,
154 float_maxheap_array_t * res,
157 const size_t k = res->k;
158 size_t nlistv = 0, ndis = 0;
160 #pragma omp parallel for reduction(+: nlistv, ndis)
161 for (
size_t i = 0; i < nx; i++) {
162 const float * xi = x + i * d;
163 const long * keysi = keys + i * ivf.nprobe;
164 float * __restrict disi = res->get_val (i);
165 long * __restrict idxi = res->get_ids (i);
166 maxheap_heapify (k, disi, idxi);
170 for (
size_t ik = 0; ik < ivf.nprobe; ik++) {
171 long key = keysi[ik];
176 FAISS_THROW_IF_NOT_FMT (
177 key < (
long) ivf.nlist,
178 "Invalid key=%ld at ik=%ld nlist=%ld\n",
182 size_t list_size = ivf.invlists->list_size(key);
183 const float * list_vecs =
184 (
const float*)ivf.invlists->get_codes (key);
186 ivf.invlists->get_ids (key);
188 for (
size_t j = 0; j < list_size; j++) {
189 const float * yj = list_vecs + d * j;
191 if (disij < disi[0]) {
192 maxheap_pop (k, disi, idxi);
193 long id = store_pairs ? (key << 32 | j) : ids[j];
194 maxheap_push (k, disi, idxi, disij,
id);
198 if (ivf.max_codes && nscan >= ivf.max_codes)
202 maxheap_reorder (k, disi, idxi);
204 indexIVF_stats.nq += nx;
205 indexIVF_stats.nlist += nlistv;
206 indexIVF_stats.ndis += ndis;
215 float *distances,
idx_t *labels,
216 bool store_pairs)
const
220 size_t(n), size_t(k), labels, distances};
221 search_knn_inner_product (*
this, n, x, idx, &res, store_pairs);
225 size_t(n), size_t(k), labels, distances};
226 search_knn_L2sqr (*
this, n, x, idx, &res, store_pairs);
242 for (
size_t i = 0; i < nx; i++) {
243 const float * xi = x + i *
d;
244 const long * keysi = keys + i *
nprobe;
249 for (
size_t ik = 0; ik <
nprobe; ik++) {
250 long key = keysi[ik];
251 if (key < 0 || key >= (
long)
nlist) {
252 fprintf (stderr,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
258 const float * list_vecs =
262 for (
size_t j = 0; j < list_size; j++) {
263 const float * yj = list_vecs + d * j;
266 if (disij < radius) {
267 qres.add (disij, ids[j]);
270 float disij = fvec_inner_product(xi, yj, d);
271 if (disij > radius) {
272 qres.add (disij, ids[j]);
288 std::vector<idx_t>
assign (n);
291 for (
size_t i = 0; i < n; i++) {
292 idx_t id = new_ids[i];
293 FAISS_THROW_IF_NOT_MSG (0 <=
id &&
id <
ntotal,
294 "id to update out of range");
296 long dm = direct_map[id];
297 long ofs = dm & 0xffffffff;
302 direct_map[id2] = (il << 32) | ofs;
303 invlists->update_entry (il, ofs, id2,
311 long dm = (il << 32) | l;
result structure for a single query
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
virtual const idx_t * get_ids(size_t list_no) const =0
T * get_val(size_t key)
Return the list of values for a heap.
size_t nprobe
number of probes at query time
void assign(idx_t n, const float *x, idx_t *labels, idx_t k=1)
virtual size_t list_size(size_t list_no) const =0
get the size of a list
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
size_t k
allocated size per heap
virtual idx_t get_single_id(size_t list_no, size_t offset) const
virtual const uint8_t * get_single_code(size_t list_no, size_t offset) const
void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code)
add one entry to an inverted list
long idx_t
all indices are this type
idx_t ntotal
total nb of indexed vectors
bool verbose
verbosity level
void reconstruct_from_offset(long list_no, long offset, float *recons) const override
QueryResult & new_result(idx_t qno)
begin a new result
void update_vectors(int nv, idx_t *idx, const float *v)
the entries in the buffers are split per query
TI * get_ids(size_t key)
Corresponding identifiers.
MetricType metric_type
type of metric this index uses for search
InvertedLists * invlists
Access to the actual data.
virtual const uint8_t * get_codes(size_t list_no) const =0
Index * quantizer
quantizer that maps vectors to inverted lists
bool is_trained
set if the Index does not require training, or if training is done already
size_t max_codes
max nb of codes to visit to do a query
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs) const override
size_t nlist
number of possible key values
size_t code_size
code size per vector in bytes
MetricType
Some algorithms support both an inner product version and an L2 search version.
virtual void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer