21 #include "FaissAssert.h"
22 #include "IndexFlat.h"
23 #include "AuxIndexStructures.h"
32 IndexIVF::IndexIVF (
Index * quantizer,
size_t d,
size_t nlist,
37 quantizer (quantizer),
38 quantizer_trains_alone (false),
41 maintain_direct_map (false)
43 FAISS_ASSERT (d == quantizer->
d);
57 IndexIVF::IndexIVF ():
58 nlist (0), nprobe (1), quantizer (nullptr),
59 quantizer_trains_alone (false), own_fields (false),
60 maintain_direct_map (false)
73 direct_map.resize (
ntotal, -1);
74 for (
size_t key = 0; key <
nlist; key++) {
75 const std::vector<long> & idlist =
ids[key];
77 for (
long ofs = 0; ofs < idlist.size(); ofs++) {
78 direct_map [idlist [ofs]] =
91 for (
size_t i = 0; i <
ids.size(); i++)
100 printf (
"IVF quantizer does not need training.\n");
103 printf (
"IVF quantizer trains alone...\n");
106 !
"nlist not consistent with quantizer size");
109 printf (
"Training IVF quantizer on %ld vectors in %dD\n",
119 printf (
"Training IVF residual\n");
128 printf (
"IndexIVF: no residual training\n");
136 std::vector<int> hist (
nlist);
137 for (
int i = 0; i <
nlist; i++) {
138 hist[i] =
ids[i].size();
145 std::vector<int> sizes(40);
146 for (
int i = 0; i <
nlist; i++) {
147 for (
int j = 0; j < sizes.size(); j++) {
148 if ((
ids[i].size() >> j) == 0) {
154 for (
int i = 0; i < sizes.size(); i++) {
156 printf (
"list size in < %d: %d instances\n",
166 FAISS_ASSERT (other.
d ==
d);
169 !
"direct map copy not implemented");
170 FAISS_ASSERT (
typeid (*
this) ==
typeid (other) ||
171 !
"can only merge indexes of the same type");
172 for (
long i = 0; i <
nlist; i++) {
173 std::vector<idx_t> & src = other.
ids[i];
174 std::vector<idx_t> & dest =
ids[i];
175 for (
long j = 0; j < src.size(); j++)
176 dest.push_back (src[j] + add_id);
187 IndexIVF::~IndexIVF()
198 IndexIVFFlat::IndexIVFFlat (Index * quantizer,
200 IndexIVF (quantizer, d, nlist, metric)
215 s <<
"[" << nlist <<
":" << quantizer->index_typename <<
"]";
216 index_typename = s.str();
230 const long *precomputed_idx)
236 if (precomputed_idx) {
237 idx = precomputed_idx;
239 long * idx0 =
new long [n];
240 quantizer->assign (n, x, idx0);
244 for (
size_t i = 0; i < n; i++) {
245 long id = xids ? xids[i] :
ntotal + i;
246 long list_no = idx [i];
249 FAISS_ASSERT (list_no < nlist);
251 ids[list_no].push_back (
id);
252 const float *xi = x + i *
d;
254 for (
size_t j = 0 ; j <
d ; j++)
255 vecs[list_no].push_back (xi [j]);
258 direct_map.push_back (list_no << 32 | (
ids[list_no].size() - 1));
262 printf(
"IndexIVFFlat::add_core: added %ld / %ld vectors\n",
265 if (!precomputed_idx)
276 const long * __restrict keys,
280 const size_t k = res->
k;
282 #pragma omp parallel for
283 for (
size_t i = 0; i < nx; i++) {
284 const float * xi = x + i *
d;
285 const long * keysi = keys + i *
nprobe;
286 float * __restrict simi = res->
get_val (i);
287 long * __restrict idxi = res->
get_ids (i);
288 minheap_heapify (k, simi, idxi);
290 for (
size_t ik = 0; ik <
nprobe; ik++) {
291 long key = keysi[ik];
296 if (key >= (
long) nlist) {
297 fprintf (stderr,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
302 const size_t list_size =
ids[key].size();
303 const float * list_vecs =
vecs[key].data();
305 for (
size_t j = 0; j < list_size; j++) {
306 const float * yj = list_vecs + d * j;
307 float ip = fvec_inner_product (xi, yj, d);
309 minheap_pop (k, simi, idxi);
310 minheap_push (k, simi, idxi, ip,
ids[key][j]);
314 minheap_reorder (k, simi, idxi);
322 const long * __restrict keys,
325 const size_t k = res->
k;
327 #pragma omp parallel for
328 for (
size_t i = 0; i < nx; i++) {
329 const float * xi = x + i *
d;
330 const long * keysi = keys + i *
nprobe;
331 float * __restrict disi = res->
get_val (i);
332 long * __restrict idxi = res->
get_ids (i);
333 maxheap_heapify (k, disi, idxi);
335 for (
size_t ik = 0; ik <
nprobe; ik++) {
336 long key = keysi[ik];
341 if (key >= (
long) nlist) {
342 fprintf (stderr,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
347 const size_t list_size =
ids[key].size();
348 const float * list_vecs =
vecs[key].data();
350 for (
size_t j = 0; j < list_size; j++) {
351 const float * yj = list_vecs + d * j;
353 if (disij < disi[0]) {
354 maxheap_pop (k, disi, idxi);
355 maxheap_push (k, disi, idxi, disij,
ids[key][j]);
359 maxheap_reorder (k, disi, idxi);
365 float *distances,
idx_t *labels)
const
368 quantizer->assign (n, x, idx,
nprobe);
372 size_t(n), size_t(k), labels, distances};
377 size_t(n), size_t(k), labels, distances};
389 quantizer->assign (nx, x, keys,
nprobe);
391 assert (
metric_type == METRIC_L2 || !
"Only L2 implemented");
396 for (
size_t i = 0; i < nx; i++) {
397 const float * xi = x + i *
d;
398 const long * keysi = keys + i *
nprobe;
403 for (
size_t ik = 0; ik <
nprobe; ik++) {
404 long key = keysi[ik];
405 if (key < 0 || key >= (
long) nlist) {
406 fprintf (stderr,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
411 const size_t list_size =
ids[key].size();
412 const float * list_vecs =
vecs[key].data();
414 for (
size_t j = 0; j < list_size; j++) {
415 const float * yj = list_vecs + d * j;
417 if (disij < radius) {
418 qres.add (disij,
ids[key][j]);
432 for (
int i = 0; i <
nlist; i++) {
433 std::vector<float> & src = other.
vecs[i];
434 std::vector<float> & dest =
vecs[i];
435 for (
int j = 0; j < src.size(); j++)
436 dest.push_back (src[j]);
442 long a1,
long a2)
const
444 FAISS_ASSERT (nlist == other.
nlist);
447 for (
long list_no = 0; list_no <
nlist; list_no++) {
448 const std::vector<idx_t> & ids_in =
ids[list_no];
449 std::vector<idx_t> & ids_out = other.
ids[list_no];
450 const std::vector<float> & vecs_in =
vecs[list_no];
451 std::vector<float> & vecs_out = other.
vecs[list_no];
453 for (
long i = 0; i < ids_in.size(); i++) {
454 idx_t id = ids_in[i];
455 if (subset_type == 0 && a1 <=
id &&
id < a2) {
456 ids_out.push_back (
id);
457 vecs_out.insert (vecs_out.end(),
458 vecs_in.begin() + i *
d,
459 vecs_in.begin() + (i + 1) * d);
471 for (
size_t key = 0; key <
nlist; key++) {
479 !
"direct map remove not implemented");
481 #pragma omp parallel for reduction(+: nremove)
482 for (
long i = 0; i <
nlist; i++) {
483 std::vector<idx_t> & idsi =
ids[i];
484 float *vecsi =
vecs[i].data();
486 long l = idsi.size(), j = 0;
488 if (sel.is_member (idsi[j])) {
491 memmove (vecsi + j * d,
492 vecsi + l * d, d *
sizeof (
float));
497 if (l < idsi.size()) {
498 nremove += idsi.size() - l;
500 vecs[i].resize (l * d);
510 assert (direct_map.size() ==
ntotal);
511 int list_no = direct_map[key] >> 32;
512 int ofs = direct_map[key] & 0xffffffff;
513 memcpy (recons, &
vecs[list_no][ofs * d], d *
sizeof(recons[0]));
int niter
clustering iterations
result structure for a single query
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
void search_knn_L2sqr(size_t nx, const float *x, const long *keys, float_maxheap_array_t *res) const
Implementation of the search for the L2 metric.
T * get_val(size_t key)
Return the list of values for a heap.
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
virtual void reset()=0
removes all elements from the database.
size_t nprobe
number of probes at query time
virtual void reconstruct(idx_t key, float *recons) const override
bool quantizer_trains_alone
just pass over the trainset to quantizer
virtual void set_typename() override
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
void copy_subset_to(IndexIVFFlat &other, int subset_type, long a1, long a2) const
virtual void merge_from_residuals(IndexIVF &other) override
virtual void add_with_ids(idx_t n, const float *x, const long *xids)
virtual void train_residual(idx_t n, const float *x)
size_t k
allocated size per heap
double imbalance_factor(int n, int k, const long *assign)
a balanced assignment has a IF of 1
virtual long remove_ids(const IDSelector &sel) override
std::vector< std::vector< long > > ids
Inverted lists for indexes.
Index * quantizer
quantizer that maps vectors to inverted lists
virtual void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers.
ClusteringParameters cp
to override default clustering params
virtual void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes
bool own_fields
whether object owns the quantizer
long idx_t
all indices are this type
virtual void reset() override
removes all elements from the database.
void make_direct_map()
intialize a direct map
idx_t ntotal
total nb of indexed vectors
bool verbose
verbosity level
virtual void reset() override
removes all elements from the database.
QueryResult & new_result(idx_t qno)
begin a new result
the entries in the buffers are split per query
virtual void merge_from_residuals(IndexIVF &other)=0
TI * get_ids(size_t key)
Correspponding identifiers.
MetricType metric_type
type of metric this index uses for search
void print_stats() const
display some stats about the inverted lists
size_t nlist
number of possible key values
virtual void add(idx_t n, const float *x) override
Quantizes x and calls add_with_key.
virtual void train(idx_t n, const float *x, faiss::Index &index)
Index is used during the assignment stage.
bool is_trained
set if the Index does not require training, or if training is done already
void search_knn_inner_product(size_t nx, const float *x, const long *keys, float_minheap_array_t *res) const
Implementation of the search for the inner product metric.
virtual void train(idx_t n, const float *x)
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct().
bool spherical
do we want normalized centroids?
virtual void merge_from(IndexIVF &other, idx_t add_id)
MetricType
Some algorithms support both an inner product vetsion and a L2 search version.
std::vector< std::vector< float > > vecs
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override