11 #include "MetaIndexes.h"
17 #include "FaissAssert.h"
19 #include "AuxIndexStructures.h"
28 IndexIDMap::IndexIDMap (Index *index):
32 FAISS_THROW_IF_NOT_MSG (index->ntotal == 0,
"index must be empty on input");
33 is_trained = index->is_trained;
34 metric_type = index->metric_type;
35 verbose = index->verbose;
41 FAISS_THROW_MSG (
"add does not make sense with IndexIDMap, "
49 is_trained = index->is_trained;
63 for (
idx_t i = 0; i < n; i++)
64 id_map.push_back (xids[i]);
65 ntotal = index->ntotal;
70 float *distances,
idx_t *labels)
const
72 index->search (n, x, k, distances, labels);
74 #pragma omp parallel for
75 for (
idx_t i = 0; i < n * k; i++) {
76 li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
84 index->range_search(n, x, radius, result);
85 #pragma omp parallel for
86 for (
idx_t i = 0; i < result->
lims[result->
nq]; i++) {
95 const std::vector <long> & id_map;
97 IDTranslatedSelector (
const std::vector <long> & id_map,
99 id_map (id_map), sel (sel)
101 bool is_member(idx_t
id)
const override {
102 return sel.is_member(id_map[
id]);
111 IDTranslatedSelector sel2 (id_map, sel);
112 long nremove = index->remove_ids (sel2);
115 for (
idx_t i = 0; i < ntotal; i++) {
116 if (sel.is_member (id_map[i])) {
119 id_map[j] = id_map[i];
123 FAISS_ASSERT (j == index->ntotal);
125 id_map.resize(ntotal);
132 IndexIDMap::~IndexIDMap ()
134 if (own_fields)
delete index;
141 IndexIDMap2::IndexIDMap2 (Index *index): IndexIDMap (index)
146 size_t prev_ntotal =
ntotal;
148 for (
size_t i = prev_ntotal; i <
ntotal; i++) {
156 for (
size_t i = 0; i <
ntotal; i++) {
174 }
catch (
const std::out_of_range& e) {
175 FAISS_THROW_FMT (
"key %ld not found", key);
// Initializes the `job` member from the given job; the constructor body itself is empty.
199 explicit Thread (
const Job & job): job(job) {}
202 pthread_create (&thread,
nullptr, run,
this);
206 pthread_join (thread,
nullptr);
209 static void * run (
void *arg) {
210 static_cast<Thread*
> (arg)->job.run();
227 printf (
"begin train shard %d on %ld points\n", no, n);
228 index->shard_indexes [no]->train(n, x);
230 printf (
"end train shard %d\n", no);
246 printf (
"begin add shard %d on %ld points\n", no, n);
248 index->shard_indexes[no]->add_with_ids (n, x, ids);
250 index->shard_indexes[no]->add (n, x);
252 printf (
"end add shard %d on %ld points\n", no, n);
260 const IndexShards *index;
274 printf (
"begin query shard %d on %ld points\n", no, n);
275 index->shard_indexes [no]->search (n, x, k,
278 printf (
"end query shard %d\n", no);
288 void translate_labels (
long n, idx_t *labels,
long translation)
290 if (translation == 0)
return;
291 for (
long i = 0; i < n; i++) {
292 if(labels[i] < 0)
return;
293 labels[i] += translation;
305 void merge_tables (
long n,
long k,
long nshard,
306 float *distances, idx_t *labels,
307 const float *all_distances,
309 const long *translations)
318 std::vector<int> buf (2 * nshard);
319 int * pointer = buf.data();
320 int * shard_ids = pointer + nshard;
321 std::vector<float> buf2 (nshard);
322 float * heap_vals = buf2.data();
324 for (
long i = 0; i < n; i++) {
327 const float *D_in = all_distances + i * k;
328 const idx_t *I_in = all_labels + i * k;
331 for (
long s = 0; s < nshard; s++) {
333 if (I_in[stride * s] >= 0)
334 heap_push<C> (++heap_size, heap_vals, shard_ids,
335 D_in[stride * s], s);
338 float *D = distances + i * k;
339 idx_t *I = labels + i * k;
341 for (
int j = 0; j < k; j++) {
342 if (heap_size == 0) {
347 int s = shard_ids[0];
348 int & p = pointer[s];
350 I[j] = I_in[stride * s + p] + translations[s];
352 heap_pop<C> (heap_size--, heap_vals, shard_ids);
354 if (p < k && I_in[stride * s + p] >= 0)
355 heap_push<C> (++heap_size, heap_vals, shard_ids,
356 D_in[stride * s + p], s);
370 Index (d), own_fields (false),
371 threaded (threaded), successive_ids (successive_ids)
376 void IndexShards::add_shard (
Index *idx)
378 shard_indexes.push_back (idx);
379 sync_with_shard_indexes ();
382 void IndexShards::sync_with_shard_indexes ()
384 if (shard_indexes.empty())
return;
385 Index * index0 = shard_indexes[0];
390 for (
int i = 1; i < shard_indexes.size(); i++) {
391 Index * index = shard_indexes[i];
392 FAISS_THROW_IF_NOT (
metric_type == index->metric_type);
393 FAISS_THROW_IF_NOT (
d == index->d);
403 std::vector<Thread<TrainJob > > tss (shard_indexes.size());
405 for (
int i = 0; i < shard_indexes.size(); i++) {
407 TrainJob ts = {
this, i, n, x};
409 tss[nt] = Thread<TrainJob> (ts);
416 for (
int i = 0; i < nt; i++) {
419 sync_with_shard_indexes ();
431 FAISS_THROW_IF_NOT_MSG(!(successive_ids && xids),
432 "It makes no sense to pass in ids and "
433 "request them to be shifted");
435 if (successive_ids) {
436 FAISS_THROW_IF_NOT_MSG(!xids,
437 "It makes no sense to pass in ids and "
438 "request them to be shifted");
439 FAISS_THROW_IF_NOT_MSG(
ntotal == 0,
440 "when adding to IndexShards with sucessive_ids, "
441 "only add() in a single pass is supported");
444 long nshard = shard_indexes.size();
445 const long *ids = xids;
447 if (!ids && !successive_ids) {
448 long *aids =
new long[n];
449 for (
long i = 0; i < n; i++)
455 std::vector<Thread<AddJob > > asa (shard_indexes.size());
457 for (
int i = 0; i < nshard; i++) {
458 long i0 = i * n / nshard;
459 long i1 = (i + 1) * n / nshard;
461 AddJob as = {
this, i,
463 ids ? ids + i0 :
nullptr};
465 asa[nt] = Thread<AddJob>(as);
471 for (
int i = 0; i < nt; i++) {
483 for (
int i = 0; i < shard_indexes.size(); i++) {
484 shard_indexes[i]->reset ();
486 sync_with_shard_indexes ();
490 idx_t n,
const float *x, idx_t k,
491 float *distances, idx_t *labels)
const
493 long nshard = shard_indexes.size();
494 float *all_distances =
new float [nshard * k * n];
495 idx_t *all_labels =
new idx_t [nshard * k * n];
502 std::vector<Thread<QueryJob> > qss (nshard);
503 for (
int i = 0; i < nshard; i++) {
506 all_distances + i * k * n,
507 all_labels + i * k * n
510 qss[i] = Thread<QueryJob> (qs);
518 for (
int i = 0; i < qss.size(); i++) {
525 std::vector<QueryJob> qss (nshard);
526 for (
int i = 0; i < nshard; i++) {
529 all_distances + i * k * n,
530 all_labels + i * k * n
540 #pragma omp parallel for
541 for (
int i = 0; i < qss.size(); i++) {
547 std::vector<long> translations (nshard, 0);
548 if (successive_ids) {
550 for (
int s = 0; s + 1 < nshard; s++)
551 translations [s + 1] = translations [s] +
552 shard_indexes [s]->
ntotal;
556 merge_tables< CMin<float, int> > (
557 n, k, nshard, distances, labels,
558 all_distances, all_labels, translations.data ());
560 merge_tables< CMax<float, int> > (
561 n, k, nshard, distances, labels,
562 all_distances, all_labels, translations.data ());
569 IndexShards::~IndexShards ()
572 for (
int s = 0; s < shard_indexes.size(); s++)
573 delete shard_indexes [s];
584 Index (d), own_fields (false),
585 threaded (threaded), sum_d (0)
590 void IndexSplitVectors::add_sub_index (
Index *index)
592 sub_indexes.push_back (index);
593 sync_with_sub_indexes ();
596 void IndexSplitVectors::sync_with_sub_indexes ()
598 if (sub_indexes.empty())
return;
599 Index * index0 = sub_indexes[0];
604 for (
int i = 1; i < sub_indexes.size(); i++) {
605 Index * index = sub_indexes[i];
606 FAISS_THROW_IF_NOT (
metric_type == index->metric_type);
607 FAISS_THROW_IF_NOT (
ntotal == index->ntotal);
614 FAISS_THROW_MSG(
"not implemented");
620 struct SplitQueryJob {
635 printf (
"begin query shard %d on %ld points\n", no, n);
636 const Index * sub_index = index->sub_indexes[no];
637 long sub_d = sub_index->
d, d = index->
d;
639 for (
int i = 0; i < no; i++) ofs += index->sub_indexes[i]->
d;
640 float *sub_x =
new float [sub_d * n];
642 for (idx_t i = 0; i < n; i++)
643 memcpy (sub_x + i * sub_d, x + ofs + i * d, sub_d *
sizeof (sub_x));
644 sub_index->
search (n, sub_x, k, distances, labels);
646 printf (
"end query shard %d\n", no);
658 idx_t n,
const float *x, idx_t k,
659 float *distances, idx_t *labels)
const
661 FAISS_THROW_IF_NOT_MSG (k == 1,
662 "search implemented only for k=1");
663 FAISS_THROW_IF_NOT_MSG (sum_d == d,
664 "not enough indexes compared to # dimensions");
666 long nshard = sub_indexes.size();
667 float *all_distances =
new float [nshard * k * n];
668 idx_t *all_labels =
new idx_t [nshard * k * n];
673 std::vector<Thread<SplitQueryJob> > qss (nshard);
674 for (
int i = 0; i < nshard; i++) {
677 i == 0 ? distances : all_distances + i * k * n,
678 i == 0 ? labels : all_labels + i * k * n
681 qss[i] = Thread<SplitQueryJob> (qs);
689 for (
int i = 0; i < qss.size(); i++) {
695 for (
int i = 0; i < nshard; i++) {
697 const float *distances_i = all_distances + i * k * n;
698 const idx_t *labels_i = all_labels + i * k * n;
699 for (
long j = 0; j < n; j++) {
700 if (labels[j] >= 0 && labels_i[j] >= 0) {
701 labels[j] += labels_i[j] * factor;
702 distances[j] += distances_i[j];
705 distances[j] = 0.0 / 0.0;
709 factor *= sub_indexes[i]->ntotal;
715 FAISS_THROW_MSG(
"not implemented");
720 FAISS_THROW_MSG (
"not implemented");
724 IndexSplitVectors::~IndexSplitVectors ()
727 for (
int s = 0; s < sub_indexes.size(); s++)
728 delete sub_indexes [s];
void train(idx_t n, const float *x) override
IndexShards(idx_t d, bool threaded=false, bool successive_ids=true)
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
void reset() override
removes all elements from the database.
void add(idx_t n, const float *x) override
this will fail. Use add_with_ids
std::vector< long > id_map
whether pointers are deleted in destructor
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
void train(idx_t n, const float *x) override
void add(idx_t n, const float *x) override
supported only for sub-indices that implement add_with_ids
long idx_t
all indices are this type
idx_t ntotal
total nb of indexed vectors
void construct_rev_map()
make the rev_map from scratch
bool verbose
verbosity level
void add(idx_t n, const float *x) override
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
bool threaded
should the sub-indexes be deleted along with this?
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void reset() override
removes all elements from the database.
void reset() override
removes all elements from the database.
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
MetricType metric_type
type of metric this index uses for search
void reconstruct(idx_t key, float *recons) const override
void train(idx_t n, const float *x) override
size_t * lims
size (nq + 1)
bool is_trained
set if the Index does not require training, or if training is done already
virtual void reconstruct(idx_t key, float *recons) const
void add_with_ids(idx_t n, const float *x, const long *xids) override
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]
IndexSplitVectors(idx_t d, bool threaded=false)
sum of dimensions seen so far