12 #include "MetaIndexes.h"
18 #include "FaissAssert.h"
20 #include "AuxIndexStructures.h"
29 IndexIDMap::IndexIDMap (Index *index):
33 FAISS_THROW_IF_NOT_MSG (index->ntotal == 0,
"index must be empty on input");
42 FAISS_THROW_MSG (
"add does not make sense with IndexIDMap, "
63 for (
idx_t i = 0; i < n; i++)
64 id_map.push_back (xids[i]);
70 float *distances,
idx_t *labels)
const
72 index->
search (n, x, k, distances, labels);
74 for (
idx_t i = 0; i < n * k; i++) {
75 li[i] = li[i] < 0 ? li[i] :
id_map[li[i]];
82 const std::vector <long> & id_map;
84 IDTranslatedSelector (
const std::vector <long> & id_map,
86 id_map (id_map), sel (sel)
88 bool is_member(idx_t
id)
const override {
89 return sel.is_member(id_map[
id]);
98 IDTranslatedSelector sel2 (
id_map, sel);
103 if (sel.is_member (
id_map[i])) {
110 FAISS_ASSERT (j == index->
ntotal);
118 IndexIDMap::~IndexIDMap ()
127 IndexIDMap2::IndexIDMap2 (Index *index): IndexIDMap (index)
132 size_t prev_ntotal =
ntotal;
134 for (
size_t i = prev_ntotal; i <
ntotal; i++) {
142 for (
size_t i = 0; i <
ntotal; i++) {
160 }
catch (
const std::out_of_range& e) {
161 FAISS_THROW_FMT (
"key %ld not found", key);
185 explicit Thread (
const Job & job): job(job) {}
188 pthread_create (&thread,
nullptr, run,
this);
192 pthread_join (thread,
nullptr);
195 static void * run (
void *arg) {
196 static_cast<Thread*
> (arg)->job.run();
213 printf (
"begin train shard %d on %ld points\n", no, n);
214 index->shard_indexes [no]->train(n, x);
216 printf (
"end train shard %d\n", no);
232 printf (
"begin add shard %d on %ld points\n", no, n);
234 index->shard_indexes[no]->add_with_ids (n, x, ids);
236 index->shard_indexes[no]->add (n, x);
238 printf (
"end add shard %d on %ld points\n", no, n);
246 const IndexShards *index;
260 printf (
"begin query shard %d on %ld points\n", no, n);
261 index->shard_indexes [no]->search (n, x, k,
264 printf (
"end query shard %d\n", no);
/** Add a constant offset to every valid label, in place.
 *
 * @param n            number of entries in `labels`
 * @param labels       array of n labels, modified in place
 * @param translation  offset added to each non-negative label
 *
 * Negative labels (used elsewhere in this file as per-entry "no result"
 * markers) are left unchanged.
 */
void translate_labels (long n, idx_t *labels, long translation)
{
    // nothing to do for a zero offset
    if (translation == 0) return;
    for (long i = 0; i < n; i++) {
        // Skip only this entry: a negative label must not prevent the
        // valid labels that follow it from being translated.
        if (labels[i] < 0) continue;
        labels[i] += translation;
    }
}
291 void merge_tables (
long n,
long k,
long nshard,
292 float *distances, idx_t *labels,
293 const float *all_distances,
295 const long *translations)
304 std::vector<int> buf (2 * nshard);
305 int * pointer = buf.data();
306 int * shard_ids = pointer + nshard;
307 std::vector<float> buf2 (nshard);
308 float * heap_vals = buf2.data();
310 for (
long i = 0; i < n; i++) {
313 const float *D_in = all_distances + i * k;
314 const idx_t *I_in = all_labels + i * k;
317 for (
long s = 0; s < nshard; s++) {
319 if (I_in[stride * s] >= 0)
320 heap_push<C> (++heap_size, heap_vals, shard_ids,
321 D_in[stride * s], s);
324 float *D = distances + i * k;
325 idx_t *I = labels + i * k;
327 for (
int j = 0; j < k; j++) {
328 if (heap_size == 0) {
333 int s = shard_ids[0];
334 int & p = pointer[s];
336 I[j] = I_in[stride * s + p] + translations[s];
338 heap_pop<C> (heap_size--, heap_vals, shard_ids);
340 if (p < k && I_in[stride * s + p] >= 0)
341 heap_push<C> (++heap_size, heap_vals, shard_ids,
342 D_in[stride * s + p], s);
356 Index (d), own_fields (false),
357 threaded (threaded), successive_ids (successive_ids)
362 void IndexShards::add_shard (
Index *idx)
364 shard_indexes.push_back (idx);
365 sync_with_shard_indexes ();
368 void IndexShards::sync_with_shard_indexes ()
370 if (shard_indexes.empty())
return;
371 Index * index0 = shard_indexes[0];
376 for (
int i = 1; i < shard_indexes.size(); i++) {
377 Index * index = shard_indexes[i];
378 FAISS_THROW_IF_NOT (
metric_type == index->metric_type);
379 FAISS_THROW_IF_NOT (
d == index->d);
389 std::vector<Thread<TrainJob > > tss (shard_indexes.size());
391 for (
int i = 0; i < shard_indexes.size(); i++) {
393 TrainJob ts = {
this, i, n, x};
395 tss[nt] = Thread<TrainJob> (ts);
402 for (
int i = 0; i < nt; i++) {
405 sync_with_shard_indexes ();
428 FAISS_THROW_IF_NOT_MSG(!(successive_ids && xids),
429 "It makes no sense to pass in ids and "
430 "request them to be shifted");
432 if (successive_ids) {
433 FAISS_THROW_IF_NOT_MSG(!xids,
434 "It makes no sense to pass in ids and "
435 "request them to be shifted");
436 FAISS_THROW_IF_NOT_MSG(
ntotal == 0,
437 "when adding to IndexShards with sucessive_ids, "
438 "only add() in a single pass is supported");
441 long nshard = shard_indexes.size();
442 const long *ids = xids;
444 if (!ids && !successive_ids) {
445 long *aids =
new long[n];
446 for (
long i = 0; i < n; i++)
452 std::vector<Thread<AddJob > > asa (shard_indexes.size());
454 for (
int i = 0; i < nshard; i++) {
455 long i0 = i * n / nshard;
456 long i1 = (i + 1) * n / nshard;
458 AddJob as = {
this, i,
460 ids ? ids + i0 :
nullptr};
462 asa[nt] = Thread<AddJob>(as);
468 for (
int i = 0; i < nt; i++) {
480 for (
int i = 0; i < shard_indexes.size(); i++) {
481 shard_indexes[i]->reset ();
483 sync_with_shard_indexes ();
487 idx_t n,
const float *x, idx_t k,
488 float *distances, idx_t *labels)
const
490 long nshard = shard_indexes.size();
491 float *all_distances =
new float [nshard * k * n];
492 idx_t *all_labels =
new idx_t [nshard * k * n];
499 std::vector<Thread<QueryJob> > qss (nshard);
500 for (
int i = 0; i < nshard; i++) {
503 all_distances + i * k * n,
504 all_labels + i * k * n
507 qss[i] = Thread<QueryJob> (qs);
515 for (
int i = 0; i < qss.size(); i++) {
522 std::vector<QueryJob> qss (nshard);
523 for (
int i = 0; i < nshard; i++) {
526 all_distances + i * k * n,
527 all_labels + i * k * n
537 #pragma omp parallel for
538 for (
int i = 0; i < qss.size(); i++) {
544 std::vector<long> translations (nshard, 0);
545 if (successive_ids) {
547 for (
int s = 0; s + 1 < nshard; s++)
548 translations [s + 1] = translations [s] +
549 shard_indexes [s]->
ntotal;
553 merge_tables< CMin<float, int> > (
554 n, k, nshard, distances, labels,
555 all_distances, all_labels, translations.data ());
557 merge_tables< CMax<float, int> > (
558 n, k, nshard, distances, labels,
559 all_distances, all_labels, translations.data ());
566 IndexShards::~IndexShards ()
569 for (
int s = 0; s < shard_indexes.size(); s++)
570 delete shard_indexes [s];
581 Index (d), own_fields (false),
582 threaded (threaded), sum_d (0)
587 void IndexSplitVectors::add_sub_index (
Index *index)
589 sub_indexes.push_back (index);
590 sync_with_sub_indexes ();
593 void IndexSplitVectors::sync_with_sub_indexes ()
595 if (sub_indexes.empty())
return;
596 Index * index0 = sub_indexes[0];
601 for (
int i = 1; i < sub_indexes.size(); i++) {
602 Index * index = sub_indexes[i];
603 FAISS_THROW_IF_NOT (
metric_type == index->metric_type);
604 FAISS_THROW_IF_NOT (
ntotal == index->ntotal);
611 FAISS_THROW_MSG(
"not implemented");
617 struct SplitQueryJob {
632 printf (
"begin query shard %d on %ld points\n", no, n);
633 const Index * sub_index = index->sub_indexes[no];
634 long sub_d = sub_index->
d, d = index->
d;
636 for (
int i = 0; i < no; i++) ofs += index->sub_indexes[i]->
d;
637 float *sub_x =
new float [sub_d * n];
639 for (idx_t i = 0; i < n; i++)
640 memcpy (sub_x + i * sub_d, x + ofs + i * d, sub_d *
sizeof (sub_x));
641 sub_index->
search (n, sub_x, k, distances, labels);
643 printf (
"end query shard %d\n", no);
655 idx_t n,
const float *x, idx_t k,
656 float *distances, idx_t *labels)
const
658 FAISS_THROW_IF_NOT_MSG (k == 1,
659 "search implemented only for k=1");
660 FAISS_THROW_IF_NOT_MSG (sum_d == d,
661 "not enough indexes compared to # dimensions");
663 long nshard = sub_indexes.size();
664 float *all_distances =
new float [nshard * k * n];
665 idx_t *all_labels =
new idx_t [nshard * k * n];
670 std::vector<Thread<SplitQueryJob> > qss (nshard);
671 for (
int i = 0; i < nshard; i++) {
674 i == 0 ? distances : all_distances + i * k * n,
675 i == 0 ? labels : all_labels + i * k * n
678 qss[i] = Thread<SplitQueryJob> (qs);
686 for (
int i = 0; i < qss.size(); i++) {
692 for (
int i = 0; i < nshard; i++) {
694 const float *distances_i = all_distances + i * k * n;
695 const idx_t *labels_i = all_labels + i * k * n;
696 for (
long j = 0; j < n; j++) {
697 if (labels[j] >= 0 && labels_i[j] >= 0) {
698 labels[j] += labels_i[j] * factor;
699 distances[j] += distances_i[j];
702 distances[j] = 0.0 / 0.0;
706 factor *= sub_indexes[i]->ntotal;
712 FAISS_THROW_MSG(
"not implemented");
717 FAISS_THROW_MSG (
"not implemented");
721 IndexSplitVectors::~IndexSplitVectors ()
724 for (
int s = 0; s < sub_indexes.size(); s++)
725 delete sub_indexes [s];
void train(idx_t n, const float *x) override
IndexShards(idx_t d, bool threaded=false, bool successive_ids=true)
virtual void reset()=0
removes all elements from the database.
virtual void train(idx_t, const float *)
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
void reset() override
removes all elements from the database.
void add(idx_t n, const float *x) override
this will fail. Use add_with_ids
std::vector< long > id_map
whether pointers are deleted in destructor
virtual void add(idx_t n, const float *x)=0
void train(idx_t n, const float *x) override
void add(idx_t n, const float *x) override
supported only for sub-indices that implement add_with_ids
long idx_t
all indices are this type
idx_t ntotal
total nb of indexed vectors
void construct_rev_map()
make the rev_map from scratch
bool verbose
verbosity level
void add(idx_t n, const float *x) override
virtual long remove_ids(const IDSelector &sel)
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
bool threaded
should the sub-indexes be deleted along with this?
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void reset() override
removes all elements from the database.
void reset() override
removes all elements from the database.
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
MetricType metric_type
type of metric this index uses for search
void reconstruct(idx_t key, float *recons) const override
void train(idx_t n, const float *x) override
bool is_trained
set if the Index does not require training, or if training is done already
virtual void reconstruct(idx_t key, float *recons) const
void add_with_ids(idx_t n, const float *x, const long *xids) override
IndexSplitVectors(idx_t d, bool threaded=false)
sum of dimensions seen so far
bool own_fields
! the sub-index