12 #include "MetaIndexes.h"
18 #include "FaissAssert.h"
20 #include "AuxIndexStructures.h"
29 IndexIDMap::IndexIDMap (Index *index):
33 FAISS_THROW_IF_NOT_MSG (index->ntotal == 0,
"index must be empty on input");
42 FAISS_THROW_MSG (
"add does not make sense with IndexIDMap, "
63 for (
idx_t i = 0; i < n; i++)
64 id_map.push_back (xids[i]);
70 float *distances,
idx_t *labels)
const
72 index->
search (n, x, k, distances, labels);
74 for (
idx_t i = 0; i < n * k; i++) {
75 li[i] = li[i] < 0 ? li[i] :
id_map[li[i]];
84 for (
idx_t i = 0; i < result->
lims[result->
nq]; i++) {
93 const std::vector <long> & id_map;
95 IDTranslatedSelector (
const std::vector <long> & id_map,
97 id_map (id_map), sel (sel)
99 bool is_member(idx_t
id)
const override {
100 return sel.is_member(id_map[
id]);
109 IDTranslatedSelector sel2 (
id_map, sel);
114 if (sel.is_member (
id_map[i])) {
121 FAISS_ASSERT (j == index->
ntotal);
130 IndexIDMap::~IndexIDMap ()
139 IndexIDMap2::IndexIDMap2 (Index *index): IndexIDMap (index)
144 size_t prev_ntotal =
ntotal;
146 for (
size_t i = prev_ntotal; i <
ntotal; i++) {
154 for (
size_t i = 0; i <
ntotal; i++) {
172 }
catch (
const std::out_of_range& e) {
173 FAISS_THROW_FMT (
"key %ld not found", key);
197 explicit Thread (
const Job & job): job(job) {}
200 pthread_create (&thread,
nullptr, run,
this);
204 pthread_join (thread,
nullptr);
207 static void * run (
void *arg) {
208 static_cast<Thread*
> (arg)->job.run();
225 printf (
"begin train shard %d on %ld points\n", no, n);
226 index->shard_indexes [no]->train(n, x);
228 printf (
"end train shard %d\n", no);
244 printf (
"begin add shard %d on %ld points\n", no, n);
246 index->shard_indexes[no]->add_with_ids (n, x, ids);
248 index->shard_indexes[no]->add (n, x);
250 printf (
"end add shard %d on %ld points\n", no, n);
258 const IndexShards *index;
272 printf (
"begin query shard %d on %ld points\n", no, n);
273 index->shard_indexes [no]->search (n, x, k,
276 printf (
"end query shard %d\n", no);
286 void translate_labels (
long n, idx_t *labels,
long translation)
288 if (translation == 0)
return;
289 for (
long i = 0; i < n; i++) {
290 if(labels[i] < 0)
return;
291 labels[i] += translation;
303 void merge_tables (
long n,
long k,
long nshard,
304 float *distances, idx_t *labels,
305 const float *all_distances,
307 const long *translations)
316 std::vector<int> buf (2 * nshard);
317 int * pointer = buf.data();
318 int * shard_ids = pointer + nshard;
319 std::vector<float> buf2 (nshard);
320 float * heap_vals = buf2.data();
322 for (
long i = 0; i < n; i++) {
325 const float *D_in = all_distances + i * k;
326 const idx_t *I_in = all_labels + i * k;
329 for (
long s = 0; s < nshard; s++) {
331 if (I_in[stride * s] >= 0)
332 heap_push<C> (++heap_size, heap_vals, shard_ids,
333 D_in[stride * s], s);
336 float *D = distances + i * k;
337 idx_t *I = labels + i * k;
339 for (
int j = 0; j < k; j++) {
340 if (heap_size == 0) {
345 int s = shard_ids[0];
346 int & p = pointer[s];
348 I[j] = I_in[stride * s + p] + translations[s];
350 heap_pop<C> (heap_size--, heap_vals, shard_ids);
352 if (p < k && I_in[stride * s + p] >= 0)
353 heap_push<C> (++heap_size, heap_vals, shard_ids,
354 D_in[stride * s + p], s);
368 Index (d), own_fields (false),
369 threaded (threaded), successive_ids (successive_ids)
374 void IndexShards::add_shard (
Index *idx)
376 shard_indexes.push_back (idx);
377 sync_with_shard_indexes ();
380 void IndexShards::sync_with_shard_indexes ()
382 if (shard_indexes.empty())
return;
383 Index * index0 = shard_indexes[0];
388 for (
int i = 1; i < shard_indexes.size(); i++) {
389 Index * index = shard_indexes[i];
390 FAISS_THROW_IF_NOT (
metric_type == index->metric_type);
391 FAISS_THROW_IF_NOT (
d == index->d);
401 std::vector<Thread<TrainJob > > tss (shard_indexes.size());
403 for (
int i = 0; i < shard_indexes.size(); i++) {
405 TrainJob ts = {
this, i, n, x};
407 tss[nt] = Thread<TrainJob> (ts);
414 for (
int i = 0; i < nt; i++) {
417 sync_with_shard_indexes ();
440 FAISS_THROW_IF_NOT_MSG(!(successive_ids && xids),
441 "It makes no sense to pass in ids and "
442 "request them to be shifted");
444 if (successive_ids) {
445 FAISS_THROW_IF_NOT_MSG(!xids,
446 "It makes no sense to pass in ids and "
447 "request them to be shifted");
448 FAISS_THROW_IF_NOT_MSG(
ntotal == 0,
449 "when adding to IndexShards with sucessive_ids, "
450 "only add() in a single pass is supported");
453 long nshard = shard_indexes.size();
454 const long *ids = xids;
456 if (!ids && !successive_ids) {
457 long *aids =
new long[n];
458 for (
long i = 0; i < n; i++)
464 std::vector<Thread<AddJob > > asa (shard_indexes.size());
466 for (
int i = 0; i < nshard; i++) {
467 long i0 = i * n / nshard;
468 long i1 = (i + 1) * n / nshard;
470 AddJob as = {
this, i,
472 ids ? ids + i0 :
nullptr};
474 asa[nt] = Thread<AddJob>(as);
480 for (
int i = 0; i < nt; i++) {
492 for (
int i = 0; i < shard_indexes.size(); i++) {
493 shard_indexes[i]->reset ();
495 sync_with_shard_indexes ();
499 idx_t n,
const float *x, idx_t k,
500 float *distances, idx_t *labels)
const
502 long nshard = shard_indexes.size();
503 float *all_distances =
new float [nshard * k * n];
504 idx_t *all_labels =
new idx_t [nshard * k * n];
511 std::vector<Thread<QueryJob> > qss (nshard);
512 for (
int i = 0; i < nshard; i++) {
515 all_distances + i * k * n,
516 all_labels + i * k * n
519 qss[i] = Thread<QueryJob> (qs);
527 for (
int i = 0; i < qss.size(); i++) {
534 std::vector<QueryJob> qss (nshard);
535 for (
int i = 0; i < nshard; i++) {
538 all_distances + i * k * n,
539 all_labels + i * k * n
549 #pragma omp parallel for
550 for (
int i = 0; i < qss.size(); i++) {
556 std::vector<long> translations (nshard, 0);
557 if (successive_ids) {
559 for (
int s = 0; s + 1 < nshard; s++)
560 translations [s + 1] = translations [s] +
561 shard_indexes [s]->
ntotal;
565 merge_tables< CMin<float, int> > (
566 n, k, nshard, distances, labels,
567 all_distances, all_labels, translations.data ());
569 merge_tables< CMax<float, int> > (
570 n, k, nshard, distances, labels,
571 all_distances, all_labels, translations.data ());
578 IndexShards::~IndexShards ()
581 for (
int s = 0; s < shard_indexes.size(); s++)
582 delete shard_indexes [s];
593 Index (d), own_fields (false),
594 threaded (threaded), sum_d (0)
599 void IndexSplitVectors::add_sub_index (
Index *index)
601 sub_indexes.push_back (index);
602 sync_with_sub_indexes ();
605 void IndexSplitVectors::sync_with_sub_indexes ()
607 if (sub_indexes.empty())
return;
608 Index * index0 = sub_indexes[0];
613 for (
int i = 1; i < sub_indexes.size(); i++) {
614 Index * index = sub_indexes[i];
615 FAISS_THROW_IF_NOT (
metric_type == index->metric_type);
616 FAISS_THROW_IF_NOT (
ntotal == index->ntotal);
623 FAISS_THROW_MSG(
"not implemented");
629 struct SplitQueryJob {
644 printf (
"begin query shard %d on %ld points\n", no, n);
645 const Index * sub_index = index->sub_indexes[no];
646 long sub_d = sub_index->
d, d = index->
d;
648 for (
int i = 0; i < no; i++) ofs += index->sub_indexes[i]->
d;
649 float *sub_x =
new float [sub_d * n];
651 for (idx_t i = 0; i < n; i++)
652 memcpy (sub_x + i * sub_d, x + ofs + i * d, sub_d *
sizeof (sub_x));
653 sub_index->
search (n, sub_x, k, distances, labels);
655 printf (
"end query shard %d\n", no);
667 idx_t n,
const float *x, idx_t k,
668 float *distances, idx_t *labels)
const
670 FAISS_THROW_IF_NOT_MSG (k == 1,
671 "search implemented only for k=1");
672 FAISS_THROW_IF_NOT_MSG (sum_d == d,
673 "not enough indexes compared to # dimensions");
675 long nshard = sub_indexes.size();
676 float *all_distances =
new float [nshard * k * n];
677 idx_t *all_labels =
new idx_t [nshard * k * n];
682 std::vector<Thread<SplitQueryJob> > qss (nshard);
683 for (
int i = 0; i < nshard; i++) {
686 i == 0 ? distances : all_distances + i * k * n,
687 i == 0 ? labels : all_labels + i * k * n
690 qss[i] = Thread<SplitQueryJob> (qs);
698 for (
int i = 0; i < qss.size(); i++) {
704 for (
int i = 0; i < nshard; i++) {
706 const float *distances_i = all_distances + i * k * n;
707 const idx_t *labels_i = all_labels + i * k * n;
708 for (
long j = 0; j < n; j++) {
709 if (labels[j] >= 0 && labels_i[j] >= 0) {
710 labels[j] += labels_i[j] * factor;
711 distances[j] += distances_i[j];
714 distances[j] = 0.0 / 0.0;
718 factor *= sub_indexes[i]->ntotal;
724 FAISS_THROW_MSG(
"not implemented");
729 FAISS_THROW_MSG (
"not implemented");
733 IndexSplitVectors::~IndexSplitVectors ()
736 for (
int s = 0; s < sub_indexes.size(); s++)
737 delete sub_indexes [s];
void train(idx_t n, const float *x) override
IndexShards(idx_t d, bool threaded=false, bool successive_ids=true)
virtual void reset()=0
removes all elements from the database.
virtual void train(idx_t, const float *)
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
void reset() override
removes all elements from the database.
void add(idx_t n, const float *x) override
this will fail. Use add_with_ids
std::vector< long > id_map
! whether pointers are deleted in destructor
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
virtual void add(idx_t n, const float *x)=0
void train(idx_t n, const float *x) override
void add(idx_t n, const float *x) override
supported only for sub-indices that implement add_with_ids
long idx_t
all indices are this type
idx_t ntotal
total nb of indexed vectors
void construct_rev_map()
make the rev_map from scratch
bool verbose
verbosity level
void add(idx_t n, const float *x) override
virtual long remove_ids(const IDSelector &sel)
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
bool threaded
should the sub-indexes be deleted along with this?
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void reset() override
removes all elements from the database.
void reset() override
removes all elements from the database.
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
MetricType metric_type
type of metric this index uses for search
void reconstruct(idx_t key, float *recons) const override
void train(idx_t n, const float *x) override
size_t * lims
size (nq + 1)
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const
bool is_trained
set if the Index does not require training, or if training is done already
virtual void reconstruct(idx_t key, float *recons) const
void add_with_ids(idx_t n, const float *x, const long *xids) override
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]
IndexSplitVectors(idx_t d, bool threaded=false)
sum of dimensions seen so far
bool own_fields
! the sub-index