#include "MetaIndexes.h"

#include <pthread.h>
#include <cstdio>
#include <cstring>

#include "FaissAssert.h"
#include "Heap.h"
#include "AuxIndexStructures.h"

namespace faiss {
IndexIDMap::IndexIDMap (Index *index):
    index (index), own_fields (false)
{
    FAISS_THROW_IF_NOT_MSG (index->ntotal == 0,
                            "index must be empty on input");
    is_trained = index->is_trained;
    metric_type = index->metric_type;
    verbose = index->verbose;
    d = index->d;
}
void IndexIDMap::add (idx_t, const float *)
{
    FAISS_THROW_MSG ("add does not make sense with IndexIDMap, "
                     "use add_with_ids");
}
void IndexIDMap::add_with_ids (idx_t n, const float * x, const long *xids)
{
    index->add (n, x);
    for (idx_t i = 0; i < n; i++)
        id_map.push_back (xids[i]);
    ntotal = index->ntotal;
}
void IndexIDMap::search (idx_t n, const float *x, idx_t k,
                         float *distances, idx_t *labels) const
{
    index->search (n, x, k, distances, labels);
    // remap the sequential labels of the wrapped index to the stored ids
    idx_t *li = labels;
    for (idx_t i = 0; i < n * k; i++) {
        li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
    }
}
/// an IDSelector that translates ids through id_map before testing membership
struct IDTranslatedSelector: IDSelector {
    const std::vector <long> & id_map;
    const IDSelector & sel;
    IDTranslatedSelector (const std::vector <long> & id_map,
                          const IDSelector & sel):
        id_map (id_map), sel (sel)
    {}
    bool is_member (idx_t id) const override {
        return sel.is_member (id_map[id]);
    }
};
long IndexIDMap::remove_ids (const IDSelector & sel)
{
    // remove from the wrapped index first, translating ids on the fly
    IDTranslatedSelector sel2 = {id_map, sel};
    long nremove = index->remove_ids (sel2);
    // then compact id_map, keeping only the surviving entries
    long j = 0;
    for (idx_t i = 0; i < ntotal; i++) {
        if (!sel.is_member (id_map[i]))
            id_map[j++] = id_map[i];
    }
    FAISS_ASSERT (j == index->ntotal);
    ntotal = j;
    return nremove;
}
IndexIDMap::~IndexIDMap ()
{
    if (own_fields) delete index;
}
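// Illustrative usage sketch (not part of the original file): wrapping a flat
// index in an IndexIDMap so that searches report caller-chosen 64-bit ids
// rather than sequential positions. IndexFlatL2 is just an example sub-index;
// d, nb, xb, nq, xq, k, D and I are assumed to be provided by the caller.
//
//     faiss::IndexFlatL2 flat (d);
//     faiss::IndexIDMap idmap (&flat);           // wrapped index must be empty
//     std::vector<long> ids (nb);
//     for (long i = 0; i < nb; i++) ids[i] = 1000 + 2 * i;   // arbitrary ids
//     idmap.add_with_ids (nb, xb, ids.data());   // plain add() would throw
//     idmap.search (nq, xq, k, D, I);            // I now contains values from ids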
namespace {

typedef Index::idx_t idx_t;

/// thin wrapper around a POSIX thread that runs a copy of a Job object
template<class Job>
struct Thread {
    Job job;
    pthread_t thread;

    Thread () {}

    explicit Thread (const Job & job): job(job) {}

    void start () {
        pthread_create (&thread, nullptr, run, this);
    }

    void wait () {
        pthread_join (thread, nullptr);
    }

    static void * run (void *arg) {
        static_cast<Thread*> (arg)->job.run();
        return nullptr;
    }
};
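// Illustrative sketch (not in the original source): any struct with a run()
// method can serve as a Job. The PrintJob name below is hypothetical.
//
//     struct PrintJob {
//         int no;
//         void run () { printf ("hello from job %d\n", no); }
//     };
//     Thread<PrintJob> t (PrintJob {42});
//     t.start ();   // spawns a pthread that calls job.run()
//     t.wait ();    // joins it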
/// trains one shard; run in its own thread when threaded == true
struct TrainJob {
    IndexShards *index;   // the relevant index
    int no;               // shard number
    idx_t n;              // nb of training points
    const float *x;       // training vectors

    void run () {
        if (index->verbose)
            printf ("begin train shard %d on %ld points\n", no, n);
        index->shard_indexes [no]->train (n, x);
        if (index->verbose)
            printf ("end train shard %d\n", no);
    }
};

/// adds a slice of the input vectors (with optional ids) to one shard
struct AddJob {
    IndexShards *index;   // the relevant index
    int no;               // shard number
    idx_t n;
    const float *x;
    const long *ids;      // nullptr when the shard assigns ids itself

    void run () {
        if (index->verbose)
            printf ("begin add shard %d on %ld points\n", no, n);
        if (ids)
            index->shard_indexes[no]->add_with_ids (n, x, ids);
        else
            index->shard_indexes[no]->add (n, x);
        if (index->verbose)
            printf ("end add shard %d on %ld points\n", no, n);
    }
};
/// queries one shard and writes results into a per-shard slice
struct QueryJob {
    const IndexShards *index;  // the relevant index
    int no;                    // shard number
    // query params
    idx_t n;
    const float *x;
    idx_t k;
    float *distances;
    idx_t *labels;

    void run () {
        if (index->verbose)
            printf ("begin query shard %d on %ld points\n", no, n);
        index->shard_indexes [no]->search (n, x, k, distances, labels);
        if (index->verbose)
            printf ("end query shard %d\n", no);
    }
};
void translate_labels (long n, idx_t *labels, long translation)
{
    if (translation == 0) return;
    for (long i = 0; i < n; i++) {
        if (labels[i] < 0) return;
        labels[i] += translation;
    }
}
/** merge result tables from several shards.
 * @param all_distances  size nshard * n * k
 * @param all_labels     idem
 * @param translations   label translations to apply, size nshard
 */
template <class C>
void merge_tables (long n, long k, long nshard,
                   float *distances, idx_t *labels,
                   const float *all_distances,
                   const idx_t *all_labels,
                   const long *translations)
{
    if (k == 0) return;

    long stride = n * k;
    std::vector<int> buf (2 * nshard);
    int * pointer = buf.data();
    int * shard_ids = pointer + nshard;
    std::vector<float> buf2 (nshard);
    float * heap_vals = buf2.data();

    for (long i = 0; i < n; i++) {
        // the heap maps the best not-yet-consumed value of each shard to
        // the shard it came from
        const float *D_in = all_distances + i * k;
        const idx_t *I_in = all_labels + i * k;
        int heap_size = 0;

        for (long s = 0; s < nshard; s++) {
            pointer[s] = 0;
            if (I_in[stride * s] >= 0)
                heap_push<C> (++heap_size, heap_vals, shard_ids,
                              D_in[stride * s], s);
        }

        float *D = distances + i * k;
        idx_t *I = labels + i * k;

        for (int j = 0; j < k; j++) {
            if (heap_size == 0) {
                I[j] = -1;
                D[j] = C::neutral();
            } else {
                // emit the best remaining candidate and advance in its shard
                int s = shard_ids[0];
                int & p = pointer[s];
                D[j] = heap_vals[0];
                I[j] = I_in[stride * s + p] + translations[s];

                heap_pop<C> (heap_size--, heap_vals, shard_ids);
                p++;
                if (p < k && I_in[stride * s + p] >= 0)
                    heap_push<C> (++heap_size, heap_vals, shard_ids,
                                  D_in[stride * s + p], s);
            }
        }
    }
}

}  // anonymous namespace
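// Worked example with illustrative numbers (not from the original source):
// take nshard = 2, k = 2 and C = CMin<float, int> (L2, smaller is better).
// For one query, shard 0 returns distances [0.1, 0.7] and shard 1 returns
// [0.3, 0.4]. The heap starts with the first candidate of each shard,
// {0.1 from shard 0, 0.3 from shard 1}. The first pop emits 0.1 and pushes
// shard 0's next candidate 0.7; the second pop emits 0.3. The merged result
// is [0.1, 0.3], and each emitted label is shifted by translations[s] of the
// shard it came from.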
IndexShards::IndexShards (idx_t d, bool threaded, bool successive_ids):
    Index (d), own_fields (false),
    threaded (threaded), successive_ids (successive_ids)
{
}

void IndexShards::add_shard (Index *idx)
{
    shard_indexes.push_back (idx);
    sync_with_shard_indexes ();
}
void IndexShards::sync_with_shard_indexes ()
{
    if (shard_indexes.empty()) return;
    Index * index0 = shard_indexes[0];
    d = index0->d;
    metric_type = index0->metric_type;
    is_trained = index0->is_trained;
    ntotal = index0->ntotal;
    for (int i = 1; i < shard_indexes.size(); i++) {
        Index * index = shard_indexes[i];
        FAISS_THROW_IF_NOT (metric_type == index->metric_type);
        FAISS_THROW_IF_NOT (d == index->d);
        ntotal += index->ntotal;
    }
}
void IndexShards::train (idx_t n, const float *x)
{
    // pre-alloc because we don't want reallocs
    std::vector<Thread<TrainJob > > tss (shard_indexes.size());
    int nt = 0;
    for (int i = 0; i < shard_indexes.size(); i++) {
        if (!shard_indexes[i]->is_trained) {
            TrainJob ts = {this, i, n, x};
            if (threaded) {
                tss[nt] = Thread<TrainJob> (ts);
                tss[nt++].start();
            } else {
                ts.run();
            }
        }
    }
    for (int i = 0; i < nt; i++) {
        tss[i].wait();
    }
    sync_with_shard_indexes ();
}
void IndexShards::add_with_ids (idx_t n, const float * x, const long *xids)
{
    FAISS_THROW_IF_NOT_MSG (!(successive_ids && xids),
                            "It makes no sense to pass in ids and "
                            "request them to be shifted");

    if (successive_ids) {
        FAISS_THROW_IF_NOT_MSG (!xids,
                                "It makes no sense to pass in ids and "
                                "request them to be shifted");
        FAISS_THROW_IF_NOT_MSG (ntotal == 0,
                                "when adding to IndexShards with successive_ids, "
                                "only add() in a single pass is supported");
    }

    long nshard = shard_indexes.size();
    const long *ids = xids;
    ScopeDeleter<long> del;
    if (!ids && !successive_ids) {
        // no ids provided: assign them ourselves, continuing from ntotal
        long *aids = new long[n];
        for (long i = 0; i < n; i++)
            aids[i] = ntotal + i;
        ids = aids;
        del.set (ids);
    }

    std::vector<Thread<AddJob > > asa (shard_indexes.size());
    int nt = 0;
    for (int i = 0; i < nshard; i++) {
        // even split of the input vectors over the shards
        long i0 = i * n / nshard;
        long i1 = (i + 1) * n / nshard;

        AddJob as = {this, i,
                     i1 - i0, x + i0 * d,
                     ids ? ids + i0 : nullptr};
        if (threaded) {
            asa[nt] = Thread<AddJob> (as);
            asa[nt++].start();
        } else {
            as.run();
        }
    }
    for (int i = 0; i < nt; i++) {
        asa[i].wait();
    }
    ntotal += n;
}
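// Summary of the (successive_ids, xids) combinations accepted by the checks
// above, with an illustrative call for each (`shards`, nb, xb and my_ids are
// assumed to be defined by the caller):
//
//   successive_ids == true,  xids == NULL:   shards.add (nb, xb);
//       single pass only; at search time shard s's labels are shifted by the
//       sizes of shards 0..s-1
//   successive_ids == false, xids != NULL:   shards.add_with_ids (nb, xb, my_ids);
//       the caller's ids are split evenly over the shards
//   successive_ids == false, xids == NULL:   shards.add (nb, xb);
//       ids are assigned automatically, continuing from ntotal
//   successive_ids == true,  xids != NULL:   rejected by the first check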
void IndexShards::reset ()
{
    for (int i = 0; i < shard_indexes.size(); i++) {
        shard_indexes[i]->reset ();
    }
    sync_with_shard_indexes ();
}
void IndexShards::search (idx_t n, const float *x, idx_t k,
                          float *distances, idx_t *labels) const
{
    long nshard = shard_indexes.size();
    float *all_distances = new float [nshard * k * n];
    idx_t *all_labels = new idx_t [nshard * k * n];
    ScopeDeleter<float> del (all_distances);
    ScopeDeleter<idx_t> del2 (all_labels);

#if 1
    // pthread-based version: one Thread<QueryJob> per shard
    std::vector<Thread<QueryJob> > qss (nshard);
    for (int i = 0; i < nshard; i++) {
        QueryJob qs = {
            this, i, n, x, k,
            all_distances + i * k * n,
            all_labels + i * k * n
        };
        if (threaded) {
            qss[i] = Thread<QueryJob> (qs);
            qss[i].start();
        } else {
            qs.run();
        }
    }

    if (threaded) {
        for (int i = 0; i < qss.size(); i++) {
            qss[i].wait();
        }
    }
#else
    // OpenMP-based version: the same QueryJobs run in a parallel for
    std::vector<QueryJob> qss (nshard);
    for (int i = 0; i < nshard; i++) {
        QueryJob qs = {
            this, i, n, x, k,
            all_distances + i * k * n,
            all_labels + i * k * n
        };
        if (threaded) {
            qss[i] = qs;
        } else {
            qs.run();
        }
    }

    if (threaded) {
#pragma omp parallel for
        for (int i = 0; i < qss.size(); i++) {
            qss[i].run();
        }
    }
#endif

    // when ids are successive, shard s's labels must be shifted by the total
    // size of shards 0..s-1
    std::vector<long> translations (nshard, 0);
    if (successive_ids) {
        for (int s = 0; s + 1 < nshard; s++)
            translations [s + 1] = translations [s] +
                shard_indexes [s]->ntotal;
    }

    if (metric_type == METRIC_L2) {
        merge_tables< CMin<float, int> > (
            n, k, nshard, distances, labels,
            all_distances, all_labels, translations.data ());
    } else {
        merge_tables< CMax<float, int> > (
            n, k, nshard, distances, labels,
            all_distances, all_labels, translations.data ());
    }
}
IndexShards::~IndexShards ()
{
    if (own_fields) {
        for (int s = 0; s < shard_indexes.size(); s++)
            delete shard_indexes [s];
    }
}
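// Illustrative usage sketch (not part of the original file): sharding one
// dataset over two flat sub-indexes. IndexFlatL2 and all variable names
// (d, nt, xt, nb, xb, nq, xq, k, D, I) are examples provided by the caller.
//
//     faiss::IndexShards shards (d, /*threaded=*/true, /*successive_ids=*/true);
//     faiss::IndexFlatL2 shard0 (d), shard1 (d);
//     shards.add_shard (&shard0);
//     shards.add_shard (&shard1);
//     shards.train (nt, xt);            // skipped for shards that are already trained
//     shards.add (nb, xb);              // vectors are split evenly over the shards
//     shards.search (nq, xq, k, D, I);  // per-shard results merged by merge_tables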
IndexSplitVectors::IndexSplitVectors (idx_t d, bool threaded):
    Index (d), own_fields (false),
    threaded (threaded), sum_d (0)
{
}

void IndexSplitVectors::add_sub_index (Index *index)
{
    sub_indexes.push_back (index);
    sync_with_sub_indexes ();
}
void IndexSplitVectors::sync_with_sub_indexes ()
{
    if (sub_indexes.empty()) return;
    Index * index0 = sub_indexes[0];
    sum_d = index0->d;
    metric_type = index0->metric_type;
    is_trained = index0->is_trained;
    ntotal = index0->ntotal;
    for (int i = 1; i < sub_indexes.size(); i++) {
        Index * index = sub_indexes[i];
        FAISS_THROW_IF_NOT (metric_type == index->metric_type);
        FAISS_THROW_IF_NOT (ntotal == index->ntotal);
        sum_d += index->d;
    }
}
void IndexSplitVectors::add (idx_t n, const float *x)
{
    FAISS_THROW_MSG ("not implemented");
}
namespace {

/// queries one sub-index on its slice of the input dimensions
struct SplitQueryJob {
    const IndexSplitVectors *index;  // the relevant index
    int no;                          // sub-index number
    // query params
    idx_t n;
    const float *x;
    idx_t k;
    float *distances;
    idx_t *labels;

    void run () {
        if (index->verbose)
            printf ("begin query shard %d on %ld points\n", no, n);
        const Index * sub_index = index->sub_indexes[no];
        long sub_d = sub_index->d, d = index->d;
        idx_t ofs = 0;
        for (int i = 0; i < no; i++) ofs += index->sub_indexes[i]->d;
        // gather the sub-vectors of all queries into a contiguous buffer
        float *sub_x = new float [sub_d * n];
        ScopeDeleter<float> del (sub_x);
        for (idx_t i = 0; i < n; i++)
            memcpy (sub_x + i * sub_d, x + ofs + i * d,
                    sub_d * sizeof (sub_x[0]));
        sub_index->search (n, sub_x, k, distances, labels);
        if (index->verbose)
            printf ("end query shard %d\n", no);
    }
};

}  // anonymous namespace
void IndexSplitVectors::search (idx_t n, const float *x, idx_t k,
                                float *distances, idx_t *labels) const
{
    FAISS_THROW_IF_NOT_MSG (k == 1,
                            "search implemented only for k=1");
    FAISS_THROW_IF_NOT_MSG (sum_d == d,
                            "not enough indexes compared to # dimensions");

    long nshard = sub_indexes.size();
    float *all_distances = new float [nshard * k * n];
    idx_t *all_labels = new idx_t [nshard * k * n];
    ScopeDeleter<float> del (all_distances);
    ScopeDeleter<idx_t> del2 (all_labels);
    std::vector<Thread<SplitQueryJob> > qss (nshard);
    for (int i = 0; i < nshard; i++) {
        SplitQueryJob qs = {
            this, i, n, x, k,
            // sub-index 0 writes directly into the caller's output arrays
            i == 0 ? distances : all_distances + i * k * n,
            i == 0 ? labels : all_labels + i * k * n
        };
        if (threaded) {
            qss[i] = Thread<SplitQueryJob> (qs);
            qss[i].start();
        } else {
            qs.run();
        }
    }
    if (threaded) {
        for (int i = 0; i < qss.size(); i++)
            qss[i].wait();
    }
    // combine the per-sub-index results: labels are composed in a mixed-radix
    // fashion, distances are summed
    long factor = 1;
    for (int i = 0; i < nshard; i++) {
        if (i > 0) { // results of sub-index 0 are already in the output arrays
            const float *distances_i = all_distances + i * k * n;
            const idx_t *labels_i = all_labels + i * k * n;
            for (long j = 0; j < n; j++) {
                if (labels[j] >= 0 && labels_i[j] >= 0) {
                    labels[j] += labels_i[j] * factor;
                    distances[j] += distances_i[j];
                } else {
                    labels[j] = -1;
                    distances[j] = 0.0 / 0.0; // NaN
                }
            }
        }
        factor *= sub_indexes[i]->ntotal;
    }
}
void IndexSplitVectors::train (idx_t n, const float *x)
{
    FAISS_THROW_MSG ("not implemented");
}

void IndexSplitVectors::reset ()
{
    FAISS_THROW_MSG ("not implemented");
}
IndexSplitVectors::~IndexSplitVectors ()
{
    if (own_fields) {
        for (int s = 0; s < sub_indexes.size(); s++)
            delete sub_indexes [s];
    }
}

} // namespace faiss
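// Illustrative usage sketch (not part of the original file): splitting
// d1 + d2 input dimensions over two sub-indexes. The sub-index objects and
// variable names (index_d1, index_d2, nq, xq, D, I) are assumptions of this
// sketch. Note that search supports k = 1 only and every sub-index must hold
// the same number of vectors.
//
//     faiss::IndexSplitVectors split (d1 + d2, /*threaded=*/true);
//     split.add_sub_index (&index_d1);   // an index of dimension d1
//     split.add_sub_index (&index_d2);   // an index of dimension d2
//     split.search (nq, xq, 1, D, I);    // labels combined across sub-indexes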