13 #include "MetaIndexes.h"
19 #include "FaissAssert.h"
28 IndexIDMap::IndexIDMap (Index *index):
32 FAISS_ASSERT (index->ntotal == 0 || !
"index must be empty on input");
42 FAISS_ASSERT (!
"add does not make sense with IndexIDMap, "
63 for (
idx_t i = 0; i < n; i++)
64 id_map.push_back (xids[i]);
70 float *distances,
idx_t *labels)
const
72 index->
search (n, x, k, distances, labels);
74 for (
idx_t i = 0; i < n * k; i++) {
75 li[i] = li[i] < 0 ? li[i] :
id_map[li[i]];
81 IndexIDMap::~IndexIDMap ()
86 void IndexIDMap::set_typename ()
88 index_typename =
"IDMap[" + index->index_typename +
"]";
110 explicit Thread (
const Job & job): job(job) {}
113 pthread_create (&thread,
nullptr, run,
this);
117 pthread_join (thread,
nullptr);
120 static void * run (
void *arg) {
121 static_cast<Thread*
> (arg)->job.run();
138 printf (
"begin train shard %d on %ld points\n", no, n);
139 index->shard_indexes [no]->train(n, x);
141 printf (
"end train shard %d\n", no);
157 printf (
"begin add shard %d on %ld points\n", no, n);
159 index->shard_indexes[no]->add_with_ids (n, x, ids);
161 index->shard_indexes[no]->add (n, x);
163 printf (
"end add shard %d on %ld points\n", no, n);
171 const IndexShards *index;
185 printf (
"begin query shard %d on %ld points\n", no, n);
186 index->shard_indexes [no]->search (n, x, k,
189 printf (
"end query shard %d\n", no);
199 void translate_labels (
long n, idx_t *labels,
long translation)
201 if (translation == 0)
return;
202 for (
long i = 0; i < n; i++) {
203 if(labels[i] < 0)
return;
204 labels[i] += translation;
215 void merge_tables (
long n,
long k,
long nshard,
216 float *distances, idx_t *labels,
217 const float *all_distances,
219 const long *translations)
221 long shard_stride = n * k;
222 #pragma omp parallel for
223 for (
long i = 0; i < n; i++) {
224 float *D = distances + i * k;
225 idx_t *I = labels + i * k;
226 const float *Ds = all_distances + i * k;
227 idx_t *Is = all_labels + i * k;
228 translate_labels (k, Is, translations[0]);
229 heap_heapify<C>(k, D, I, Ds, Is, k);
230 for (
int s = 1; s < nshard; s++) {
233 translate_labels (k, Is, translations[s]);
234 heap_addn<C> (k, D, I, Ds, Is, k);
236 heap_reorder<C>(k, D, I);
247 Index (d), own_fields (false),
248 threaded (threaded), successive_ids (successive_ids)
253 void IndexShards::add_shard (
Index *idx)
255 shard_indexes.push_back (idx);
256 sync_with_shard_indexes ();
259 void IndexShards::sync_with_shard_indexes ()
261 if (shard_indexes.empty())
return;
262 Index * index0 = shard_indexes[0];
267 for (
int i = 1; i < shard_indexes.size(); i++) {
268 Index * index = shard_indexes[i];
270 FAISS_ASSERT (
d == index->d);
280 std::vector<Thread<TrainJob > > tss (shard_indexes.size());
282 for (
int i = 0; i < shard_indexes.size(); i++) {
284 TrainJob ts = {
this, i, n, x};
286 tss[nt] = Thread<TrainJob> (ts);
293 for (
int i = 0; i < nt; i++) {
296 sync_with_shard_indexes ();
319 FAISS_ASSERT(!(successive_ids && xids) ||
320 !
"It makes no sense to pass in ids and request them to be shifted");
322 if (successive_ids) {
323 FAISS_ASSERT(!xids ||
324 !
"It makes no sense to pass in ids and request them to be shifted");
325 FAISS_ASSERT(
ntotal == 0 ||
326 !
"when adding to IndexShards with sucessive_ids, only add() "
327 "in a single pass is supported");
330 long nshard = shard_indexes.size();
331 const long *ids = xids;
332 if (!ids && !successive_ids) {
333 long *aids =
new long[n];
334 for (
long i = 0; i < n; i++)
339 std::vector<Thread<AddJob > > asa (shard_indexes.size());
341 for (
int i = 0; i < nshard; i++) {
342 long i0 = i * n / nshard;
343 long i1 = (i + 1) * n / nshard;
345 AddJob as = {
this, i,
347 ids ? ids + i0 :
nullptr};
349 asa[nt] = Thread<AddJob>(as);
355 for (
int i = 0; i < nt; i++) {
358 if (ids != xids)
delete [] ids;
365 for (
int i = 0; i < shard_indexes.size(); i++) {
366 shard_indexes[i]->reset ();
368 sync_with_shard_indexes ();
372 idx_t n,
const float *x, idx_t k,
373 float *distances, idx_t *labels)
const
375 long nshard = shard_indexes.size();
376 float *all_distances =
new float [nshard * k * n];
377 idx_t *all_labels =
new idx_t [nshard * k * n];
382 std::vector<Thread<QueryJob> > qss (nshard);
383 for (
int i = 0; i < nshard; i++) {
386 all_distances + i * k * n,
387 all_labels + i * k * n
390 qss[i] = Thread<QueryJob> (qs);
398 for (
int i = 0; i < qss.size(); i++) {
405 std::vector<QueryJob> qss (nshard);
406 for (
int i = 0; i < nshard; i++) {
409 all_distances + i * k * n,
410 all_labels + i * k * n
420 #pragma omp parallel for
421 for (
int i = 0; i < qss.size(); i++) {
427 std::vector<long> translations (nshard, 0);
428 if (successive_ids) {
430 for (
int s = 0; s + 1 < nshard; s++)
431 translations [s + 1] = translations [s] +
432 shard_indexes [s]->
ntotal;
436 merge_tables< CMax<float, idx_t> > (
437 n, k, nshard, distances, labels,
438 all_distances, all_labels, translations.data ());
440 merge_tables< CMin<float, idx_t> > (
441 n, k, nshard, distances, labels,
442 all_distances, all_labels, translations.data ());
445 delete [] all_distances;
446 delete [] all_labels;
450 void IndexShards::set_typename ()
455 IndexShards::~IndexShards ()
458 for (
int s = 0; s < shard_indexes.size(); s++)
459 delete shard_indexes [s];
470 Index (d), own_fields (false),
471 threaded (threaded), sum_d (0)
476 void IndexSplitVectors::add_sub_index (
Index *index)
478 sub_indexes.push_back (index);
479 sync_with_sub_indexes ();
482 void IndexSplitVectors::sync_with_sub_indexes ()
484 if (sub_indexes.empty())
return;
485 Index * index0 = sub_indexes[0];
490 for (
int i = 1; i < sub_indexes.size(); i++) {
491 Index * index = sub_indexes[i];
493 FAISS_ASSERT (
ntotal == index->ntotal);
501 FAISS_ASSERT (!
"not implemented");
507 struct SplitQueryJob {
522 printf (
"begin query shard %d on %ld points\n", no, n);
523 const Index * sub_index = index->sub_indexes[no];
524 long sub_d = sub_index->
d, d = index->
d;
526 for (
int i = 0; i < no; i++) ofs += index->sub_indexes[i]->
d;
527 float *sub_x =
new float [sub_d * n];
528 for (idx_t i = 0; i < n; i++)
529 memcpy (sub_x + i * sub_d, x + ofs + i * d, sub_d *
sizeof (sub_x));
530 sub_index->
search (n, sub_x, k, distances, labels);
533 printf (
"end query shard %d\n", no);
545 idx_t n,
const float *x, idx_t k,
546 float *distances, idx_t *labels)
const
548 FAISS_ASSERT (k == 1 || !
"search implemented only for k=1");
549 FAISS_ASSERT (sum_d == d || !
"not enough indexes compared to # dimensions");
551 long nshard = sub_indexes.size();
552 float *all_distances =
new float [nshard * k * n];
553 idx_t *all_labels =
new idx_t [nshard * k * n];
556 std::vector<Thread<SplitQueryJob> > qss (nshard);
557 for (
int i = 0; i < nshard; i++) {
560 i == 0 ? distances : all_distances + i * k * n,
561 i == 0 ? labels : all_labels + i * k * n
564 qss[i] = Thread<SplitQueryJob> (qs);
572 for (
int i = 0; i < qss.size(); i++) {
578 for (
int i = 0; i < nshard; i++) {
580 const float *distances_i = all_distances + i * k * n;
581 const idx_t *labels_i = all_labels + i * k * n;
582 for (
long j = 0; j < n; j++) {
583 if (labels[j] >= 0 && labels_i[j] >= 0) {
584 labels[j] += labels_i[j] * factor;
585 distances[j] += distances_i[j];
588 distances[j] = 0.0 / 0.0;
592 factor *= sub_indexes[i]->ntotal;
594 delete [] all_labels;
595 delete [] all_distances;
601 FAISS_ASSERT (!
"not implemented");
606 FAISS_ASSERT (!
"not implemented");
609 void IndexSplitVectors::set_typename ()
612 IndexSplitVectors::~IndexSplitVectors ()
615 for (
int s = 0; s < sub_indexes.size(); s++)
616 delete sub_indexes [s];
virtual void train(idx_t n, const float *x) override
IndexShards(idx_t d, bool threaded=false, bool successive_ids=true)
virtual void reset()=0
removes all elements from the database.
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
virtual void add_with_ids(idx_t n, const float *x, const long *xids) override
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
virtual void reset() override
removes all elements from the database.
virtual void add(idx_t n, const float *x) override
this will fail. Use add_with_ids
std::vector< long > id_map
whether pointers are deleted in the destructor
virtual void add(idx_t n, const float *x)=0
virtual void train(idx_t n, const float *x) override
virtual void add(idx_t n, const float *x) override
supported only for sub-indices that implement add_with_ids
long idx_t
all indices are this type
idx_t ntotal
total nb of indexed vectors
bool verbose
verbosity level
virtual void add(idx_t n, const float *x) override
bool threaded
should the sub-indexes be deleted along with this?
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
virtual void reset() override
removes all elements from the database.
virtual void reset() override
removes all elements from the database.
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
MetricType metric_type
type of metric this index uses for search
virtual void train(idx_t n, const float *x) override
bool is_trained
set if the Index does not require training, or if training is done already
virtual void train(idx_t n, const float *x)
virtual void add_with_ids(idx_t n, const float *x, const long *xids) override
IndexSplitVectors(idx_t d, bool threaded=false)
sum of dimensions seen so far
bool own_fields
the sub-index