10 #include "MetaIndexes.h"
14 #include "FaissAssert.h"
16 #include "AuxIndexStructures.h"
17 #include "WorkerThread.h"
32 IndexIDMap::IndexIDMap (Index *index):
36 FAISS_THROW_IF_NOT_MSG (index->ntotal == 0,
"index must be empty on input");
37 is_trained = index->is_trained;
38 metric_type = index->metric_type;
39 verbose = index->verbose;
45 FAISS_THROW_MSG (
"add does not make sense with IndexIDMap, "
53 is_trained = index->is_trained;
67 for (
idx_t i = 0; i < n; i++)
68 id_map.push_back (xids[i]);
69 ntotal = index->ntotal;
74 float *distances,
idx_t *labels)
const
76 index->search (n, x, k, distances, labels);
78 #pragma omp parallel for
79 for (
idx_t i = 0; i < n * k; i++) {
80 li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
88 index->range_search(n, x, radius, result);
89 #pragma omp parallel for
90 for (
idx_t i = 0; i < result->
lims[result->
nq]; i++) {
99 const std::vector <long> & id_map;
101 IDTranslatedSelector (
const std::vector <long> & id_map,
103 id_map (id_map), sel (sel)
105 bool is_member(idx_t
id)
const override {
106 return sel.is_member(id_map[
id]);
115 IDTranslatedSelector sel2 (id_map, sel);
116 long nremove = index->remove_ids (sel2);
119 for (
idx_t i = 0; i < ntotal; i++) {
120 if (sel.is_member (id_map[i])) {
123 id_map[j] = id_map[i];
127 FAISS_ASSERT (j == index->ntotal);
129 id_map.resize(ntotal);
136 IndexIDMap::~IndexIDMap ()
138 if (own_fields)
delete index;
145 IndexIDMap2::IndexIDMap2 (Index *index): IndexIDMap (index)
150 size_t prev_ntotal =
ntotal;
152 for (
size_t i = prev_ntotal; i <
ntotal; i++) {
160 for (
size_t i = 0; i <
ntotal; i++) {
178 }
catch (
const std::out_of_range& e) {
179 FAISS_THROW_FMT (
"key %ld not found", key);
189 Index (d), own_fields (false),
190 threaded (threaded), sum_d (0)
195 void IndexSplitVectors::add_sub_index (
Index *index)
197 sub_indexes.push_back (index);
198 sync_with_sub_indexes ();
201 void IndexSplitVectors::sync_with_sub_indexes ()
203 if (sub_indexes.empty())
return;
204 Index * index0 = sub_indexes[0];
209 for (
int i = 1; i < sub_indexes.size(); i++) {
210 Index * index = sub_indexes[i];
211 FAISS_THROW_IF_NOT (
metric_type == index->metric_type);
212 FAISS_THROW_IF_NOT (
ntotal == index->ntotal);
219 FAISS_THROW_MSG(
"not implemented");
226 float *distances,
idx_t *labels)
const
228 FAISS_THROW_IF_NOT_MSG (k == 1,
229 "search implemented only for k=1");
230 FAISS_THROW_IF_NOT_MSG (sum_d ==
d,
231 "not enough indexes compared to # dimensions");
233 long nshard = sub_indexes.size();
234 float *all_distances =
new float [nshard * k * n];
235 idx_t *all_labels =
new idx_t [nshard * k * n];
239 auto query_func = [n, x, k, distances, labels, all_distances, all_labels,
this]
242 float *distances1 = no == 0 ? distances : all_distances + no * k * n;
243 idx_t *labels1 = no == 0 ? labels : all_labels + no * k * n;
245 printf (
"begin query shard %d on %ld points\n", no, n);
246 const Index * sub_index = index->sub_indexes[no];
247 long sub_d = sub_index->
d,
d = index->
d;
249 for (
int i = 0; i < no; i++) ofs += index->sub_indexes[i]->d;
250 float *sub_x =
new float [sub_d * n];
252 for (
idx_t i = 0; i < n; i++)
253 memcpy (sub_x + i * sub_d, x + ofs + i *
d, sub_d *
sizeof (sub_x));
254 sub_index->
search (n, sub_x, k, distances1, labels1);
256 printf (
"end query shard %d\n", no);
260 for (
int i = 0; i < nshard; i++) {
264 std::vector<std::unique_ptr<WorkerThread> > threads;
265 std::vector<std::future<bool>> v;
267 for (
int i = 0; i < nshard; i++) {
270 v.emplace_back(wt->
add([i, query_func](){query_func(i); }));
274 for (
auto& func : v) {
280 for (
int i = 0; i < nshard; i++) {
282 const float *distances_i = all_distances + i * k * n;
283 const idx_t *labels_i = all_labels + i * k * n;
284 for (
long j = 0; j < n; j++) {
285 if (labels[j] >= 0 && labels_i[j] >= 0) {
286 labels[j] += labels_i[j] * factor;
287 distances[j] += distances_i[j];
290 distances[j] = 0.0 / 0.0;
294 factor *= sub_indexes[i]->ntotal;
299 void IndexSplitVectors::train(
idx_t ,
const float* ) {
300 FAISS_THROW_MSG(
"not implemented");
303 void IndexSplitVectors::reset ()
305 FAISS_THROW_MSG (
"not implemented");
309 IndexSplitVectors::~IndexSplitVectors ()
312 for (
int s = 0; s < sub_indexes.size(); s++)
313 delete sub_indexes [s];
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add_with_ids(idx_t n, const float *x, const long *xids) override
void add(idx_t n, const float *x) override
this will fail. Use add_with_ids
long idx_t
all indices are this type
std::vector< long > id_map
whether pointers are deleted in the destructor
void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
idx_t ntotal
total nb of indexed vectors
void construct_rev_map()
make the rev_map from scratch
bool verbose
verbosity level
void add(idx_t n, const float *x) override
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
void reset() override
removes all elements from the database.
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
long remove_ids(const IDSelector &sel) override
remove ids adapted to IndexFlat
MetricType metric_type
type of metric this index uses for search
void reconstruct(idx_t key, float *recons) const override
void train(idx_t n, const float *x) override
size_t * lims
size (nq + 1)
std::future< bool > add(std::function< void()> f)
bool is_trained
set if the Index does not require training, or if training is done already
virtual void reconstruct(idx_t key, float *recons) const
void add_with_ids(idx_t n, const float *x, const long *xids) override
idx_t * labels
result for query i is labels[lims[i]:lims[i+1]]
IndexSplitVectors(idx_t d, bool threaded=false)
sum of dimensions seen so far