17 #include <sys/types.h>
23 #include "../AutoTune.h"
// Load a file of fixed-dimension vector records into one heap-allocated
// float array.
//
// As the arithmetic below implies, the file is treated as a sequence of
// records of (d + 1) * 4 bytes each: one leading dimension word followed by d
// values. The dimension is taken from the first sizeof(int) bytes of the file.
//
//   fname  path of the file to read
//   d_out  receives the dimension d
//   n_out  receives the number of records, file size / ((d + 1) * 4)
//
// The buffer is allocated as new float[n * (d + 1)] and compacted in place so
// that the n vectors of d floats sit contiguously at its start.
// NOTE(review): the return statement and any fclose() are not visible in this
// excerpt; presumably the buffer is returned and the caller must delete[] it
// -- confirm.
39 float * fvecs_read (
const char *fname,
40 size_t *d_out,
size_t *n_out)
// NOTE(review): the stream is opened in text mode ("r") yet consumed as raw
// binary through fread(); "rb" would be the portable mode -- confirm intent.
42 FILE *f = fopen(fname,
"r");
// Diagnostic for a failed open (the guarding condition is outside this excerpt).
44 fprintf(stderr,
"could not open %s\n", fname);
// Read the first sizeof(int) bytes of the file into d. The fread() return
// value is not checked here.
49 fread(&d, 1,
sizeof(
int), f);
// Sanity check on the dimension. assert() is compiled out when NDEBUG is
// defined, so this offers no protection in such builds.
50 assert((d > 0 && d < 1000000) || !
"unreasonable dimension");
// Rewind so the bulk read below starts again at the beginning of the file.
51 fseek(f, 0, SEEK_SET);
// Derive the record count from the file size; the size must be an exact
// multiple of the record size (d + 1) * 4.
53 fstat(fileno(f), &st);
54 size_t sz = st.st_size;
55 assert(sz % ((d + 1) * 4) == 0 || !
"weird file size");
56 size_t n = sz / ((d + 1) * 4);
58 *d_out = d; *n_out = n;
// Allocate room for the records including their leading dimension word, then
// read the whole file in one call.
59 float *x =
new float[n * (d + 1)];
60 size_t nr = fread(x,
sizeof(
float), n * (d + 1), f);
61 assert(nr == n * (d + 1) || !
"could not read whole file");
// Compact in place: skip the leading word of each record so that vector i
// ends up at offset i * d. memmove is used because the source and destination
// ranges lie in the same buffer and can overlap.
64 for(
size_t i = 0; i < n; i++)
65 memmove(x + i * d, x + 1 + i * (d + 1), d *
sizeof(*x));
// Integer-vector variant of fvecs_read(): forwards all three arguments to
// fvecs_read() and returns the resulting buffer with its pointer type cast to
// int*. No numeric conversion is performed; only the pointer type changes.
// NOTE(review): this presumes int and float have the same size -- confirm for
// the target platforms.
72 int *ivecs_read(
const char *fname,
size_t *d_out,
size_t *n_out)
74 return (
int*)fvecs_read(fname, d_out, n_out);
// Current time from gettimeofday() expressed as seconds in a double: whole
// seconds plus the microsecond part scaled by 1e-6.
// NOTE(review): the enclosing function header and the declaration of tv are
// not visible in this excerpt; presumably this is the body of elapsed() used
// by the timing messages -- confirm.
80 gettimeofday (&tv,
nullptr);
81 return tv.tv_sec + tv.tv_usec * 1e-6;
// Reference timestamp: every progress message prints elapsed() - t0.
88 double t0 = elapsed();
// Textual index description; it is echoed in the "Preparing index" message.
// NOTE(review): presumably also passed to index_factory(), but that call is
// not visible in this excerpt -- confirm.
91 const char *index_key =
"IVF4096,Flat";
108 printf (
"[%.3f s] Loading train set\n", elapsed() - t0);
// Load the training vectors: d receives the dimension, nt the vector count.
111 float *xt = fvecs_read(
"sift1M/sift_learn.fvecs", &d, &nt);
// NOTE(review): d and nt are filled through the size_t* out-parameters of
// fvecs_read() but are printed with %ld; %zu is the conversion that matches
// size_t.
113 printf (
"[%.3f s] Preparing index \"%s\" d=%ld\n",
114 elapsed() - t0, index_key, d);
117 printf (
"[%.3f s] Training on %ld vectors\n", elapsed() - t0, nt);
// Train the index on the nt vectors stored in xt.
119 index->
train(nt, xt);
125 printf (
"[%.3f s] Loading database\n", elapsed() - t0);
// Load the database vectors: d2 receives their dimension, nb their count.
128 float *xb = fvecs_read(
"sift1M/sift_base.fvecs", &d2, &nb);
// The database must have the same dimension as the training set. assert() is
// compiled out when NDEBUG is defined.
129 assert(d == d2 || !
"dataset does not have same dimension as train set");
// NOTE(review): nb and d are filled through size_t* out-parameters but are
// printed with %ld; %zu is the conversion that matches size_t.
131 printf (
"[%.3f s] Indexing database, size %ld*%ld\n",
132 elapsed() - t0, nb, d);
143 printf (
"[%.3f s] Loading queries\n", elapsed() - t0);
// Load the query vectors: d2 receives their dimension, nq their count.
146 xq = fvecs_read(
"sift1M/sift_query.fvecs", &d2, &nq);
// Queries must have the same dimension as the training set.
147 assert(d == d2 || !
"query does not have same dimension as train set");
// NOTE(review): the argument list of this printf is not visible in this
// excerpt.
155 printf (
"[%.3f s] Loading ground truth for %ld queries\n",
// Load the ground truth: k receives the per-record dimension of the file and
// nq2 the number of records.
160 int *gt_int = ivecs_read(
"sift1M/sift_groundtruth.ivecs", &k, &nq2);
// There must be exactly one ground-truth record per query.
161 assert(nq2 == nq || !
"incorrect nb of ground truth entries");
// Loop over all k * nq ground-truth entries; the loop body is not visible in
// this excerpt.
// NOTE(review): the int index is compared against a product of size_t values
// (k and nq are filled through size_t* out-parameters) -- a signed/unsigned
// comparison.
164 for(
int i = 0; i < k * nq; i++) {
// Key of the operating point chosen by the selection loop below; it stays
// empty if no point qualifies.
171 std::string selected_params;
175 printf (
"[%.3f s] Preparing auto-tune criterion 1-recall at 1 "
176 "criterion, with k=%ld nq=%ld\n", elapsed() - t0, k, nq);
// Hand the ground truth to the criterion; nullptr is passed as the middle
// argument.
// NOTE(review): the declarations of crit and gt are not visible in this
// excerpt.
179 crit.set_groundtruth (k,
nullptr, gt);
182 printf (
"[%.3f s] Preparing auto-tune parameters\n", elapsed() - t0);
// NOTE(review): the argument list of this printf is not visible in this
// excerpt.
187 printf (
"[%.3f s] Auto-tuning over %ld parameters (%ld combinations)\n",
// Run the exploration over the nq queries in xq, scored by crit; the results
// are written into ops.
192 params.
explore (index, nq, xq, crit, &ops);
// NOTE(review): the argument list of this printf is not visible in this
// excerpt.
194 printf (
"[%.3f s] Found the following operating points: \n",
// Scan the optimal operating points and record the key of a point whose perf
// exceeds 0.5.
// NOTE(review): the rest of the loop body (e.g. whether it stops at the first
// match) is not visible in this excerpt. The int index is compared against
// optimal_pts.size(), a signed/unsigned comparison if that returns size_t.
200 for (
int i = 0; i < ops.optimal_pts.size(); i++) {
201 if (ops.optimal_pts[i].perf > 0.5) {
202 selected_params = ops.optimal_pts[i].key;
// Require that an operating point was actually selected: selected_params is
// non-empty only if the selection loop assigned a key to it. The previous
// condition, size() >= 0, is always true for an unsigned size and therefore
// could never report the failure described by the message.
206 assert (!selected_params.empty() ||
207 !
"could not find good enough op point");
// Report which parameter configuration is about to be applied.
// NOTE(review): the call that applies it to the index is not visible in this
// excerpt.
215 printf (
"[%.3f s] Setting parameter configuration \"%s\" on index\n",
216 elapsed() - t0, selected_params.c_str());
// NOTE(review): the argument list of this printf is not visible in this
// excerpt.
220 printf (
"[%.3f s] Perform a search on %ld queries\n",
// Output buffer with room for k entries per query (nq * k floats). It is
// passed as the fourth argument of search().
225 float *D =
new float[nq * k];
// Search the nq queries in xq for k results each; D and I are the output
// arrays.
// NOTE(review): the declaration/allocation of I and the release of D are not
// visible in this excerpt.
227 index->
search(nq, xq, k, D, I);
229 printf (
"[%.3f s] Compute recalls\n", elapsed() - t0);
// Counters reported below as R@1, R@10 and R@100.
232 int n_1 = 0, n_10 = 0, n_100 = 0;
// For each query, take the first ground-truth entry (gt[i * k]) and look for
// it among that query's k result labels in I.
// NOTE(review): the statements that update the counters are not visible in
// this excerpt; presumably a hit at rank j increments the counters whose
// cutoff exceeds j -- confirm.
233 for(
int i = 0; i < nq; i++) {
234 int gt_nn = gt[i * k];
235 for(
int j = 0; j < k; j++) {
236 if (I[i * k + j] == gt_nn) {
// Print each counter as a fraction of the number of queries.
243 printf(
"R@1 = %.4f\n", n_1 /
float(nq));
244 printf(
"R@10 = %.4f\n", n_10 /
float(nq));
245 printf(
"R@100 = %.4f\n", n_100 /
float(nq));
void explore(Index *index, size_t nq, const float *xq, const AutoTuneCriterion &crit, OperatingPoints *ops) const
std::vector< ParameterRange > parameter_ranges
all tunable parameters
virtual void train(idx_t n, const float *x)
virtual void add(idx_t n, const float *x)=0
long idx_t
all indices are this type
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
virtual void initialize(const Index *index)
initialize with reasonable parameters for the index
size_t n_combinations() const
number of combinations, i.e. the product of the values sizes
void set_index_parameters(Index *index, size_t cno) const
set a combination of parameters on an index
Index * index_factory(int d, const char *description_in, MetricType metric)