#include <sys/types.h>

#include <cstdlib>

#include "../AutoTune.h"
/// Print a diagnostic for `fname`, close `f` if it is open, and abort.
[[noreturn]] static void fvecs_read_fail (FILE *f, const char *fname,
                                          const char *what)
{
    fprintf(stderr, "%s: %s\n", fname, what);
    if (f) fclose(f);
    abort();
}

/**
 * Load a whole .fvecs file into one densely packed float array.
 *
 * The file is treated as a sequence of fixed-size records, each made of
 * one int (the dimension d) followed by d floats. The dimension is taken
 * from the first record only; the number of vectors is derived from the
 * file size.
 *
 * Every failure (file cannot be opened, short read, implausible dimension,
 * file size not a multiple of the record size) prints a message to stderr
 * and aborts the process.
 * NOTE(review): the statements that followed the "could not open" message
 * were not visible when this block was rewritten; aborting there is an
 * assumption -- confirm against the complete file.
 *
 * @param fname  path of the file to read
 * @param d_out  receives the vector dimension
 * @param n_out  receives the number of vectors
 * @return       array of n * d floats allocated with new[]; the caller
 *               releases it with delete[]
 */
float * fvecs_read (const char *fname,
                    size_t *d_out, size_t *n_out)
{
    // The per-record header is an int stored in a float-sized slot.
    static_assert(sizeof(int) == sizeof(float),
                  "fvecs layout needs int and float of equal size");

    // "rb": the payload is raw binary, so text-mode translation must be off.
    FILE *f = fopen(fname, "rb");
    if (!f) {
        fprintf(stderr, "could not open %s\n", fname);
        abort();
    }

    int d = 0;
    if (fread(&d, 1, sizeof(int), f) != sizeof(int))
        fvecs_read_fail(f, fname, "could not read dimension");
    if (!(d > 0 && d < 1000000))
        fvecs_read_fail(f, fname, "unreasonable dimension");
    fseek(f, 0, SEEK_SET);   // rewind: the bulk read below starts at record 0

    struct stat st;
    if (fstat(fileno(f), &st) != 0)
        fvecs_read_fail(f, fname, "could not stat file");
    const size_t sz = st.st_size;
    const size_t row_len = static_cast<size_t>(d) + 1;   // header + d values
    const size_t row_bytes = row_len * sizeof(float);
    if (sz % row_bytes != 0)
        fvecs_read_fail(f, fname, "weird file size");
    const size_t n = sz / row_bytes;

    *d_out = d; *n_out = n;

    float *x = new float[n * row_len];
    const size_t nr = fread(x, sizeof(float), n * row_len, f);
    fclose(f);
    if (nr != n * row_len) {
        delete[] x;
        fvecs_read_fail(nullptr, fname, "could not read whole file");
    }

    // Compact in place: drop the one-element header in front of each row.
    // Source and destination overlap, hence memmove rather than memcpy.
    for (size_t i = 0; i < n; i++)
        memmove(x + i * d, x + 1 + i * row_len, d * sizeof(*x));

    return x;
}
73 int *ivecs_read(
const char *fname,
size_t *d_out,
size_t *n_out)
75 return (
int*)fvecs_read(fname, d_out, n_out);
81 gettimeofday (&tv,
nullptr);
82 return tv.tv_sec + tv.tv_usec * 1e-6;
89 double t0 = elapsed();
92 const char *index_key =
"IVF4096,Flat";
109 printf (
"[%.3f s] Loading train set\n", elapsed() - t0);
112 float *xt = fvecs_read(
"sift1M/sift_learn.fvecs", &d, &nt);
114 printf (
"[%.3f s] Preparing index \"%s\" d=%ld\n",
115 elapsed() - t0, index_key, d);
118 printf (
"[%.3f s] Training on %ld vectors\n", elapsed() - t0, nt);
120 index->
train(nt, xt);
126 printf (
"[%.3f s] Loading database\n", elapsed() - t0);
129 float *xb = fvecs_read(
"sift1M/sift_base.fvecs", &d2, &nb);
130 assert(d == d2 || !
"dataset does not have same dimension as train set");
132 printf (
"[%.3f s] Indexing database, size %ld*%ld\n",
133 elapsed() - t0, nb, d);
144 printf (
"[%.3f s] Loading queries\n", elapsed() - t0);
147 xq = fvecs_read(
"sift1M/sift_query.fvecs", &d2, &nq);
148 assert(d == d2 || !
"query does not have same dimension as train set");
156 printf (
"[%.3f s] Loading ground truth for %ld queries\n",
161 int *gt_int = ivecs_read(
"sift1M/sift_groundtruth.ivecs", &k, &nq2);
162 assert(nq2 == nq || !
"incorrect nb of ground truth entries");
165 for(
int i = 0; i < k * nq; i++) {
172 std::string selected_params;
176 printf (
"[%.3f s] Preparing auto-tune criterion 1-recall at 1 "
177 "criterion, with k=%ld nq=%ld\n", elapsed() - t0, k, nq);
180 crit.set_groundtruth (k,
nullptr, gt);
183 printf (
"[%.3f s] Preparing auto-tune parameters\n", elapsed() - t0);
188 printf (
"[%.3f s] Auto-tuning over %ld parameters (%ld combinations)\n",
193 params.
explore (index, nq, xq, crit, &ops);
195 printf (
"[%.3f s] Found the following operating points: \n",
201 for (
int i = 0; i < ops.optimal_pts.size(); i++) {
202 if (ops.optimal_pts[i].perf > 0.5) {
203 selected_params = ops.optimal_pts[i].key;
207 assert (selected_params.size() >= 0 ||
208 !
"could not find good enough op point");
216 printf (
"[%.3f s] Setting parameter configuration \"%s\" on index\n",
217 elapsed() - t0, selected_params.c_str());
221 printf (
"[%.3f s] Perform a search on %ld queries\n",
226 float *D =
new float[nq * k];
228 index->
search(nq, xq, k, D, I);
230 printf (
"[%.3f s] Compute recalls\n", elapsed() - t0);
233 int n_1 = 0, n_10 = 0, n_100 = 0;
234 for(
int i = 0; i < nq; i++) {
235 int gt_nn = gt[i * k];
236 for(
int j = 0; j < k; j++) {
237 if (I[i * k + j] == gt_nn) {
244 printf(
"R@1 = %.4f\n", n_1 /
float(nq));
245 printf(
"R@10 = %.4f\n", n_10 /
float(nq));
246 printf(
"R@100 = %.4f\n", n_100 /
float(nq));
void explore(Index *index, size_t nq, const float *xq, const AutoTuneCriterion &crit, OperatingPoints *ops) const
std::vector< ParameterRange > parameter_ranges
all tunable parameters
virtual void train(idx_t, const float *)
virtual void add(idx_t n, const float *x)=0
long idx_t
all indices are this type
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
virtual void initialize(const Index *index)
initialize with reasonable parameters for the index
size_t n_combinations() const
nb of combinations, = product of values sizes
void set_index_parameters(Index *index, size_t cno) const
set a combination of parameters on an index
Index * index_factory(int d, const char *description_in, MetricType metric)