19 #include <sys/types.h>
#include <cstdlib>
25 #include "../AutoTune.h"
// Report a fatal problem with the file being read and abort the process.
static void fvecs_require(bool ok, const char *fname, const char *what)
{
    if (!ok) {
        fprintf(stderr, "%s: %s\n", fname, what);
        abort();
    }
}

/**
 * Read an entire .fvecs file into one contiguous float array.
 *
 * The file is treated as a sequence of fixed-size records, each made of a
 * 4-byte int header (the dimension) followed by that many 4-byte floats.
 * The dimension is taken from the first record only and applied to the
 * whole file; the file size must be a multiple of the record size.
 *
 * @param fname  path of the file to read
 * @param d_out  receives the vector dimension
 * @param n_out  receives the number of vectors
 * @return       buffer allocated with new[] (release with delete[]); its
 *               first n * d entries hold the vectors back to back. The
 *               allocation itself spans n * (d + 1) floats.
 *
 * Any I/O or format problem prints a message to stderr and aborts. These
 * checks are explicit rather than assert-based so they stay active when
 * NDEBUG is defined.
 */
float * fvecs_read (const char *fname,
                    size_t *d_out, size_t *n_out)
{
    // The payload is raw binary, so avoid text-mode translation.
    FILE *f = fopen(fname, "rb");
    if (!f) {
        fprintf(stderr, "could not open %s\n", fname);
        abort();
    }

    // Header of the first record gives the dimension.
    int d = 0;
    fvecs_require(fread(&d, 1, sizeof(int), f) == sizeof(int), fname,
                  "could not read dimension");
    fvecs_require(d > 0 && d < 1000000, fname, "unreasonable dimension");
    fvecs_require(fseek(f, 0, SEEK_SET) == 0, fname, "could not rewind");

    // Derive the number of records from the file size.
    struct stat st;
    fvecs_require(fstat(fileno(f), &st) == 0, fname, "could not stat file");
    const size_t sz = st.st_size;
    const size_t row_bytes = (static_cast<size_t>(d) + 1) * 4;
    fvecs_require(sz % row_bytes == 0, fname, "weird file size");
    const size_t n = sz / row_bytes;

    *d_out = d; *n_out = n;

    // Read the raw records (header + payload) in one go.
    const size_t row_len = static_cast<size_t>(d) + 1;
    float *x = new float[n * row_len];
    const size_t nr = fread(x, sizeof(float), n * row_len, f);
    fvecs_require(nr == n * row_len, fname, "could not read whole file");

    // Compact in place: drop each record's header so the vectors become
    // contiguous. memmove is required because source and destination
    // ranges overlap.
    for (size_t i = 0; i < n; i++)
        memmove(x + i * d, x + 1 + i * row_len, d * sizeof(*x));

    fclose(f);
    return x;
}
74 int *ivecs_read(
const char *fname,
size_t *d_out,
size_t *n_out)
76 return (
int*)fvecs_read(fname, d_out, n_out);
// Sample the current time of day into tv; no timezone structure is requested (nullptr).
82 gettimeofday (&tv,
nullptr);
// Fold whole seconds and microseconds into a single value in fractional seconds.
83 return tv.tv_sec + tv.tv_usec * 1e-6;
// Reference timestamp: every progress message below prints elapsed() - t0.
90 double t0 = elapsed();
// Index description string; it is echoed in the "Preparing index" message.
93 const char *index_key =
"IVF4096,Flat";
// ---- Training set ----
110 printf (
"[%.3f s] Loading train set\n", elapsed() - t0);
// fvecs_read stores the vector dimension in d and the vector count in nt.
113 float *xt = fvecs_read(
"sift1M/sift_learn.fvecs", &d, &nt);
// NOTE(review): d and nt are filled through size_t* but printed with %ld;
// %zu is the matching conversion for size_t — confirm their declarations.
115 printf (
"[%.3f s] Preparing index \"%s\" d=%ld\n",
116 elapsed() - t0, index_key, d);
119 printf (
"[%.3f s] Training on %ld vectors\n", elapsed() - t0, nt);
// Train the index on the nt vectors held in xt.
121 index->
train(nt, xt);
// ---- Database vectors ----
127 printf (
"[%.3f s] Loading database\n", elapsed() - t0);
// Dimension goes to d2 and the vector count to nb; d2 is then checked against d.
130 float *xb = fvecs_read(
"sift1M/sift_base.fvecs", &d2, &nb);
131 assert(d == d2 || !
"dataset does not have same dimension as train set");
133 printf (
"[%.3f s] Indexing database, size %ld*%ld\n",
134 elapsed() - t0, nb, d);
// ---- Queries ----
145 printf (
"[%.3f s] Loading queries\n", elapsed() - t0);
// Query vectors land in xq, their count in nq; the dimension must again equal d.
148 xq = fvecs_read(
"sift1M/sift_query.fvecs", &d2, &nq);
149 assert(d == d2 || !
"query does not have same dimension as train set");
// ---- Ground truth ----
157 printf (
"[%.3f s] Loading ground truth for %ld queries\n",
// The per-row length of the ground-truth file becomes k; its row count (nq2)
// must match the number of queries.
162 int *gt_int = ivecs_read(
"sift1M/sift_groundtruth.ivecs", &k, &nq2);
163 assert(nq2 == nq || !
"incorrect nb of ground truth entries");
// Visit all k * nq ground-truth entries.
// NOTE(review): i is int while k and nq are filled through size_t*, so this
// comparison mixes signed and unsigned operands — confirm and consider size_t.
166 for(
int i = 0; i < k * nq; i++) {
// Receives the key of the operating point chosen further down.
173 std::string selected_params;
177 printf (
"[%.3f s] Preparing auto-tune criterion 1-recall at 1 "
178 "criterion, with k=%ld nq=%ld\n", elapsed() - t0, k, nq);
// Hand the ground-truth labels in gt to the criterion; nullptr is passed in
// the middle slot (presumably reference distances — confirm against AutoTune.h).
181 crit.set_groundtruth (k,
nullptr, gt);
184 printf (
"[%.3f s] Preparing auto-tune parameters\n", elapsed() - t0);
189 printf (
"[%.3f s] Auto-tuning over %ld parameters (%ld combinations)\n",
// Explore parameter settings on the nq queries in xq, scored by crit; ops is
// passed by pointer to collect the resulting operating points.
194 params.
explore (index, nq, xq, crit, &ops);
196 printf (
"[%.3f s] Found the following operating points: \n",
// Scan the optimal operating points and copy into selected_params the key of
// one whose perf exceeds 0.5.
// NOTE(review): int index compared against an unsigned size().
202 for (
int i = 0; i < ops.optimal_pts.size(); i++) {
203 if (ops.optimal_pts[i].perf > 0.5) {
204 selected_params = ops.optimal_pts[i].key;
208 assert (selected_params.size() >= 0 ||
209 !
"could not find good enough op point");
// Announce which parameter configuration (the key chosen above) is being applied.
217 printf (
"[%.3f s] Setting parameter configuration \"%s\" on index\n",
218 elapsed() - t0, selected_params.c_str());
222 printf (
"[%.3f s] Perform a search on %ld queries\n",
// Output buffer for the search distances: k floats for each of the nq queries.
227 float *D =
new float[nq * k];
// Search the k nearest neighbours of every query; distances go to D and
// labels to I.
229 index->
search(nq, xq, k, D, I);
231 printf (
"[%.3f s] Compute recalls\n", elapsed() - t0);
// Hit counters; they are reported below as R@1, R@10 and R@100 after
// division by nq.
234 int n_1 = 0, n_10 = 0, n_100 = 0;
// NOTE(review): int loop indices compared against nq and k, which are filled
// through size_t* earlier — signed/unsigned mix; confirm and consider size_t.
235 for(
int i = 0; i < nq; i++) {
// First ground-truth entry of query i (rows of gt are k entries long).
236 int gt_nn = gt[i * k];
// Look for that label among the k results returned for query i.
237 for(
int j = 0; j < k; j++) {
238 if (I[i * k + j] == gt_nn) {
// Report each counter as a fraction of the number of queries.
245 printf(
"R@1 = %.4f\n", n_1 /
float(nq));
246 printf(
"R@10 = %.4f\n", n_10 /
float(nq));
247 printf(
"R@100 = %.4f\n", n_100 /
float(nq));
void explore(Index *index, size_t nq, const float *xq, const AutoTuneCriterion &crit, OperatingPoints *ops) const
std::vector< ParameterRange > parameter_ranges
all tunable parameters
virtual void add(idx_t n, const float *x)=0
long idx_t
all indices are this type
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0
virtual void initialize(const Index *index)
initialize with reasonable parameters for the index
size_t n_combinations() const
number of combinations, i.e. the product of the value-list sizes
void set_index_parameters(Index *index, size_t cno) const
set a combination of parameters on an index
virtual void train(idx_t n, const float *x)
Index * index_factory(int d, const char *description_in, MetricType metric)