18 #include "../IndexIVFPQ.h"
19 #include "../IndexFlat.h"
20 #include "../index_io.h"
25 gettimeofday (&tv, NULL);
26 return tv.tv_sec + tv.tv_usec * 1e-6;
33 double t0 = elapsed();
39 size_t nb = 200 * 1000;
43 size_t nt = 100 * 1000;
49 int ncentroids = int (4 * sqrt (nb));
59 printf (
"[%.3f s] Generating %ld vectors in %dD for training\n",
60 elapsed() - t0, nt, d);
62 std::vector <float> trainvecs (nt * d);
63 for (
size_t i = 0; i < nt * d; i++) {
64 trainvecs[i] = drand48();
67 printf (
"[%.3f s] Training the index\n",
71 index.train (nt, trainvecs.data());
75 const char *outfilename =
"/tmp/index_trained.faissindex";
76 printf (
"[%.3f s] storing the pre-trained index to %s\n",
77 elapsed() - t0, outfilename);
79 write_index (&index, outfilename);
83 std::vector<float> queries;
86 printf (
"[%.3f s] Building a dataset of %ld vectors to index\n",
89 std::vector <float> database (nb * d);
90 for (
size_t i = 0; i < nb * d; i++) {
91 database[i] = drand48();
94 printf (
"[%.3f s] Adding the vectors to the index\n",
97 index.add (nb, database.data());
99 printf (
"[%.3f s] imbalance factor: %g\n",
100 elapsed() - t0, index.imbalance_factor ());
107 queries.resize (nq * d);
108 for (
int i = i0; i < i1; i++) {
109 for (
int j = 0; j < d; j++) {
110 queries [(i - i0) * d + j] = database [i * d + j];
118 printf (
"[%.3f s] Searching the %d nearest neighbors "
119 "of %ld vectors in the index\n",
120 elapsed() - t0, k, nq);
122 std::vector<faiss::Index::idx_t> nns (k * nq);
123 std::vector<float> dis (k * nq);
125 index.search (nq, queries.data(), k, dis.data(), nns.data());
127 printf (
"[%.3f s] Query results (vector ids, then distances):\n",
130 for (
int i = 0; i < nq; i++) {
131 printf (
"query %2d: ", i);
132 for (
int j = 0; j < k; j++) {
133 printf (
"%7ld ", nns[j + i * k]);
136 for (
int j = 0; j < k; j++) {
137 printf (
"%7g ", dis[j + i * k]);
142 printf (
"note that the nearest neighbor is not at "
143 "distance 0 due to quantization errors\n");