19 #include "../IndexIVFPQ.h"
20 #include "../IndexFlat.h"
21 #include "../index_io.h"
// Body fragment of a timer helper whose signature and the declaration of
// `tv` are outside this excerpt (presumably the elapsed() called further
// down — confirm against the full file).
// Fills `tv` via gettimeofday and returns the time as seconds in floating
// point: whole seconds plus the microsecond field scaled by 1e-6.
26 gettimeofday (&tv, NULL);
27 return tv.tv_sec + tv.tv_usec * 1e-6;
// Reference timestamp: the progress messages below print elapsed() - t0.
34 double t0 = elapsed();
// Number of database vectors generated and passed to index.add.
40 size_t nb = 200 * 1000;
// Number of vectors generated and passed to index.train.
44 size_t nt = 100 * 1000;
// 4 * sqrt(nb), truncated toward zero by the conversion to int.
// Where ncentroids is consumed is not visible in this excerpt.
50 int ncentroids = int (4 * sqrt (nb));
// Log progress, then allocate nt * d floats and fill every slot with a
// drand48() sample (the double result is narrowed to float on assignment).
// `d` is declared outside this excerpt; the loop's closing brace is not
// visible here either.
// NOTE(review): nt is a size_t but is printed with %ld; %zu is the
// conversion that matches size_t.
60 printf (
"[%.3f s] Generating %ld vectors in %dD for training\n",
61 elapsed() - t0, nt, d);
63 std::vector <float> trainvecs (nt * d);
64 for (
size_t i = 0; i < nt * d; i++) {
65 trainvecs[i] = drand48();
// Log progress, then train `index` on the nt vectors held in trainvecs.
// The remaining printf arguments and the declaration of `index` are
// outside this excerpt.
68 printf (
"[%.3f s] Training the index\n",
72 index.train (nt, trainvecs.data());
// Log the destination, then write `index` to disk with write_index.
// This runs after index.train and before index.add, so the file holds the
// trained index without the database vectors.
// NOTE(review): the output path is hard-coded under /tmp.
76 const char *outfilename =
"/tmp/index_trained.faissindex";
77 printf (
"[%.3f s] storing the pre-trained index to %s\n",
78 elapsed() - t0, outfilename);
80 write_index (&index, outfilename);
// Query buffer, declared empty here; it is resized and filled further down
// from rows of `database`.
84 std::vector<float> queries;
// Log progress, then allocate nb * d floats and fill every slot with a
// drand48() sample (narrowed from double to float on assignment).
// The remaining printf arguments and the loop's closing brace are outside
// this excerpt.
87 printf (
"[%.3f s] Building a dataset of %ld vectors to index\n",
90 std::vector <float> database (nb * d);
91 for (
size_t i = 0; i < nb * d; i++) {
92 database[i] = drand48();
// Log progress, then add the nb vectors stored in `database` to `index`.
// The remaining printf arguments are outside this excerpt.
95 printf (
"[%.3f s] Adding the vectors to the index\n",
98 index.add (nb, database.data());
// Report the value returned by index.imbalance_factor() together with the
// elapsed time. How that value is computed is defined by the index class,
// not in this file.
100 printf (
"[%.3f s] imbalance factor: %g\n",
101 elapsed() - t0, index.imbalance_factor ());
// Size `queries` for nq vectors of d floats, then copy database rows
// i0 .. i1-1 into it: row i of `database` lands in query slot (i - i0).
// nq, i0 and i1 are defined outside this excerpt — presumably
// nq == i1 - i0 (confirm); if i1 - i0 exceeds nq the writes run past the
// resized buffer.
// The closing braces of both loops are not visible here.
108 queries.resize (nq * d);
109 for (
int i = i0; i < i1; i++) {
110 for (
int j = 0; j < d; j++) {
111 queries [(i - i0) * d + j] = database [i * d + j];
// Log progress, allocate k * nq slots each for result ids (nns) and
// distances (dis), then search `index` for the k nearest neighbors of the
// nq query vectors. The printing code below reads result j of query i at
// offset j + i * k in both buffers.
// NOTE(review): nq is printed with %ld, but its declaration is outside this
// excerpt — if it is a size_t, %zu is the matching conversion.
119 printf (
"[%.3f s] Searching the %d nearest neighbors "
120 "of %ld vectors in the index\n",
121 elapsed() - t0, k, nq);
123 std::vector<faiss::Index::idx_t> nns (k * nq);
124 std::vector<float> dis (k * nq);
126 index.search (nq, queries.data(), k, dis.data(), nns.data());
// Print the search output: for each query i, its k result ids from `nns`
// and then its k distances from `dis`, each read at offset j + i * k.
// The remaining arguments of the first printf and the closing braces of
// these loops are outside this excerpt.
// NOTE(review): %7ld assumes faiss::Index::idx_t is a long-compatible
// integer type — confirm against the Faiss headers in use.
// NOTE(review): `int i` is compared with nq; if nq is an unsigned type this
// is a signed/unsigned comparison — confirm nq's declaration.
128 printf (
"[%.3f s] Query results (vector ids, then distances):\n",
131 for (
int i = 0; i < nq; i++) {
132 printf (
"query %2d: ", i);
133 for (
int j = 0; j < k; j++) {
134 printf (
"%7ld ", nns[j + i * k]);
137 for (
int j = 0; j < k; j++) {
138 printf (
"%7g ", dis[j + i * k]);
// Closing remark for the reader of the output: explains why the nearest
// neighbor reported above is not at distance 0.
143 printf (
"note that the nearest neighbor is not at "
144 "distance 0 due to quantization errors\n");