20 #include "../IndexIVFPQ.h"
21 #include "../IndexFlat.h"
22 #include "../index_io.h"
// Read the current time of day into tv (tv is declared elsewhere).
27 gettimeofday (&tv, NULL);
// Combine whole seconds and microseconds into one floating-point value in seconds.
28 return tv.tv_sec + tv.tv_usec * 1e-6;
// Reference timestamp; the log lines below print elapsed() - t0.
35 double t0 = elapsed();
// Number of database vectors: sizes the database buffer and is passed to index.add().
41 size_t nb = 200 * 1000;
// Number of training vectors: sizes the training buffer and is passed to index.train().
45 size_t nt = 100 * 1000;
// Centroid count derived from the database size: 4 * sqrt(nb), truncated by the int conversion.
51 int ncentroids = int (4 * sqrt (nb));
// Log the start of training-set generation: elapsed time, vector count, dimension.
// nt is a size_t, so it is printed with %zu; %ld expects a long and only
// happens to work where size_t and long have the same width.
61 printf (
"[%.3f s] Generating %zu vectors in %dD for training\n",
62 elapsed() - t0, nt, d);
// Flat float buffer with room for nt vectors of d components each.
64 std::vector <float> trainvecs (nt * d);
// Fill every component with a drand48() sample (narrowed from double to float).
65 for (
size_t i = 0; i < nt * d; i++) {
66 trainvecs[i] = drand48();
69 printf (
"[%.3f s] Training the index\n",
// Train the index on the nt vectors stored contiguously in trainvecs.
73 index.train (nt, trainvecs.data());
// Destination path for the serialized index.
77 const char *outfilename =
"/tmp/index_trained.faissindex";
78 printf (
"[%.3f s] storing the pre-trained index to %s\n",
79 elapsed() - t0, outfilename);
// Write the index to outfilename; in this listing that happens after
// index.train() and before any index.add() call.
81 write_index (&index, outfilename);
// Query vectors; resized and filled further down from rows of the database.
85 std::vector<float> queries;
// NOTE(review): the argument list of this printf is not visible here; if the
// count passed is nb (a size_t), %zu is the matching conversion - confirm.
88 printf (
"[%.3f s] Building a dataset of %ld vectors to index\n",
// Flat float buffer with room for nb vectors of d components each.
91 std::vector <float> database (nb * d);
// Fill every component with a drand48() sample (narrowed from double to float).
92 for (
size_t i = 0; i < nb * d; i++) {
93 database[i] = drand48();
96 printf (
"[%.3f s] Adding the vectors to the index\n",
// Hand the nb database vectors to the index.
99 index.add (nb, database.data());
// Report the value returned by index.imbalance_factor() after the add.
101 printf (
"[%.3f s] imbalance factor: %g\n",
102 elapsed() - t0, index.imbalance_factor ());
// Make room for nq query vectors of d floats each (nq is declared elsewhere).
109 queries.resize (nq * d);
// Copy database rows i0 .. i1 - 1 into queries, component by component.
// The row index is shifted by i0 so the first copied row lands at row 0.
// NOTE(review): presumably nq == i1 - i0 so the copy exactly fills the
// buffer - confirm where nq, i0 and i1 are set.
110 for (
int i = i0; i < i1; i++) {
111 for (
int j = 0; j < d; j++) {
112 queries [(i - i0) * d + j] = database [i * d + j];
// Log the search parameters: k neighbours for each of nq query vectors.
// NOTE(review): nq is declared elsewhere; the result loop compares an int
// counter against it, so if nq is an int then %ld does not match and %d is
// needed - confirm against the declaration.
120 printf (
"[%.3f s] Searching the %d nearest neighbors "
121 "of %ld vectors in the index\n",
122 elapsed() - t0, k, nq);
// Output buffers with k slots per query: nns receives ids, dis receives distances.
124 std::vector<faiss::Index::idx_t> nns (k * nq);
125 std::vector<float> dis (k * nq);
// Run the search for all nq queries; results are written into dis and nns.
127 index.search (nq, queries.data(), k, dis.data(), nns.data());
// Print, for each query, its k result ids followed by its k distances.
129 printf (
"[%.3f s] Query results (vector ids, then distances):\n",
132 for (
int i = 0; i < nq; i++) {
133 printf (
"query %2d: ", i);
// Result j of query i is stored at offset j + i * k.
// NOTE(review): %7ld assumes faiss::Index::idx_t is a long - confirm against
// its declaration.
134 for (
int j = 0; j < k; j++) {
135 printf (
"%7ld ", nns[j + i * k]);
// Distances use the same j + i * k layout as the ids.
138 for (
int j = 0; j < k; j++) {
139 printf (
"%7g ", dis[j + i * k]);
// Closing remark explaining why the best reported distance is not zero.
144 printf (
"note that the nearest neighbor is not at "
145 "distance 0 due to quantization errors\n");