20 #include <faiss/IndexPQ.h>
21 #include <faiss/IndexIVFPQ.h>
22 #include <faiss/IndexFlat.h>
23 #include <faiss/index_io.h>
28 gettimeofday (&tv,
nullptr);
29 return tv.tv_sec + tv.tv_usec * 1e-6;
35 double t0 = elapsed();
41 size_t nb = 1000 * 1000;
42 size_t add_bs = 10000;
46 size_t nt = 100 * 1000;
72 size_t nbits_subq = 9;
73 size_t ncentroids = 1 << (nhash * nbits_subq);
74 int bytes_per_code = 16;
78 printf (
"IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
79 nhash, nbits_subq, ncentroids, nb);
86 index.quantizer_trains_alone =
true;
100 printf (
"[%.3f s] Generating %ld vectors in %dD for training\n",
101 elapsed() - t0, nt, d);
103 std::vector <float> trainvecs (nt * d);
104 for (
size_t i = 0; i < nt; i++) {
105 for (
size_t j = 0; j < d; j++) {
106 trainvecs[i * d + j] = drand48();
110 printf (
"[%.3f s] Training the index\n", elapsed() - t0);
111 index.verbose =
true;
112 index.train (nt, trainvecs.data());
117 faiss::write_index(&index,
"/tmp/trained_index.faissindex");
120 std::vector<float> queries;
123 printf (
"[%.3f s] Building a dataset of %ld vectors to index\n",
126 std::vector <float> database (nb * d);
127 std::vector <long> ids (nb);
128 for (
size_t i = 0; i < nb; i++) {
129 for (
size_t j = 0; j < d; j++) {
130 database[i * d + j] = drand48();
132 ids[i] = 8760000000L + i;
135 printf (
"[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
137 for (
size_t begin = 0; begin < nb; begin += add_bs) {
138 size_t end = std::min (begin + add_bs, nb);
139 index.add_with_ids (end - begin,
140 database.data() + d * begin,
149 queries.resize (nq * d);
150 for (
int i = i0; i < i1; i++) {
151 for (
int j = 0; j < d; j++) {
152 queries [(i - i0) * d + j] = database [i * d + j];
173 faiss::write_index(&index,
"/tmp/populated_index.faissindex");
177 printf (
"[%.3f s] Searching the %d nearest neighbors "
178 "of %ld vectors in the index\n",
179 elapsed() - t0, k, nq);
181 std::vector<faiss::Index::idx_t> nns (k * nq);
182 std::vector<float> dis (k * nq);
184 index.search (nq, queries.data(), k, dis.data(), nns.data());
186 printf (
"[%.3f s] Query results (vector ids, then distances):\n",
189 for (
int i = 0; i < nq; i++) {
190 printf (
"query %2d: ", i);
191 for (
int j = 0; j < k; j++) {
192 printf (
"%7ld ", nns[j + i * k]);
195 for (
int j = 0; j < k; j++) {
196 printf (
"%7g ", dis[j + i * k]);
MetricType
Some algorithms support both an inner product version and an L2 search version.