Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
demo_ivfpq_indexing_gpu.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved
10 
11 
12 #include <cmath>
13 #include <cstdio>
14 #include <cstdlib>
15 
16 #include <sys/time.h>
17 
18 
19 #include "../StandardGpuResources.h"
20 #include "../GpuIndexIVFPQ.h"
21 
22 #include "../GpuAutoTune.h"
23 #include "../../index_io.h"
24 
/// Current time of day in seconds, as reported by gettimeofday(),
/// with the microsecond field folded in as the fractional part.
/// Used below to print timestamps relative to program start.
double elapsed ()
{
    timeval now;
    gettimeofday (&now, nullptr);

    const double whole_seconds = now.tv_sec;
    const double fractional_seconds = now.tv_usec * 1e-6;
    return whole_seconds + fractional_seconds;
}
31 
32 
33 int main ()
34 {
35 
36  double t0 = elapsed();
37 
38  // dimension of the vectors to index
39  int d = 128;
40 
41  // size of the database we plan to index
42  size_t nb = 200 * 1000;
43 
44  // make a set of nt training vectors in the unit cube
45  // (could be the database)
46  size_t nt = 100 * 1000;
47 
48  int dev_no = 0;
49  /*
50  printf ("[%.3f s] Begin d=%d nb=%ld nt=%nt dev_no=%d\n",
51  elapsed() - t0, d, nb, nt, dev_no);
52  */
53  // a reasonable number of centroids to index nb vectors
54  int ncentroids = int (4 * sqrt (nb));
55 
57 
58 
59  // the coarse quantizer should not be dealloced before the index
60  // 4 = nb of bytes per code (d must be a multiple of this)
61  // 8 = nb of bits per sub-code (almost always 8)
63  config.device = dev_no;
64 
66  &resources, d, ncentroids, 4, 8, faiss::METRIC_L2, config);
67 
68  { // training
69  printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
70  elapsed() - t0, nt, d);
71 
72  std::vector <float> trainvecs (nt * d);
73  for (size_t i = 0; i < nt * d; i++) {
74  trainvecs[i] = drand48();
75  }
76 
77  printf ("[%.3f s] Training the index\n",
78  elapsed() - t0);
79  index.verbose = true;
80 
81  index.train (nt, trainvecs.data());
82  }
83 
84  { // I/O demo
85  const char *outfilename = "/tmp/index_trained.faissindex";
86  printf ("[%.3f s] storing the pre-trained index to %s\n",
87  elapsed() - t0, outfilename);
88 
89  faiss::Index * cpu_index = faiss::gpu::index_gpu_to_cpu (&index);
90 
91  write_index (cpu_index, outfilename);
92 
93  delete cpu_index;
94  }
95 
96  size_t nq;
97  std::vector<float> queries;
98 
99  { // populating the database
100  printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
101  elapsed() - t0, nb);
102 
103  std::vector <float> database (nb * d);
104  for (size_t i = 0; i < nb * d; i++) {
105  database[i] = drand48();
106  }
107 
108  printf ("[%.3f s] Adding the vectors to the index\n",
109  elapsed() - t0);
110 
111  index.add (nb, database.data());
112 
113  printf ("[%.3f s] done\n", elapsed() - t0);
114 
115  // remember a few elements from the database as queries
116  int i0 = 1234;
117  int i1 = 1243;
118 
119  nq = i1 - i0;
120  queries.resize (nq * d);
121  for (int i = i0; i < i1; i++) {
122  for (int j = 0; j < d; j++) {
123  queries [(i - i0) * d + j] = database [i * d + j];
124  }
125  }
126 
127  }
128 
129  { // searching the database
130  int k = 5;
131  printf ("[%.3f s] Searching the %d nearest neighbors "
132  "of %ld vectors in the index\n",
133  elapsed() - t0, k, nq);
134 
135  std::vector<faiss::Index::idx_t> nns (k * nq);
136  std::vector<float> dis (k * nq);
137 
138  index.search (nq, queries.data(), k, dis.data(), nns.data());
139 
140  printf ("[%.3f s] Query results (vector ids, then distances):\n",
141  elapsed() - t0);
142 
143  for (int i = 0; i < nq; i++) {
144  printf ("query %2d: ", i);
145  for (int j = 0; j < k; j++) {
146  printf ("%7ld ", nns[j + i * k]);
147  }
148  printf ("\n dis: ");
149  for (int j = 0; j < k; j++) {
150  printf ("%7g ", dis[j + i * k]);
151  }
152  printf ("\n");
153  }
154 
155  printf ("note that the nearest neighbor is not at "
156  "distance 0 due to quantization errors\n");
157  }
158 
159  return 0;
160 }
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:27
IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:40