Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
CompareIVFPQGrid.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #include "../../IndexIVFPQ.h"
11 #include "../../index_io.h"
12 #include "../../utils.h"
13 
14 #include "../GpuIndexIVFPQ.h"
15 #include "IndexWrapper.h"
16 #include "../test/TestUtils.h"
17 #include "../utils/DeviceTensor.cuh"
18 #include "../utils/DeviceUtils.h"
19 #include "../utils/HostTensor.cuh"
20 #include "../utils/Timer.h"
21 #include <gflags/gflags.h>
22 #include <memory>
23 #include <vector>
24 
// Command-line flags (gflags) for this benchmark. Each DEFINE_* below is read
// in main() through the corresponding FLAGS_<name> variable.

// Number of nearest neighbors requested from both the CPU and GPU searches.
25 DEFINE_int32(k, 10, "final number of closest results returned");
// Path passed to faiss::read_index(); the loaded index must be an IndexIVFPQ.
// NOTE(review): the default is an absolute path under one user's home
// directory, so most callers will need to pass --in explicitly.
26 DEFINE_string(in, "/home/jhj/local/ivfpq_index.out", "index file for input");
// Copied into the GPU config's usePrecomputedTables; when false, main() also
// sets use_precomputed_table = 0 on the CPU index.
27 DEFINE_bool(use_precomputed, true, "enable or disable precomputed codes");
// Copied into the GPU config's useFloat16LookupTables.
28 DEFINE_bool(float16_lookup, false, "use float16 residual distance tables");
// Forwarded to IndexWrapper as the number of GPUs to build the index on.
29 DEFINE_int32(num_gpus, 1, "number of gpus to use");
// Cast directly to faiss::gpu::IndicesOptions in main(); the valid values are
// the ones listed in the help string.
30 DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
31 
32 using namespace faiss::gpu;
33 
34 int main(int argc, char** argv) {
35  gflags::ParseCommandLineFlags(&argc, &argv, true);
36 
37  auto seed = time(nullptr);
38  auto k = FLAGS_k;
39 
40  auto index = std::unique_ptr<faiss::IndexIVFPQ>(
41  dynamic_cast<faiss::IndexIVFPQ*>(faiss::read_index(FLAGS_in.c_str())));
42  FAISS_ASSERT((bool) index);
43 
44  auto dim = index->d;
45  auto codes = index->pq.M;
46  auto bitsPerCode = index->pq.nbits;
47 
48  printf("Database: dim %d num vecs %ld\n", dim, index->ntotal);
49  printf("Coarse centroids: %ld\n", index->quantizer->ntotal);
50  printf("PQ centroids: codes %ld bits per code %ld\n", codes, bitsPerCode);
51  printf("L2 lookup: total k %d, precomputed codes %d\n\n",
52  k, FLAGS_use_precomputed);
53 
54  // Convert to GPU index
55  printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
56 
57  bool precomp = FLAGS_use_precomputed;
58  auto indicesOpt = (faiss::gpu::IndicesOptions) FLAGS_index;
59  auto useFloat16Lookup = FLAGS_float16_lookup;
60 
61  auto initFn = [precomp, indicesOpt, useFloat16Lookup, &index]
62  (faiss::gpu::GpuResources* res, int dev) ->
63  std::unique_ptr<faiss::gpu::GpuIndexIVFPQ> {
64 
66  config.device = dev;
67  config.usePrecomputedTables = precomp;
68  config.indicesOptions = indicesOpt;
69  config.useFloat16LookupTables = useFloat16Lookup;
70 
71  auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFPQ>(
72  new faiss::gpu::GpuIndexIVFPQ(res, index.get(), config));
73 
74  return p;
75  };
76 
77  IndexWrapper<faiss::gpu::GpuIndexIVFPQ> gpuIndex(FLAGS_num_gpus, initFn);
78  printf("copy done\n");
79 
80  auto querySizes = std::vector<int>{1, 4, 16, 64, 256, 1024, 4096, 16384};
81  auto nprobeSizes = std::vector<int>{1, 4, 8, 16, 32, 64, 128, 256};
82 
83  HostTensor<float, 2, true> cpuTimePerVector(
84  {(int) querySizes.size(), (int) nprobeSizes.size()});
85  HostTensor<float, 2, true> gpuTimePerVector(
86  {(int) querySizes.size(), (int) nprobeSizes.size()});
87 
88  printf("GPU relative speedup over CPU (x):\n");
89 
90  for (auto q = 0; q < querySizes.size(); ++q) {
91  auto numQueries = querySizes[q];
92  bool first = true;
93 
94  for (auto p = 0; p < nprobeSizes.size(); ++p) {
95  auto nprobe = nprobeSizes[q];
96 
97  HostTensor<float, 2, true> cpuQuery{numQueries, dim};
98  faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
99 
100  HostTensor<faiss::Index::idx_t, 2, true> resultIndices{numQueries, k};
101  HostTensor<float, 2, true> resultDistances{numQueries, k};
102 
103  index->nprobe = nprobe;
104 
105  float cpuTime = 0.0f;
106  {
107  CpuTimer timer;
108  if (!FLAGS_use_precomputed) {
109  index->use_precomputed_table = 0;
110  }
111  index->search(numQueries, cpuQuery.data(),
112  k, resultDistances.data(), resultIndices.data());
113  cpuTime = timer.elapsedMilliseconds();
114  cpuTimePerVector[q][p] = cpuTime / (float) numQueries;
115  }
116 
117  gpuIndex.setNumProbes(nprobe);
118 
120  gpuHostDistances({numQueries, k});
122  gpuHostIndices({numQueries, k});
123 
124  DeviceTensor<float, 2, true> gpuQuery(cpuQuery, 0);
125  DeviceTensor<float, 2, true> gpuDistances({numQueries, k});
126  DeviceTensor<long, 2, true> gpuIndices({numQueries, k});
127  CUDA_VERIFY(cudaDeviceSynchronize());
128 
129  float gpuTime = 0.0f;
130 
131  {
132  CpuTimer timer;
133 
134  gpuIndex.getIndex()->search(cpuQuery.getSize(0),
135  cpuQuery.data(),
136  FLAGS_k,
137  gpuHostDistances.data(),
138  gpuHostIndices.data());
139 
140  CUDA_VERIFY(cudaDeviceSynchronize());
141  gpuTime = timer.elapsedMilliseconds();
142  }
143 
144  gpuTimePerVector[q][p] = gpuTime / (float) numQueries;
145 
146  if (!first) {
147  printf(", ");
148  }
149  first = false;
150 
151  printf("%.2f", cpuTime / gpuTime);
152  }
153 
154  printf("\n");
155  }
156 
157  printf("\n");
158  printf("CPU time per query vector (us):\n");
159 
160  for (int q = 0; q < cpuTimePerVector.getSize(0); ++q) {
161  bool first = true;
162 
163  for (int p = 0; p < cpuTimePerVector.getSize(1); ++p) {
164  if (!first) {
165  printf(", ");
166  }
167  first = false;
168 
169  printf("%.1f", (float) cpuTimePerVector[q][p] * 1000.0f);
170  }
171 
172  printf("\n");
173  }
174 
175  printf("\n");
176  printf("GPU time per query vector (us):\n");
177 
178  for (int q = 0; q < gpuTimePerVector.getSize(0); ++q) {
179  bool first = true;
180 
181  for (int p = 0; p < gpuTimePerVector.getSize(1); ++p) {
182  if (!first) {
183  printf(", ");
184  }
185  first = false;
186 
187  printf("%.1f", (float) gpuTimePerVector[q][p] * 1000.0f);
188  }
189 
190  printf("\n");
191  }
192 
193  return 0;
194 }
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Definition: Timer.cpp:51
CPU wallclock elapsed timer.
Definition: Timer.h:41
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:26
IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:39
IndicesOptions indicesOptions
Index storage options for the GPU.
Definition: GpuIndexIVF.h:30