Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
CompareIVFPQGrid.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the CC-by-NC license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "../../IndexIVFPQ.h"
12 #include "../../index_io.h"
13 #include "../../utils.h"
14 
15 #include "../GpuIndexIVFPQ.h"
16 #include "IndexWrapper.h"
17 #include "../test/TestUtils.h"
18 #include "../utils/DeviceTensor.cuh"
19 #include "../utils/DeviceUtils.h"
20 #include "../utils/HostTensor.cuh"
21 #include "../utils/Timer.h"
22 #include <gflags/gflags.h>
23 #include <memory>
24 #include <vector>
25 
// Command-line flags (gflags) for the CPU-vs-GPU IVFPQ grid benchmark.
// Each DEFINE_* creates a FLAGS_<name> variable that main() reads after
// gflags::ParseCommandLineFlags().

// Number of nearest neighbors requested from every search call.
26 DEFINE_int32(k, 10, "final number of closest results returned");
// Path of the serialized index handed to faiss::read_index().
// NOTE(review): the default is an absolute path inside one user's home
// directory; callers will normally need to pass --in explicitly.
27 DEFINE_string(in, "/home/jhj/local/ivfpq_index.out", "index file for input");
// Copied into the GPU config's usePrecomputedTables field; when false,
// main() also clears use_precomputed_table on the CPU index.
28 DEFINE_bool(use_precomputed, true, "enable or disable precomputed codes");
// Copied into the GPU config's useFloat16LookupTables field.
29 DEFINE_bool(float16_lookup, false, "use float16 residual distance tables");
// Passed to IndexWrapper as the number of GPUs to build the index on.
30 DEFINE_int32(num_gpus, 1, "number of gpus to use");
// Cast directly to faiss::gpu::IndicesOptions in main().
// NOTE(review): the help text only describes values 0-2; confirm that the
// numeric meanings still match the current IndicesOptions enum.
31 DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
32 
// Pulls in HostTensor, IndexWrapper, CpuTimer, etc. unqualified.
33 using namespace faiss::gpu;
34 
/**
 * Benchmarks CPU versus GPU IVFPQ search over a grid of
 * (query batch size x nprobe) settings.
 *
 * Loads a faiss::IndexIVFPQ from the file named by --in, copies it to
 * --num_gpus GPU(s), then for every grid cell runs the same random query
 * batch through the CPU index and the GPU index, timing each with CpuTimer.
 * Three grids are printed: the CPU/GPU speedup ratio, the CPU time per query
 * vector and the GPU time per query vector (both in microseconds).
 *
 * Rows of every grid correspond to query batch sizes, columns to nprobe
 * values.
 *
 * @param argc argument count, forwarded to gflags
 * @param argv argument vector, forwarded to gflags
 * @return 0 on completion
 */
int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  // One seed for the whole run: every cell in a row sees the same queries.
  auto seed = time(nullptr);
  auto k = FLAGS_k;

  auto index = std::unique_ptr<faiss::IndexIVFPQ>(
    dynamic_cast<faiss::IndexIVFPQ*>(faiss::read_index(FLAGS_in.c_str())));
  // Fails if the file did not contain an IndexIVFPQ (dynamic_cast gave null).
  FAISS_ASSERT((bool) index);

  auto dim = index->d;
  auto codes = index->pq.M;
  auto bitsPerCode = index->pq.nbits;

  // Explicit casts keep the arguments in agreement with the %ld / %d
  // conversions regardless of the exact integer typedefs used by the index.
  printf("Database: dim %d num vecs %ld\n", dim, (long) index->ntotal);
  printf("Coarse centroids: %ld\n", (long) index->quantizer->ntotal);
  printf("PQ centroids: codes %ld bits per code %ld\n",
         (long) codes, (long) bitsPerCode);
  printf("L2 lookup: total k %d, precomputed codes %d\n\n",
         k, (int) FLAGS_use_precomputed);

  // Convert to GPU index
  printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus);

  bool precomp = FLAGS_use_precomputed;
  auto indicesOpt = (faiss::gpu::IndicesOptions) FLAGS_index;
  auto useFloat16Lookup = FLAGS_float16_lookup;

  // Factory invoked by IndexWrapper once per device; builds a GPU copy of
  // the CPU index with the options selected on the command line.
  auto initFn = [precomp, indicesOpt, useFloat16Lookup, &index]
    (faiss::gpu::GpuResources* res, int dev) ->
    std::unique_ptr<faiss::gpu::GpuIndexIVFPQ> {

    faiss::gpu::GpuIndexIVFPQConfig config;
    config.device = dev;
    config.usePrecomputedTables = precomp;
    config.indicesOptions = indicesOpt;
    config.useFloat16LookupTables = useFloat16Lookup;

    auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFPQ>(
      new faiss::gpu::GpuIndexIVFPQ(res, index.get(), config));

    return p;
  };

  IndexWrapper<faiss::gpu::GpuIndexIVFPQ> gpuIndex(FLAGS_num_gpus, initFn);
  printf("copy done\n");

  // Loop-invariant, so done once here (after the GPU copy, as before) rather
  // than inside the CPU-timed region of every grid cell.
  if (!FLAGS_use_precomputed) {
    index->use_precomputed_table = 0;
  }

  auto querySizes = std::vector<int>{1, 4, 16, 64, 256, 1024, 4096, 16384};
  auto nprobeSizes = std::vector<int>{1, 4, 8, 16, 32, 64, 128, 256};

  const int numQuerySizes = (int) querySizes.size();
  const int numNprobeSizes = (int) nprobeSizes.size();

  // Milliseconds per query vector, indexed [query size][nprobe].
  HostTensor<float, 2, true> cpuTimePerVector(
    {numQuerySizes, numNprobeSizes});
  HostTensor<float, 2, true> gpuTimePerVector(
    {numQuerySizes, numNprobeSizes});

  printf("GPU relative speedup over CPU (x):\n");

  for (int q = 0; q < numQuerySizes; ++q) {
    auto numQueries = querySizes[q];

    // The query batch and the result buffers depend only on numQueries, so
    // they are built once per row and reused for every nprobe value.
    HostTensor<float, 2, true> cpuQuery{numQueries, dim};
    faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);

    HostTensor<faiss::Index::idx_t, 2, true> resultIndices{numQueries, k};
    HostTensor<float, 2, true> resultDistances{numQueries, k};

    HostTensor<float, 2, true> gpuHostDistances({numQueries, k});
    HostTensor<faiss::Index::idx_t, 2, true> gpuHostIndices({numQueries, k});

    for (int p = 0; p < numNprobeSizes; ++p) {
      // Columns sweep nprobe, so this must be indexed by p (it was
      // previously indexed by the row variable q).
      auto nprobe = nprobeSizes[p];

      index->nprobe = nprobe;

      float cpuTime = 0.0f;
      {
        CpuTimer timer;
        index->search(numQueries, cpuQuery.data(),
                      k, resultDistances.data(), resultIndices.data());
        cpuTime = timer.elapsedMilliseconds();
      }
      cpuTimePerVector[q][p] = cpuTime / (float) numQueries;

      gpuIndex.setNumProbes(nprobe);

      // Drain any outstanding GPU work so it is not charged to the search.
      CUDA_VERIFY(cudaDeviceSynchronize());

      float gpuTime = 0.0f;
      {
        CpuTimer timer;

        // Queries and results live in host memory, so the measured time
        // covers whatever transfers the GPU index performs internally.
        gpuIndex.getIndex()->search(cpuQuery.getSize(0),
                                    cpuQuery.data(),
                                    k,
                                    gpuHostDistances.data(),
                                    gpuHostIndices.data());

        // Ensure the GPU has finished before the timer is read.
        CUDA_VERIFY(cudaDeviceSynchronize());
        gpuTime = timer.elapsedMilliseconds();
      }
      gpuTimePerVector[q][p] = gpuTime / (float) numQueries;

      if (p > 0) {
        printf(", ");
      }
      printf("%.2f", cpuTime / gpuTime);
    }

    printf("\n");
  }

  // Prints one comma-separated row per query size, converting the stored
  // milliseconds-per-vector values to microseconds.
  auto printGridInMicroseconds = [](HostTensor<float, 2, true>& msPerVector) {
    for (int q = 0; q < msPerVector.getSize(0); ++q) {
      for (int p = 0; p < msPerVector.getSize(1); ++p) {
        if (p > 0) {
          printf(", ");
        }
        printf("%.1f", (float) msPerVector[q][p] * 1000.0f);
      }

      printf("\n");
    }
  };

  printf("\n");
  printf("CPU time per query vector (us):\n");
  printGridInMicroseconds(cpuTimePerVector);

  printf("\n");
  printf("GPU time per query vector (us):\n");
  printGridInMicroseconds(gpuTimePerVector);

  return 0;
}
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Definition: Timer.cpp:52
Index * read_index(FILE *f, bool try_mmap)
Definition: index_io.cpp:517
CPU wallclock elapsed timer.
Definition: Timer.h:42
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:27
IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:40
IndicesOptions indicesOptions
Index storage options for the GPU.
Definition: GpuIndexIVF.h:31