Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
CompareIVFPQGrid.cu
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #include "../../IndexIVFPQ.h"
13 #include "../../index_io.h"
14 #include "../../utils.h"
15 
16 #include "../GpuIndexIVFPQ.h"
17 #include "IndexWrapper.h"
18 #include "../test/TestUtils.h"
19 #include "../utils/DeviceTensor.cuh"
20 #include "../utils/DeviceUtils.h"
21 #include "../utils/HostTensor.cuh"
22 #include "../utils/Timer.h"
23 #include <gflags/gflags.h>
24 #include <memory>
25 #include <vector>
26 
27 DEFINE_int32(k, 10, "final number of closest results returned");
28 DEFINE_string(in, "/home/jhj/local/ivfpq_index.out", "index file for input");
29 DEFINE_bool(use_precomputed, true, "enable or disable precomputed codes");
30 DEFINE_bool(float16_lookup, false, "use float16 residual distance tables");
31 DEFINE_int32(num_gpus, 1, "number of gpus to use");
32 DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
33 
34 using namespace faiss::gpu;
35 
36 int main(int argc, char** argv) {
37  google::ParseCommandLineFlags(&argc, &argv, true);
38 
39  auto seed = time(nullptr);
40  auto k = FLAGS_k;
41 
42  auto index = std::unique_ptr<faiss::IndexIVFPQ>(
43  dynamic_cast<faiss::IndexIVFPQ*>(faiss::read_index(FLAGS_in.c_str())));
44  FAISS_ASSERT((bool) index);
45 
46  auto dim = index->d;
47  auto codes = index->pq.M;
48  auto bitsPerCode = index->pq.nbits;
49 
50  printf("Database: dim %d num vecs %ld\n", dim, index->ntotal);
51  printf("Coarse centroids: %ld\n", index->quantizer->ntotal);
52  printf("PQ centroids: codes %ld bits per code %ld\n", codes, bitsPerCode);
53  printf("L2 lookup: total k %d, precomputed codes %d\n\n",
54  k, FLAGS_use_precomputed);
55 
56  // Convert to GPU index
57  printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
58 
59  bool precomp = FLAGS_use_precomputed;
60  auto indicesOpt = (faiss::gpu::IndicesOptions) FLAGS_index;
61  auto useFloat16Lookup = FLAGS_float16_lookup;
62 
63  auto initFn = [precomp, indicesOpt, useFloat16Lookup, &index]
64  (faiss::gpu::GpuResources* res, int dev) ->
65  std::unique_ptr<faiss::gpu::GpuIndexIVFPQ> {
66  auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFPQ>(
68  dev,
69  indicesOpt,
70  useFloat16Lookup,
71  index.get()));
72  p->setPrecomputedCodes(precomp);
73 
74  return p;
75  };
76 
77  IndexWrapper<faiss::gpu::GpuIndexIVFPQ> gpuIndex(FLAGS_num_gpus, initFn);
78  printf("copy done\n");
79 
80  auto querySizes = std::vector<int>{1, 4, 16, 64, 256, 1024, 4096, 16384};
81  auto nprobeSizes = std::vector<int>{1, 4, 8, 16, 32, 64, 128, 256};
82 
83  HostTensor<float, 2, true> cpuTimePerVector(
84  {(int) querySizes.size(), (int) nprobeSizes.size()});
85  HostTensor<float, 2, true> gpuTimePerVector(
86  {(int) querySizes.size(), (int) nprobeSizes.size()});
87 
88  printf("GPU relative speedup over CPU (x):\n");
89 
90  for (auto q = 0; q < querySizes.size(); ++q) {
91  auto numQueries = querySizes[q];
92  bool first = true;
93 
94  for (auto p = 0; p < nprobeSizes.size(); ++p) {
95  auto nprobe = nprobeSizes[q];
96 
97  HostTensor<float, 2, true> cpuQuery{numQueries, dim};
98  faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
99 
100  HostTensor<faiss::Index::idx_t, 2, true> resultIndices{numQueries, k};
101  HostTensor<float, 2, true> resultDistances{numQueries, k};
102 
103  index->nprobe = nprobe;
104 
105  float cpuTime = 0.0f;
106  {
107  CpuTimer timer;
108  if (!FLAGS_use_precomputed) {
109  index->use_precomputed_table = 0;
110  }
111  index->search(numQueries, cpuQuery.data(),
112  k, resultDistances.data(), resultIndices.data());
113  cpuTime = timer.elapsedMilliseconds();
114  cpuTimePerVector[q][p] = cpuTime / (float) numQueries;
115  }
116 
117  gpuIndex.setNumProbes(nprobe);
118 
120  gpuHostDistances({numQueries, k});
122  gpuHostIndices({numQueries, k});
123 
124  DeviceTensor<float, 2, true> gpuQuery(cpuQuery, 0);
125  DeviceTensor<float, 2, true> gpuDistances({numQueries, k});
126  DeviceTensor<long, 2, true> gpuIndices({numQueries, k});
127  CUDA_VERIFY(cudaDeviceSynchronize());
128 
129  float gpuTime = 0.0f;
130 
131  {
132  CpuTimer timer;
133 
134  gpuIndex.getIndex()->search(cpuQuery.getSize(0),
135  cpuQuery.data(),
136  FLAGS_k,
137  gpuHostDistances.data(),
138  gpuHostIndices.data());
139 
140  CUDA_VERIFY(cudaDeviceSynchronize());
141  gpuTime = timer.elapsedMilliseconds();
142  }
143 
144  gpuTimePerVector[q][p] = gpuTime / (float) numQueries;
145 
146  if (!first) {
147  printf(", ");
148  }
149  first = false;
150 
151  printf("%.2f", cpuTime / gpuTime);
152  }
153 
154  printf("\n");
155  }
156 
157  printf("\n");
158  printf("CPU time per query vector (us):\n");
159 
160  for (int q = 0; q < cpuTimePerVector.getSize(0); ++q) {
161  bool first = true;
162 
163  for (int p = 0; p < cpuTimePerVector.getSize(1); ++p) {
164  if (!first) {
165  printf(", ");
166  }
167  first = false;
168 
169  printf("%.1f", (float) cpuTimePerVector[q][p] * 1000.0f);
170  }
171 
172  printf("\n");
173  }
174 
175  printf("\n");
176  printf("GPU time per query vector (us):\n");
177 
178  for (int q = 0; q < gpuTimePerVector.getSize(0); ++q) {
179  bool first = true;
180 
181  for (int p = 0; p < gpuTimePerVector.getSize(1); ++p) {
182  if (!first) {
183  printf(", ");
184  }
185  first = false;
186 
187  printf("%.1f", (float) gpuTimePerVector[q][p] * 1000.0f);
188  }
189 
190  printf("\n");
191  }
192 
193  // printf("\ncudaMalloc usage %zd\n",
194  // resources.getMemoryManager().getHighWaterCudaMalloc());
195 
196  return 0;
197 }
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Definition: Timer.cpp:53
Index * read_index(FILE *f, bool try_mmap)
Definition: index_io.cpp:476
CPU wallclock elapsed timer.
Definition: Timer.h:43
IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:25