Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PerfIVFPQ.cu
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #include "../../IndexIVFPQ.h"
10 #include "../../index_io.h"
11 #include "../../utils.h"
12 
13 #include "../GpuIndexIVFPQ.h"
14 #include "IndexWrapper.h"
15 #include "../test/TestUtils.h"
16 #include "../utils/DeviceTensor.cuh"
17 #include "../utils/DeviceUtils.h"
18 #include "../utils/HostTensor.cuh"
19 #include "../utils/Timer.h"
20 
21 #include <cuda_profiler_api.h>
22 #include <gflags/gflags.h>
23 #include <map>
24 #include <memory>
25 #include <vector>
26 
27 DEFINE_int32(nprobe, 5, "number of coarse centroids to probe");
28 DEFINE_int32(k, 3, "final number of closest results returned");
29 DEFINE_int32(num_queries, 3, "number of query vectors");
30 DEFINE_string(in, "/home/jhj/local/index.out", "index file for input");
31 DEFINE_bool(diff, true, "show exact distance + index output discrepancies");
32 DEFINE_bool(use_precomputed, true, "enable or disable precomputed codes");
33 DEFINE_bool(float16_lookup, false, "use float16 residual distance tables");
34 DEFINE_int64(seed, -1, "specify random seed");
35 DEFINE_int32(num_gpus, 1, "number of gpus to use");
36 DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
37 
38 using namespace faiss::gpu;
39 
40 int main(int argc, char** argv) {
41  gflags::ParseCommandLineFlags(&argc, &argv, true);
42 
43  CUDA_VERIFY(cudaProfilerStop());
44 
45  auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
46  printf("using seed %ld\n", seed);
47 
48  auto numQueries = FLAGS_num_queries;
49 
50  auto index = std::unique_ptr<faiss::IndexIVFPQ>(
51  dynamic_cast<faiss::IndexIVFPQ*>(faiss::read_index(FLAGS_in.c_str())));
52  FAISS_ASSERT((bool) index);
53  index->nprobe = FLAGS_nprobe;
54 
55  if (!FLAGS_use_precomputed) {
56  index->use_precomputed_table = 0;
57  }
58 
59  auto dim = index->d;
60  auto codes = index->pq.M;
61  auto bitsPerCode = index->pq.nbits;
62 
63  printf("Database: dim %d num vecs %ld\n", dim, index->ntotal);
64  printf("Coarse centroids: %ld\n", index->quantizer->ntotal);
65  printf("PQ centroids: codes %ld bits per code %ld\n", codes, bitsPerCode);
66  printf("L2 lookup: %d queries, nprobe %d, total k %d, "
67  "precomputed codes %d\n\n",
68  numQueries, FLAGS_nprobe, FLAGS_k,
69  FLAGS_use_precomputed);
70 
71  // Convert to GPU index
72  printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
73 
74  auto precomp = FLAGS_use_precomputed;
75  auto indicesOpt = (faiss::gpu::IndicesOptions) FLAGS_index;
76  auto useFloat16Lookup = FLAGS_float16_lookup;
77 
78  auto initFn = [precomp, indicesOpt, useFloat16Lookup, &index]
79  (faiss::gpu::GpuResources* res, int dev) ->
80  std::unique_ptr<faiss::gpu::GpuIndexIVFPQ> {
81 
83  config.device = dev;
84  config.usePrecomputedTables = precomp;
85  config.indicesOptions = indicesOpt;
86  config.useFloat16LookupTables = useFloat16Lookup;
87 
88  auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFPQ>(
89  new faiss::gpu::GpuIndexIVFPQ(res, index.get(), config));
90 
91  return p;
92  };
93 
94  IndexWrapper<faiss::gpu::GpuIndexIVFPQ> gpuIndex(FLAGS_num_gpus, initFn);
95  gpuIndex.setNumProbes(FLAGS_nprobe);
96  printf("copy done\n");
97 
98  // Build query vectors
99  HostTensor<float, 2, true> cpuQuery({numQueries, dim});
100  faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
101 
102  // Time faiss CPU
103  HostTensor<float, 2, true> cpuDistances({numQueries, FLAGS_k});
104  HostTensor<faiss::Index::idx_t, 2, true> cpuIndices({numQueries, FLAGS_k});
105 
106  float cpuTime = 0.0f;
107 
108  {
109  CpuTimer timer;
110  index->search(numQueries,
111  cpuQuery.data(),
112  FLAGS_k,
113  cpuDistances.data(),
114  cpuIndices.data());
115 
116  cpuTime = timer.elapsedMilliseconds();
117  }
118 
119  printf("CPU time %.3f ms\n", cpuTime);
120 
121  HostTensor<float, 2, true> gpuDistances({numQueries, FLAGS_k});
122  HostTensor<faiss::Index::idx_t, 2, true> gpuIndices({numQueries, FLAGS_k});
123 
124  CUDA_VERIFY(cudaProfilerStart());
125  faiss::gpu::synchronizeAllDevices();
126 
127  float gpuTime = 0.0f;
128 
129  // Time GPU
130  {
131  CpuTimer timer;
132 
133  gpuIndex.getIndex()->search(cpuQuery.getSize(0),
134  cpuQuery.data(),
135  FLAGS_k,
136  gpuDistances.data(),
137  gpuIndices.data());
138 
139  // There is a device -> host copy above, so no need to time
140  // additional synchronization with the GPU
141  gpuTime = timer.elapsedMilliseconds();
142  }
143 
144  CUDA_VERIFY(cudaProfilerStop());
145  printf("GPU time %.3f ms\n", gpuTime);
146 
147  compareLists(cpuDistances.data(), cpuIndices.data(),
148  gpuDistances.data(), gpuIndices.data(),
149  numQueries, FLAGS_k,
150  "", true, FLAGS_diff, false);
151 
152  CUDA_VERIFY(cudaDeviceSynchronize());
153  // printf("\ncudaMalloc usage %zd\n",
154  // resources.getMemoryManager().getHighWaterCudaMalloc());
155 
156  return 0;
157 }
Referenced symbols:
- float elapsedMilliseconds() — Returns elapsed time in milliseconds. (Definition: Timer.cpp:50)
- CpuTimer — CPU wallclock elapsed timer. (Definition: Timer.h:40)
- int device — GPU device on which the index is resident. (Definition: GpuIndex.h:25)
- GpuIndexIVFPQ — IVFPQ index for the GPU. (Definition: GpuIndexIVFPQ.h:38)
- IndicesOptions indicesOptions — Index storage options for the GPU. (Definition: GpuIndexIVF.h:29)