Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PerfIVFPQAdd.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 
11 #include <cuda_profiler_api.h>
12 #include "../../IndexFlat.h"
13 #include "../../IndexIVFPQ.h"
14 #include "../GpuIndexIVFPQ.h"
15 #include "../StandardGpuResources.h"
16 #include "../test/TestUtils.h"
17 #include "../utils/DeviceUtils.h"
18 #include "../utils/Timer.h"
19 #include <gflags/gflags.h>
20 #include <map>
21 #include <vector>
22 // Command-line flags: workload shape (batches, batch_size, dim), index parameters (centroids, bytes_per_vec, bits_per_code, index), and what gets timed and reported.
23 DEFINE_int32(batches, 10, "number of batches of vectors to add");
24 DEFINE_int32(batch_size, 10000, "number of vectors in each batch");
25 DEFINE_int32(dim, 256, "dimension of vectors");
26 DEFINE_int32(centroids, 4096, "num coarse centroids to use");
27 DEFINE_int32(bytes_per_vec, 32, "bytes per encoded vector");
28 DEFINE_int32(bits_per_code, 8, "bits per PQ code");
29 DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
30 DEFINE_bool(time_gpu, true, "time add to GPU");
31 DEFINE_bool(time_cpu, false, "time add to CPU");
32 DEFINE_bool(per_batch_time, false, "print per-batch times");
33 DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory");
34 
35 int main(int argc, char** argv) {
36  gflags::ParseCommandLineFlags(&argc, &argv, true);
37 
38  cudaProfilerStop();
39 
40  int dim = FLAGS_dim;
41  int numCentroids = FLAGS_centroids;
42  int bytesPerVec = FLAGS_bytes_per_vec;
43  int bitsPerCode = FLAGS_bits_per_code;
44 
46 
47  // IndexIVFPQ will complain, but just give us enough to get through this
48  int numTrain = 4 * numCentroids;
49  std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
50 
51  faiss::IndexFlatL2 coarseQuantizer(dim);
52  faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, dim, numCentroids,
53  bytesPerVec, bitsPerCode);
54  if (FLAGS_time_cpu) {
55  cpuIndex.train(numTrain, trainVecs.data());
56  }
57 
59  config.device = 0;
60  config.indicesOptions = (faiss::gpu::IndicesOptions) FLAGS_index;
61 
63  &res, dim, numCentroids, bytesPerVec, bitsPerCode,
64  faiss::METRIC_L2, config);
65 
66  if (FLAGS_time_gpu) {
67  gpuIndex.train(numTrain, trainVecs.data());
68  if (FLAGS_reserve_memory) {
69  size_t numVecs = (size_t) FLAGS_batches * (size_t) FLAGS_batch_size;
70  gpuIndex.reserveMemory(numVecs);
71  }
72  }
73 
74  cudaDeviceSynchronize();
75  CUDA_VERIFY(cudaProfilerStart());
76 
77  float totalGpuTime = 0.0f;
78  float totalCpuTime = 0.0f;
79 
80  for (int i = 0; i < FLAGS_batches; ++i) {
81  if (!FLAGS_per_batch_time) {
82  if (i % 10 == 0) {
83  printf("Adding batch %d\n", i + 1);
84  }
85  }
86 
87  auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);
88 
89  if (FLAGS_time_gpu) {
91  gpuIndex.add(FLAGS_batch_size, addVecs.data());
92  CUDA_VERIFY(cudaDeviceSynchronize());
93  auto time = timer.elapsedMilliseconds();
94 
95  totalGpuTime += time;
96 
97  if (FLAGS_per_batch_time) {
98  printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
99  i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
100  }
101  }
102 
103  if (FLAGS_time_cpu) {
104  faiss::gpu::CpuTimer timer;
105  cpuIndex.add(FLAGS_batch_size, addVecs.data());
106  auto time = timer.elapsedMilliseconds();
107 
108  totalCpuTime += time;
109 
110  if (FLAGS_per_batch_time) {
111  printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
112  i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
113  }
114  }
115  }
116 
117  CUDA_VERIFY(cudaProfilerStop());
118 
119  int total = FLAGS_batch_size * FLAGS_batches;
120 
121  if (FLAGS_time_gpu) {
122  printf("%d dim, %d centroids, %d x %d encoding\n"
123  "GPU time to add %d vectors (%d batches, %d per batch): "
124  "%.3f ms (%.3f us per)\n",
125  dim, numCentroids, bytesPerVec, bitsPerCode,
126  total, FLAGS_batches, FLAGS_batch_size,
127  totalGpuTime, totalGpuTime * 1000.0f / (float) total);
128  }
129 
130  if (FLAGS_time_cpu) {
131  printf("%d dim, %d centroids, %d x %d encoding\n"
132  "CPU time to add %d vectors (%d batches, %d per batch): "
133  "%.3f ms (%.3f us per)\n",
134  dim, numCentroids, bytesPerVec, bitsPerCode,
135  total, FLAGS_batches, FLAGS_batch_size,
136  totalCpuTime, totalCpuTime * 1000.0f / (float) total);
137  }
138 
139  return 0;
140 }
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Definition: Timer.cpp:51
faiss::gpu::CpuTimer
CPU wallclock elapsed timer.
Definition: Timer.h:41
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:26
faiss::gpu::GpuIndexIVFPQ
IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:39
IndicesOptions indicesOptions
Index storage options for the GPU.
Definition: GpuIndexIVF.h:30