Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PerfIVFPQAdd.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 
13 #include <cuda_profiler_api.h>
14 #include "../../IndexFlat.h"
15 #include "../../IndexIVFPQ.h"
16 #include "../GpuIndexIVFPQ.h"
17 #include "../StandardGpuResources.h"
18 #include "../test/TestUtils.h"
19 #include "../utils/DeviceUtils.h"
20 #include "../utils/Timer.h"
21 #include <gflags/gflags.h>
22 #include <map>
23 #include <vector>
24 
// Command-line flags (gflags) that configure the benchmark. Each one is read
// in main() through its generated FLAGS_<name> variable.
25 DEFINE_int32(batches, 10, "number of batches of vectors to add");
26 DEFINE_int32(batch_size, 10000, "number of vectors in each batch");
27 DEFINE_int32(dim, 256, "dimension of vectors");
28 DEFINE_int32(centroids, 4096, "num coarse centroids to use");
29 DEFINE_int32(bytes_per_vec, 32, "bytes per encoded vector");
30 DEFINE_int32(bits_per_code, 8, "bits per PQ code");
// NOTE(review): main() casts this value directly to faiss::gpu::IndicesOptions;
// confirm that the numbering in the help text matches that enum's values.
31 DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
// Select which implementation(s) get trained and timed.
32 DEFINE_bool(time_gpu, true, "time add to GPU");
33 DEFINE_bool(time_cpu, false, "time add to CPU");
// When set, main() prints a timing line for every batch instead of the
// periodic "Adding batch" progress message.
34 DEFINE_bool(per_batch_time, false, "print per-batch times");
// Only consulted when time_gpu is set.
35 DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory");
36 
37 int main(int argc, char** argv) {
38  gflags::ParseCommandLineFlags(&argc, &argv, true);
39 
40  cudaProfilerStop();
41 
42  int dim = FLAGS_dim;
43  int numCentroids = FLAGS_centroids;
44  int bytesPerVec = FLAGS_bytes_per_vec;
45  int bitsPerCode = FLAGS_bits_per_code;
46 
48 
49  // IndexIVFPQ will complain, but just give us enough to get through this
50  int numTrain = 4 * numCentroids;
51  std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
52 
53  faiss::IndexFlatL2 coarseQuantizer(dim);
54  faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, dim, numCentroids,
55  bytesPerVec, bitsPerCode);
56  if (FLAGS_time_cpu) {
57  cpuIndex.train(numTrain, trainVecs.data());
58  }
59 
61  &res, 0,
62  dim, numCentroids, bytesPerVec, bitsPerCode,
63  false,
64  (faiss::gpu::IndicesOptions) FLAGS_index,
65  false,
66  faiss::METRIC_L2);
67 
68  if (FLAGS_time_gpu) {
69  gpuIndex.train(numTrain, trainVecs.data());
70  if (FLAGS_reserve_memory) {
71  size_t numVecs = (size_t) FLAGS_batches * (size_t) FLAGS_batch_size;
72  gpuIndex.reserveMemory(numVecs);
73  }
74  }
75 
76  cudaDeviceSynchronize();
77  CUDA_VERIFY(cudaProfilerStart());
78 
79  float totalGpuTime = 0.0f;
80  float totalCpuTime = 0.0f;
81 
82  for (int i = 0; i < FLAGS_batches; ++i) {
83  if (!FLAGS_per_batch_time) {
84  if (i % 10 == 0) {
85  printf("Adding batch %d\n", i + 1);
86  }
87  }
88 
89  auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);
90 
91  if (FLAGS_time_gpu) {
93  gpuIndex.add(FLAGS_batch_size, addVecs.data());
94  CUDA_VERIFY(cudaDeviceSynchronize());
95  auto time = timer.elapsedMilliseconds();
96 
97  totalGpuTime += time;
98 
99  if (FLAGS_per_batch_time) {
100  printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
101  i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
102  }
103  }
104 
105  if (FLAGS_time_cpu) {
106  faiss::gpu::CpuTimer timer;
107  cpuIndex.add(FLAGS_batch_size, addVecs.data());
108  auto time = timer.elapsedMilliseconds();
109 
110  totalCpuTime += time;
111 
112  if (FLAGS_per_batch_time) {
113  printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
114  i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
115  }
116  }
117  }
118 
119  CUDA_VERIFY(cudaProfilerStop());
120 
121  int total = FLAGS_batch_size * FLAGS_batches;
122 
123  if (FLAGS_time_gpu) {
124  printf("%d dim, %d centroids, %d x %d encoding\n"
125  "GPU time to add %d vectors (%d batches, %d per batch): "
126  "%.3f ms (%.3f us per)\n",
127  dim, numCentroids, bytesPerVec, bitsPerCode,
128  total, FLAGS_batches, FLAGS_batch_size,
129  totalGpuTime, totalGpuTime * 1000.0f / (float) total);
130  }
131 
132  if (FLAGS_time_cpu) {
133  printf("%d dim, %d centroids, %d x %d encoding\n"
134  "CPU time to add %d vectors (%d batches, %d per batch): "
135  "%.3f ms (%.3f us per)\n",
136  dim, numCentroids, bytesPerVec, bitsPerCode,
137  total, FLAGS_batches, FLAGS_batch_size,
138  totalCpuTime, totalCpuTime * 1000.0f / (float) total);
139  }
140 
141  return 0;
142 }
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Definition: Timer.cpp:53
CPU wallclock elapsed timer.
Definition: Timer.h:43
IVFPQ index for the GPU.
Definition: GpuIndexIVFPQ.h:25