Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PerfBinaryFlat.cu
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
7 
8 
#include "../../IndexBinaryFlat.h"
#include "../../utils.h"
#include "../GpuIndexBinaryFlat.h"
#include "../StandardGpuResources.h"
#include "../test/TestUtils.h"
#include "../utils/DeviceTensor.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/HostTensor.cuh"
#include "../utils/Timer.h"
#include <gflags/gflags.h>
#include <cstdio>
#include <ctime>
#include <map>
#include <memory>
#include <vector>

#include <cuda_profiler_api.h>
24 
// Command-line flags (gflags) controlling the benchmark.
//
// Problem size:
//   --k            number of nearest neighbours returned per query
//   --num          number of database vectors added to the index
//   --dim          vector dimensionality in bits; main() allocates dim / 8
//                  bytes per vector
//   --num_queries  number of query vectors searched
DEFINE_int32(k, 3, "final number of closest results returned");
DEFINE_int32(num, 128, "# of vecs");
DEFINE_int32(dim, 128, "# of dimensions");
DEFINE_int32(num_queries, 3, "number of query vectors");

// Random seed for the generated data; -1 makes main() fall back to the
// current time.
DEFINE_int64(seed, -1, "specify random seed");

// NOTE(review): this flag is defined but main() in this file never reads
// FLAGS_pinned_mem -- confirm whether it was meant to be forwarded to the
// GPU resources object.
DEFINE_int64(pinned_mem, 0, "pinned memory allocation to use");

// Whether to also run (and time) the CPU index for comparison.
DEFINE_bool(cpu, true, "run the CPU code for timing and comparison");

// Selects MemorySpace::Unified instead of MemorySpace::Device for the GPU
// index configuration.
DEFINE_bool(use_unified_mem, false, "use Pascal unified memory for the index");

using namespace faiss::gpu;
35 
/**
 * Benchmark driver for binary flat search.
 *
 * Builds a CPU faiss::IndexBinaryFlat from FLAGS_num randomly generated
 * vectors of FLAGS_dim bits, constructs a GpuIndexBinaryFlat from it, then
 * runs one search of FLAGS_num_queries queries (k = FLAGS_k) on the CPU
 * (optional, --cpu) and on the GPU, printing the wall-clock time of each.
 *
 * The CUDA profiler is stopped during setup and only enabled around the
 * GPU search so that a profile capture contains just the timed region.
 *
 * @param argc  argument count, consumed by gflags
 * @param argv  argument vector, consumed by gflags
 * @return 0 on completion; CUDA failures are handled by CUDA_VERIFY
 */
int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  // Keep the profiler off while the indices are being built. Checked like
  // every other CUDA call in this function.
  CUDA_VERIFY(cudaProfilerStop());

  // -1 means "no seed given": fall back to the current time.
  auto seed = FLAGS_seed != -1L ? FLAGS_seed : std::time(nullptr);
  // Cast so the argument always matches the %ld conversion.
  std::printf("using seed %ld\n", static_cast<long>(seed));

  auto numQueries = FLAGS_num_queries;

  auto index = std::unique_ptr<faiss::IndexBinaryFlat>(
    new faiss::IndexBinaryFlat(FLAGS_dim));

  // Database vectors are packed bits: dim / 8 bytes per vector.
  HostTensor<unsigned char, 2, true> vecs({FLAGS_num, FLAGS_dim / 8});
  faiss::byte_rand(vecs.data(), vecs.numElements(), seed);

  index->add(FLAGS_num, vecs.data());

  std::printf("Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
  std::printf("Hamming lookup: %d queries, total k %d\n",
              numQueries, FLAGS_k);

  // Convert to GPU index
  std::printf("Copying index to GPU...\n");

  GpuIndexBinaryFlatConfig config;
  config.memorySpace = FLAGS_use_unified_mem ?
    MemorySpace::Unified : MemorySpace::Device;

  faiss::gpu::StandardGpuResources res;

  faiss::gpu::GpuIndexBinaryFlat gpuIndex(&res,
                                          index.get(),
                                          config);
  std::printf("copy done\n");

  // Build query vectors
  // NOTE(review): the queries are generated with the same seed as the
  // database vectors; presumably this makes them repeat database bytes --
  // confirm that this is intended.
  HostTensor<unsigned char, 2, true> cpuQuery({numQueries, FLAGS_dim / 8});
  faiss::byte_rand(cpuQuery.data(), cpuQuery.numElements(), seed);

  // Time faiss CPU
  if (FLAGS_cpu) {
    // Result buffers are only needed when the CPU run is requested.
    HostTensor<int, 2, true>
      cpuDistances({numQueries, FLAGS_k});
    HostTensor<faiss::Index::idx_t, 2, true>
      cpuIndices({numQueries, FLAGS_k});

    float cpuTime = 0.0f;

    CpuTimer timer;
    index->search(numQueries,
                  cpuQuery.data(),
                  FLAGS_k,
                  cpuDistances.data(),
                  cpuIndices.data());

    cpuTime = timer.elapsedMilliseconds();
    std::printf("CPU time %.3f ms\n", cpuTime);
  }

  HostTensor<int, 2, true> gpuDistances({numQueries, FLAGS_k});
  HostTensor<faiss::Index::idx_t, 2, true> gpuIndices({numQueries, FLAGS_k});

  // Profile only the timed GPU search; make sure all prior GPU work has
  // finished before the timer starts.
  CUDA_VERIFY(cudaProfilerStart());
  faiss::gpu::synchronizeAllDevices();

  float gpuTime = 0.0f;

  // Time GPU
  {
    CpuTimer timer;

    gpuIndex.search(cpuQuery.getSize(0),
                    cpuQuery.data(),
                    FLAGS_k,
                    gpuDistances.data(),
                    gpuIndices.data());

    // There is a device -> host copy above, so no need to time
    // additional synchronization with the GPU
    gpuTime = timer.elapsedMilliseconds();
  }

  CUDA_VERIFY(cudaProfilerStop());
  std::printf("GPU time %.3f ms\n", gpuTime);

  CUDA_VERIFY(cudaDeviceSynchronize());

  return 0;
}
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Definition: Timer.cpp:50
CPU wallclock elapsed timer.
Definition: Timer.h:40
MemorySpace memorySpace
Definition: GpuIndex.h:30