Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PerfBinaryFlat.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #include "../../IndexBinaryFlat.h"
11 #include "../../utils.h"
12 #include "../GpuIndexBinaryFlat.h"
13 #include "../StandardGpuResources.h"
14 #include "../test/TestUtils.h"
15 #include "../utils/DeviceTensor.cuh"
16 #include "../utils/DeviceUtils.h"
17 #include "../utils/HostTensor.cuh"
18 #include "../utils/Timer.h"
19 #include <gflags/gflags.h>
20 #include <map>
21 #include <memory>
22 #include <vector>
23 
24 #include <cuda_profiler_api.h>
25 
// Command-line flags (gflags) that configure the benchmark; main() reads each
// one through the generated FLAGS_<name> variable.

// Number of nearest neighbors requested per query; also the width of the
// result tensors.
26 DEFINE_int32(k, 3, "final number of closest results returned");
// Number of randomly generated database vectors added to the index.
27 DEFINE_int32(num, 128, "# of vecs");
// Dimension passed to the binary index; main() allocates dim / 8 bytes per
// vector.
28 DEFINE_int32(dim, 128, "# of dimensions");
// Number of randomly generated query vectors.
29 DEFINE_int32(num_queries, 3, "number of query vectors");
// Random seed; the default of -1 makes main() fall back to time(nullptr).
30 DEFINE_int64(seed, -1, "specify random seed");
// NOTE(review): this flag is not referenced anywhere in the visible main();
// confirm whether it was meant to be forwarded to the GPU resources object.
31 DEFINE_int64(pinned_mem, 0, "pinned memory allocation to use");
// When true, main() also times the CPU IndexBinaryFlat search.
32 DEFINE_bool(cpu, true, "run the CPU code for timing and comparison");
// Selects MemorySpace::Unified instead of MemorySpace::Device for the GPU
// index configuration.
33 DEFINE_bool(use_unified_mem, false, "use Pascal unified memory for the index");
34 
35 using namespace faiss::gpu;
36 
/**
 * Benchmark driver: builds a CPU IndexBinaryFlat from random binary vectors,
 * copies it into a GpuIndexBinaryFlat, and reports wall-clock search time for
 * the CPU index (optional, --cpu) and the GPU index.
 *
 * All parameters come from the gflags defined above (k, num, dim,
 * num_queries, seed, cpu, use_unified_mem).
 *
 * @param argc argument count, forwarded to gflags
 * @param argv argument vector, forwarded to gflags
 * @return 0 on completion; CUDA failures are reported through CUDA_VERIFY
 */
int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  // Keep the profiler off during setup so that only the timed GPU search
  // below is captured. Checked like every other profiler call in this file.
  CUDA_VERIFY(cudaProfilerStop());

  // A seed of -1 means "not specified": fall back to the current time.
  auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
  // Cast explicitly so the format specifier matches on every platform,
  // regardless of whether int64 is `long` or `long long`.
  printf("using seed %lld\n", static_cast<long long>(seed));

  auto numQueries = FLAGS_num_queries;

  auto index = std::unique_ptr<faiss::IndexBinaryFlat>(
    new faiss::IndexBinaryFlat(FLAGS_dim));

  // Binary vectors are stored as raw bytes: dim / 8 bytes per vector.
  HostTensor<unsigned char, 2, true> vecs({FLAGS_num, FLAGS_dim / 8});
  faiss::byte_rand(vecs.data(), vecs.numElements(), seed);

  index->add(FLAGS_num, vecs.data());

  printf("Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
  printf("Hamming lookup: %d queries, total k %d\n",
         numQueries, FLAGS_k);

  // Convert to GPU index
  printf("Copying index to GPU...\n");

  GpuIndexBinaryFlatConfig config;
  config.memorySpace = FLAGS_use_unified_mem ?
    MemorySpace::Unified : MemorySpace::Device;

  faiss::gpu::StandardGpuResources res;

  faiss::gpu::GpuIndexBinaryFlat gpuIndex(&res,
                                          index.get(),
                                          config);
  printf("copy done\n");

  // Build query vectors
  HostTensor<unsigned char, 2, true> cpuQuery({numQueries, FLAGS_dim / 8});
  faiss::byte_rand(cpuQuery.data(), cpuQuery.numElements(), seed);

  // Time faiss CPU
  HostTensor<int, 2, true>
    cpuDistances({numQueries, FLAGS_k});
  HostTensor<faiss::Index::idx_t, 2, true>
    cpuIndices({numQueries, FLAGS_k});

  if (FLAGS_cpu) {
    CpuTimer timer;
    index->search(numQueries,
                  cpuQuery.data(),
                  FLAGS_k,
                  cpuDistances.data(),
                  cpuIndices.data());

    float cpuTime = timer.elapsedMilliseconds();
    printf("CPU time %.3f ms\n", cpuTime);
  }

  HostTensor<int, 2, true> gpuDistances({numQueries, FLAGS_k});
  HostTensor<faiss::Index::idx_t, 2, true> gpuIndices({numQueries, FLAGS_k});

  // Enable profiling only around the timed GPU search, and make sure all
  // outstanding device work has finished before the timer starts.
  CUDA_VERIFY(cudaProfilerStart());
  faiss::gpu::synchronizeAllDevices();

  float gpuTime = 0.0f;

  // Time GPU
  {
    CpuTimer timer;

    gpuIndex.search(cpuQuery.getSize(0),
                    cpuQuery.data(),
                    FLAGS_k,
                    gpuDistances.data(),
                    gpuIndices.data());

    // There is a device -> host copy above, so no need to time
    // additional synchronization with the GPU
    gpuTime = timer.elapsedMilliseconds();
  }

  CUDA_VERIFY(cudaProfilerStop());
  printf("GPU time %.3f ms\n", gpuTime);

  CUDA_VERIFY(cudaDeviceSynchronize());

  return 0;
}
Referenced symbols:
- float elapsedMilliseconds(): Returns elapsed time in milliseconds.
  Definition: Timer.cpp:51
- CpuTimer: CPU wallclock elapsed timer.
  Definition: Timer.h:41
- MemorySpace memorySpace
  Definition: GpuIndex.h:31