10 #include "../../IndexBinaryFlat.h"
11 #include "../../utils.h"
12 #include "../GpuIndexBinaryFlat.h"
13 #include "../StandardGpuResources.h"
14 #include "../test/TestUtils.h"
15 #include "../utils/DeviceTensor.cuh"
16 #include "../utils/DeviceUtils.h"
17 #include "../utils/HostTensor.cuh"
18 #include "../utils/Timer.h"
19 #include <gflags/gflags.h>
24 #include <cuda_profiler_api.h>
// Command-line flags (gflags) that configure this binary-flat benchmark.
// Each flag is read in main() through its generated FLAGS_<name> variable.

// Number of nearest results requested per query; used as the second
// dimension of the CPU distance and index result buffers.
26 DEFINE_int32(k, 3,
"final number of closest results returned");
// Number of database vectors; passed as the count to index->add().
27 DEFINE_int32(num, 128,
"# of vecs");
// Vector dimensionality; reported as "dim" in the database summary line.
// NOTE(review): for a binary index this is presumably a bit count — confirm.
28 DEFINE_int32(dim, 128,
"# of dimensions");
// Number of query vectors submitted to each search call.
29 DEFINE_int32(num_queries, 3,
"number of query vectors");
// Random seed for data generation. The default of -1 is a sentinel:
// main() substitutes time(nullptr) when the flag is left at -1.
30 DEFINE_int64(seed, -1,
"specify random seed");
// Pinned memory allocation setting.
// NOTE(review): its use is not visible in this excerpt; presumably a byte
// count handed to the GPU resources object — confirm.
31 DEFINE_int64(pinned_mem, 0,
"pinned memory allocation to use");
// Whether to also run the CPU index for timing and comparison.
// NOTE(review): the guarding condition is not visible in this excerpt.
32 DEFINE_bool(cpu,
true,
"run the CPU code for timing and comparison");
// Whether the GPU index should use unified memory rather than device memory.
// NOTE(review): presumably selects MemorySpace::Unified over
// MemorySpace::Device in main() — confirm.
33 DEFINE_bool(use_unified_mem,
false,
"use Pascal unified memory for the index");
35 using namespace faiss::gpu;
37 int main(
int argc,
char** argv) {
38 gflags::ParseCommandLineFlags(&argc, &argv,
true);
42 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
43 printf(
"using seed %ld\n", seed);
45 auto numQueries = FLAGS_num_queries;
47 auto index = std::unique_ptr<faiss::IndexBinaryFlat>(
51 faiss::byte_rand(vecs.data(), vecs.numElements(), seed);
53 index->add(FLAGS_num, vecs.data());
55 printf(
"Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
56 printf(
"Hamming lookup: %d queries, total k %d\n",
60 printf(
"Copying index to GPU...\n");
64 MemorySpace::Unified : MemorySpace::Device;
71 printf(
"copy done\n");
75 faiss::byte_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
79 cpuDistances({numQueries, FLAGS_k});
81 cpuIndices({numQueries, FLAGS_k});
87 index->search(numQueries,
94 printf(
"CPU time %.3f ms\n", cpuTime);
100 CUDA_VERIFY(cudaProfilerStart());
101 faiss::gpu::synchronizeAllDevices();
103 float gpuTime = 0.0f;
109 gpuIndex.search(cpuQuery.getSize(0),
120 CUDA_VERIFY(cudaProfilerStop());
121 printf(
"GPU time %.3f ms\n", gpuTime);
123 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
CPU wallclock elapsed timer.