12 #include "../../IndexFlat.h"
13 #include "../../utils.h"
14 #include "../GpuIndexFlat.h"
15 #include "IndexWrapper.h"
16 #include "../test/TestUtils.h"
17 #include "../utils/DeviceTensor.cuh"
18 #include "../utils/DeviceUtils.h"
19 #include "../utils/HostTensor.cuh"
20 #include "../utils/Timer.h"
21 #include <gflags/gflags.h>
26 #include <cuda_profiler_api.h>
// Command-line flags for this program, declared with the gflags DEFINE_*
// macros. Each flag is read further down as FLAGS_<name>; the arguments are
// (name, default value, help text).

// --k (int32, default 3): per the help text, the number of closest results
// returned for each query.
28 DEFINE_int32(k, 3,
"final number of closest results returned");
// --num (int32, default 128): count passed to index->add() in main and
// reported as "num vecs" in the database summary line.
29 DEFINE_int32(num, 128,
"# of vecs");
// --dim (int32, default 128): reported as "dim" in the database summary line.
30 DEFINE_int32(dim, 128,
"# of dimensions");
// --num_queries (int32, default 3): copied into numQueries at the top of main.
31 DEFINE_int32(num_queries, 3,
"number of query vectors");
// --diff (bool, default true): forwarded to compareLists() when the CPU and
// GPU results are compared.
32 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
// --use_float16 (bool, default false): assigned to config.useFloat16 and
// echoed as "float16 encoding enabled/disabled".
33 DEFINE_bool(use_float16,
false,
"use encodings in float16 instead of float32");
// --transposed (bool, default false): assigned to config.storeTransposed and
// echoed as "transposed storage enabled/disabled".
34 DEFINE_bool(transposed,
false,
"store vectors transposed");
// --seed (int64, default -1): -1 is a sentinel; main substitutes
// time(nullptr) when the flag is left at -1, otherwise the given value is used.
35 DEFINE_int64(seed, -1,
"specify random seed");
// --num_gpus (int32, default 1): printed in the "Copying index to %d GPU(s)"
// message.
36 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
// --pinned_mem (int64, default 0): NOTE(review): no use of FLAGS_pinned_mem is
// visible in this excerpt; units (presumably bytes) should be confirmed
// against the code that consumes it.
37 DEFINE_int64(pinned_mem, 0,
"pinned memory allocation to use");
39 using namespace faiss::gpu;
41 int main(
int argc,
char** argv) {
42 gflags::ParseCommandLineFlags(&argc, &argv,
true);
46 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
47 printf(
"using seed %ld\n", seed);
49 auto numQueries = FLAGS_num_queries;
51 auto index = std::unique_ptr<faiss::IndexFlatL2>(
55 faiss::float_rand(vecs.data(), vecs.numElements(), seed);
57 index->add(FLAGS_num, vecs.data());
59 printf(
"Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
60 printf(
"L2 lookup: %d queries, total k %d\n",
62 printf(
"float16 encoding %s\n", FLAGS_use_float16 ?
"enabled" :
"disabled");
63 printf(
"transposed storage %s\n", FLAGS_transposed ?
"enabled" :
"disabled");
66 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
69 std::unique_ptr<faiss::gpu::GpuIndexFlatL2> {
75 config.useFloat16 = FLAGS_use_float16;
76 config.storeTransposed = FLAGS_transposed;
78 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlatL2>(
84 printf(
"copy done\n");
88 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
98 index->search(numQueries,
107 printf(
"CPU time %.3f ms\n", cpuTime);
112 CUDA_VERIFY(cudaProfilerStart());
113 faiss::gpu::synchronizeAllDevices();
115 float gpuTime = 0.0f;
121 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
132 CUDA_VERIFY(cudaProfilerStop());
133 printf(
"GPU time %.3f ms\n", gpuTime);
135 compareLists(cpuDistances.data(), cpuIndices.data(),
136 gpuDistances.data(), gpuIndices.data(),
138 "",
true, FLAGS_diff,
false);
140 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
CPU wallclock elapsed timer.