11 #include "../../IndexFlat.h"
12 #include "../../utils.h"
13 #include "../GpuIndexFlat.h"
14 #include "IndexWrapper.h"
15 #include "../test/TestUtils.h"
16 #include "../utils/DeviceTensor.cuh"
17 #include "../utils/DeviceUtils.h"
18 #include "../utils/HostTensor.cuh"
19 #include "../utils/Timer.h"
20 #include <gflags/gflags.h>
25 #include <cuda_profiler_api.h>
27 DEFINE_int32(k, 3,
"final number of closest results returned");
28 DEFINE_int32(num, 128,
"# of vecs");
29 DEFINE_int32(dim, 128,
"# of dimensions");
30 DEFINE_int32(num_queries, 3,
"number of query vectors");
31 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
32 DEFINE_bool(use_float16,
false,
"use encodings in float16");
33 DEFINE_bool(use_float16_math,
false,
"perform math in float16");
34 DEFINE_bool(transposed,
false,
"store vectors transposed");
35 DEFINE_int64(seed, -1,
"specify random seed");
36 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
37 DEFINE_int64(pinned_mem, 0,
"pinned memory allocation to use");
38 DEFINE_bool(cpu,
true,
"run the CPU code for timing and comparison");
39 DEFINE_bool(use_unified_mem,
false,
"use Pascal unified memory for the index");
41 using namespace faiss::gpu;
43 int main(
int argc,
char** argv) {
44 gflags::ParseCommandLineFlags(&argc, &argv,
true);
48 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
49 printf(
"using seed %ld\n", seed);
51 auto numQueries = FLAGS_num_queries;
53 auto index = std::unique_ptr<faiss::IndexFlatL2>(
57 faiss::float_rand(vecs.data(), vecs.numElements(), seed);
59 index->add(FLAGS_num, vecs.data());
61 printf(
"Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
62 printf(
"L2 lookup: %d queries, total k %d\n",
64 printf(
"float16 encoding %s\n", FLAGS_use_float16 ?
"enabled" :
"disabled");
65 printf(
"transposed storage %s\n", FLAGS_transposed ?
"enabled" :
"disabled");
68 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
71 std::unique_ptr<faiss::gpu::GpuIndexFlatL2> {
81 MemorySpace::Unified : MemorySpace::Device;
83 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlatL2>(
89 printf(
"copy done\n");
93 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
100 float cpuTime = 0.0f;
103 index->search(numQueries,
110 printf(
"CPU time %.3f ms\n", cpuTime);
116 CUDA_VERIFY(cudaProfilerStart());
117 faiss::gpu::synchronizeAllDevices();
119 float gpuTime = 0.0f;
125 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
136 CUDA_VERIFY(cudaProfilerStop());
137 printf(
"GPU time %.3f ms\n", gpuTime);
140 compareLists(cpuDistances.data(), cpuIndices.data(),
141 gpuDistances.data(), gpuIndices.data(),
143 "",
true, FLAGS_diff,
false);
146 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
bool useFloat16Accumulator
CPU wallclock elapsed timer.
bool useFloat16
Whether or not data is stored as float16.
int device
GPU device on which the index is resident.