10 #include "../../IndexFlat.h"
11 #include "../../utils.h"
12 #include "../GpuIndexFlat.h"
13 #include "IndexWrapper.h"
14 #include "../test/TestUtils.h"
15 #include "../utils/DeviceTensor.cuh"
16 #include "../utils/DeviceUtils.h"
17 #include "../utils/HostTensor.cuh"
18 #include "../utils/Timer.h"
19 #include <gflags/gflags.h>
24 #include <cuda_profiler_api.h>
26 DEFINE_int32(k, 3,
"final number of closest results returned");
27 DEFINE_int32(num, 128,
"# of vecs");
28 DEFINE_int32(dim, 128,
"# of dimensions");
29 DEFINE_int32(num_queries, 3,
"number of query vectors");
30 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
31 DEFINE_bool(use_float16,
false,
"use encodings in float16");
32 DEFINE_bool(use_float16_math,
false,
"perform math in float16");
33 DEFINE_bool(transposed,
false,
"store vectors transposed");
34 DEFINE_int64(seed, -1,
"specify random seed");
35 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
36 DEFINE_int64(pinned_mem, 0,
"pinned memory allocation to use");
37 DEFINE_bool(cpu,
true,
"run the CPU code for timing and comparison");
38 DEFINE_bool(use_unified_mem,
false,
"use Pascal unified memory for the index");
40 using namespace faiss::gpu;
// Benchmark entry point: builds a CPU flat L2 index from random vectors,
// runs the same queries through the CPU index and a GPU index, prints both
// timings and compares the result lists.
//
// NOTE(review): the numbers embedded at the start of the statements below
// jump (e.g. 43 -> 47, 52 -> 56, 70 -> 80), so this listing is missing
// lines and several statements are truncated. The comments here describe
// only what is visible.
42 int main(
int argc,
char** argv) {
// Populate the FLAGS_* variables from the command line.
43 gflags::ParseCommandLineFlags(&argc, &argv,
true);
// Use the user-supplied seed unless it is the -1 sentinel, in which case
// fall back to the current time.
47 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
// NOTE(review): "%ld" assumes the deduced type of `seed` is long — confirm
// on platforms where int64 / time_t are not long.
48 printf(
"using seed %ld\n", seed);
50 auto numQueries = FLAGS_num_queries;
// CPU reference index (the rest of this statement is not visible here).
52 auto index = std::unique_ptr<faiss::IndexFlatL2>(
// Fill the database vectors with random floats derived from `seed`
// (the declaration of `vecs` is not visible in this listing).
56 faiss::float_rand(vecs.data(), vecs.numElements(), seed);
// Add FLAGS_num vectors to the CPU index.
58 index->add(FLAGS_num, vecs.data());
// Report the benchmark configuration.
60 printf(
"Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
61 printf(
"L2 lookup: %d queries, total k %d\n",
63 printf(
"float16 encoding %s\n", FLAGS_use_float16 ?
"enabled" :
"disabled");
64 printf(
"transposed storage %s\n", FLAGS_transposed ?
"enabled" :
"disabled");
67 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
// GPU index construction; most of this section is missing from the listing.
// The visible fragment selects between MemorySpace::Unified and
// MemorySpace::Device — presumably driven by FLAGS_use_unified_mem; confirm
// against the full source.
70 std::unique_ptr<faiss::gpu::GpuIndexFlatL2> {
80 MemorySpace::Unified : MemorySpace::Device;
82 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlatL2>(
88 printf(
"copy done\n");
// Fill the query vectors with random floats.
// NOTE(review): this reuses the same `seed` as the database fill above;
// whether that makes queries coincide with database vectors depends on
// float_rand — confirm.
92 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
// CPU search (remaining arguments and timing code are not visible here).
102 index->search(numQueries,
109 printf(
"CPU time %.3f ms\n", cpuTime);
// Bracket the GPU search with profiler start/stop so an attached profiler
// captures only this region; CUDA_VERIFY checks each runtime call.
115 CUDA_VERIFY(cudaProfilerStart());
116 faiss::gpu::synchronizeAllDevices();
118 float gpuTime = 0.0f;
// GPU search over the same queries (remaining arguments are not visible).
124 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
135 CUDA_VERIFY(cudaProfilerStop());
136 printf(
"GPU time %.3f ms\n", gpuTime);
// Compare CPU and GPU distances/indices; FLAGS_diff is forwarded to
// compareLists (some arguments of this call are missing from the listing).
139 compareLists(cpuDistances.data(), cpuIndices.data(),
140 gpuDistances.data(), gpuIndices.data(),
142 "",
true, FLAGS_diff,
false);
// Final device synchronization, checked by CUDA_VERIFY.
145 CUDA_VERIFY(cudaDeviceSynchronize());
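// Reference notes for symbols used above (apparently carried over from
// generated documentation; the declaration/description pairing may be
// misaligned — verify against the corresponding headers):
//   float elapsedMilliseconds()
//     Returns elapsed time in milliseconds.
//   bool useFloat16Accumulator
//   CPU wallclock elapsed timer.
//   bool useFloat16
//     Whether or not data is stored as float16.
//   int device
//     GPU device on which the index is resident.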