9 #include "../../IndexFlat.h"
10 #include "../../utils.h"
11 #include "../GpuIndexFlat.h"
12 #include "IndexWrapper.h"
13 #include "../test/TestUtils.h"
14 #include "../utils/DeviceTensor.cuh"
15 #include "../utils/DeviceUtils.h"
16 #include "../utils/HostTensor.cuh"
17 #include "../utils/Timer.h"
18 #include <gflags/gflags.h>
23 #include <cuda_profiler_api.h>
// Command-line flags (gflags) for this flat-index benchmark. Each DEFINE_*
// below creates a FLAGS_<name> variable that main() reads after calling
// gflags::ParseCommandLineFlags.

// Metric selector: main() prints "L2" when true and "IP" when false.
// NOTE(review): main() also chooses between faiss::METRIC_L2 and
// faiss::METRIC_INNER_PRODUCT; presumably keyed on this flag -- confirm.
25 DEFINE_bool(l2,
true,
"L2 or inner product");
// Number of closest results returned per query (default 3).
26 DEFINE_int32(k, 3,
"final number of closest results returned");
// Number of database vectors (default 128); passed to index->add() and
// printed in the "Database:" summary line.
27 DEFINE_int32(num, 128,
"# of vecs");
// Vector dimensionality (default 128); printed in the "Database:" summary line.
28 DEFINE_int32(dim, 128,
"# of dimensions");
// Number of query vectors (default 3); main() copies it into numQueries.
29 DEFINE_int32(num_queries, 3,
"number of query vectors");
// Forwarded to compareLists() when CPU and GPU results are compared
// (default true).
30 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
// main() reports this as "float16 encoding enabled/disabled" (default false).
31 DEFINE_bool(use_float16,
false,
"use encodings in float16");
// Per the help text, requests float16 arithmetic (default false).
32 DEFINE_bool(use_float16_math,
false,
"perform math in float16");
// main() reports this as "transposed storage enabled/disabled" (default false).
33 DEFINE_bool(transposed,
false,
"store vectors transposed");
// Random seed handed to faiss::float_rand. The default of -1 is a sentinel:
// main() substitutes time(nullptr) when the flag is left at -1.
34 DEFINE_int64(seed, -1,
"specify random seed");
// Number of GPUs the index is copied to (default 1); printed by main().
35 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
// Pinned memory allocation to use (default 0).
// NOTE(review): units are not shown here; presumably bytes -- confirm.
36 DEFINE_int64(pinned_mem, 0,
"pinned memory allocation to use");
// Per the help text, enables the CPU run used for timing and comparison
// (default true).
37 DEFINE_bool(cpu,
true,
"run the CPU code for timing and comparison");
// NOTE(review): presumably selects MemorySpace::Unified over
// MemorySpace::Device when building the GPU index -- confirm (default false).
38 DEFINE_bool(use_unified_mem,
false,
"use Pascal unified memory for the index");
40 using namespace faiss::gpu;
42 int main(
int argc,
char** argv) {
43 gflags::ParseCommandLineFlags(&argc, &argv,
true);
47 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
48 printf(
"using seed %ld\n", seed);
50 auto numQueries = FLAGS_num_queries;
52 auto index = std::unique_ptr<faiss::IndexFlat>(
54 faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT));
57 faiss::float_rand(vecs.data(), vecs.numElements(), seed);
59 index->add(FLAGS_num, vecs.data());
61 printf(
"Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
62 printf(
"%s lookup: %d queries, total k %d\n",
63 FLAGS_l2 ?
"L2" :
"IP",
65 printf(
"float16 encoding %s\n", FLAGS_use_float16 ?
"enabled" :
"disabled");
66 printf(
"transposed storage %s\n", FLAGS_transposed ?
"enabled" :
"disabled");
69 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
72 std::unique_ptr<faiss::gpu::GpuIndexFlat> {
82 MemorySpace::Unified : MemorySpace::Device;
84 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
90 printf(
"copy done\n");
94 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
101 float cpuTime = 0.0f;
104 index->search(numQueries,
111 printf(
"CPU time %.3f ms\n", cpuTime);
117 CUDA_VERIFY(cudaProfilerStart());
118 faiss::gpu::synchronizeAllDevices();
120 float gpuTime = 0.0f;
126 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
137 CUDA_VERIFY(cudaProfilerStop());
138 printf(
"GPU time %.3f ms\n", gpuTime);
141 compareLists(cpuDistances.data(), cpuIndices.data(),
142 gpuDistances.data(), gpuIndices.data(),
144 "",
true, FLAGS_diff,
false);
147 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
bool useFloat16Accumulator
CPU wallclock elapsed timer.
bool useFloat16
Whether or not data is stored as float16.
int device
GPU device on which the index is resident.