11 #include "../../IndexFlat.h"
12 #include "../../utils.h"
13 #include "../GpuIndexFlat.h"
14 #include "IndexWrapper.h"
15 #include "../test/TestUtils.h"
16 #include "../utils/DeviceTensor.cuh"
17 #include "../utils/DeviceUtils.h"
18 #include "../utils/HostTensor.cuh"
19 #include "../utils/Timer.h"
20 #include <gflags/gflags.h>
25 #include <cuda_profiler_api.h>
// Command-line flags for this benchmark. They are parsed by
// gflags::ParseCommandLineFlags at the start of main and read afterwards as
// FLAGS_<name>.

// Number of nearest results requested per query.
27 DEFINE_int32(k, 3,
"final number of closest results returned");
// Number of database vectors; passed to index->add() and reported as
// "num vecs".
28 DEFINE_int32(num, 128,
"# of vecs");
// Vector dimensionality; reported as "dim" in the database summary line.
29 DEFINE_int32(dim, 128,
"# of dimensions");
// Number of query vectors; copied into numQueries and passed to the CPU
// index search.
30 DEFINE_int32(num_queries, 3,
"number of query vectors");
// Forwarded to compareLists() when the CPU and GPU results are compared.
31 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
// Reported as "float16 encoding enabled/disabled".
// NOTE(review): the code that applies it to the GPU index is not visible here.
32 DEFINE_bool(use_float16,
false,
"use encodings in float16 instead of float32");
// Reported as "transposed storage enabled/disabled".
// NOTE(review): the code that applies it to the GPU index is not visible here.
33 DEFINE_bool(transposed,
false,
"store vectors transposed");
// Random seed. The default of -1 is a sentinel: main substitutes
// time(nullptr) when the flag is left at -1.
34 DEFINE_int64(seed, -1,
"specify random seed");
// Number of GPUs; reported in the "Copying index to %d GPU(s)" message.
35 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
// NOTE(review): presumably a size for pinned host memory; neither its unit
// nor its point of use is visible in this listing -- confirm against the
// GPU resource setup.
36 DEFINE_int64(pinned_mem, 0,
"pinned memory allocation to use");
38 using namespace faiss::gpu;
40 int main(
int argc,
char** argv) {
41 gflags::ParseCommandLineFlags(&argc, &argv,
true);
45 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
46 printf(
"using seed %ld\n", seed);
48 auto numQueries = FLAGS_num_queries;
50 auto index = std::unique_ptr<faiss::IndexFlatL2>(
54 faiss::float_rand(vecs.data(), vecs.numElements(), seed);
56 index->add(FLAGS_num, vecs.data());
58 printf(
"Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
59 printf(
"L2 lookup: %d queries, total k %d\n",
61 printf(
"float16 encoding %s\n", FLAGS_use_float16 ?
"enabled" :
"disabled");
62 printf(
"transposed storage %s\n", FLAGS_transposed ?
"enabled" :
"disabled");
65 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
68 std::unique_ptr<faiss::gpu::GpuIndexFlatL2> {
77 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlatL2>(
83 printf(
"copy done\n");
87 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
97 index->search(numQueries,
106 printf(
"CPU time %.3f ms\n", cpuTime);
111 CUDA_VERIFY(cudaProfilerStart());
112 faiss::gpu::synchronizeAllDevices();
114 float gpuTime = 0.0f;
120 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
131 CUDA_VERIFY(cudaProfilerStop());
132 printf(
"GPU time %.3f ms\n", gpuTime);
134 compareLists(cpuDistances.data(), cpuIndices.data(),
135 gpuDistances.data(), gpuIndices.data(),
137 "",
true, FLAGS_diff,
false);
139 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
CPU wallclock elapsed timer.
bool useFloat16
Whether or not data is stored as float16.
int device
GPU device on which the index is resident.