12 #include "../../IndexIVF.h"
13 #include "../../index_io.h"
14 #include "../../utils.h"
16 #include "../GpuIndexIVFFlat.h"
17 #include "IndexWrapper.h"
18 #include "../test/TestUtils.h"
19 #include "../utils/DeviceTensor.cuh"
20 #include "../utils/DeviceUtils.h"
21 #include "../utils/HostTensor.cuh"
22 #include "../utils/Timer.h"
23 #include <gflags/gflags.h>
28 #include <cuda_profiler_api.h>
// Command-line flags for this CPU-vs-GPU IVF-Flat search comparison.
// Each DEFINE_* creates a FLAGS_<name> variable that main() reads after
// gflags::ParseCommandLineFlags().

// Probe count; main() assigns it to index->nprobe and also passes it to
// gpuIndex.setNumProbes().
30 DEFINE_int32(nprobe, 5,
"number of coarse centroids to probe");
// Reported by main() as "total k" in the run summary.
31 DEFINE_int32(k, 3,
"final number of closest results returned");
// Copied into numQueries in main() and passed as the query count to the
// CPU index->search() call.
32 DEFINE_int32(num_queries, 3,
"number of query vectors");
// NOTE(review): the default is an absolute path under one user's home
// directory, so runs on other machines presumably need to pass --in.
33 DEFINE_string(in,
"/home/jhj/local/index.out",
"index file for input");
// Forwarded to compareLists() when the CPU and GPU results are compared.
34 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
// Echoed in the run summary as "float16 encoding enabled/disabled".
35 DEFINE_bool(use_float16,
false,
"use encodings in float16");
// Echoed in the run summary; also appears among the arguments used when the
// GpuIndexIVFFlat is built (TODO confirm which parameter it maps to).
36 DEFINE_bool(use_float16_coarse,
false,
"coarse quantizer in float16");
// -1 is a sentinel: main() substitutes time(nullptr) when the flag is left at
// -1. The resulting seed is printed and passed to faiss::float_rand().
37 DEFINE_int64(seed, -1,
"specify random seed");
// Printed in the "Copying index to %d GPU(s)..." message.
38 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
// main() casts this integer directly to faiss::gpu::IndicesOptions, so the
// accepted values must match that enum's numeric values.
39 DEFINE_int32(index, 2,
"0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
41 using namespace faiss::gpu;
43 int main(
int argc,
char** argv) {
44 gflags::ParseCommandLineFlags(&argc, &argv,
true);
48 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
49 printf(
"using seed %ld\n", seed);
51 auto numQueries = FLAGS_num_queries;
53 auto index = std::unique_ptr<faiss::IndexIVFFlat>(
55 FAISS_ASSERT((
bool) index);
56 index->nprobe = FLAGS_nprobe;
60 printf(
"Database: dim %d num vecs %ld\n", dim, index->ntotal);
61 printf(
"Coarse centroids: %ld\n", index->quantizer->ntotal);
62 printf(
"L2 lookup: %d queries, nprobe %d, total k %d\n",
63 numQueries, FLAGS_nprobe, FLAGS_k);
64 printf(
"float16 coarse quantizer %s\n",
65 FLAGS_use_float16_coarse ?
"enabled" :
"disabled");
66 printf(
"float16 encoding %s\n",
67 FLAGS_use_float16 ?
"enabled" :
"disabled");
70 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
73 std::unique_ptr<faiss::gpu::GpuIndexIVFFlat> {
74 auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFFlat>(
77 FLAGS_use_float16_coarse,
79 index->d, index->nlist,
80 (faiss::gpu::IndicesOptions) FLAGS_index,
82 p->copyFrom(index.get());
87 gpuIndex.setNumProbes(FLAGS_nprobe);
88 printf(
"copy done\n");
92 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
102 index->search(numQueries,
111 printf(
"CPU time %.3f ms\n", cpuTime);
116 CUDA_VERIFY(cudaProfilerStart());
117 faiss::gpu::synchronizeAllDevices();
119 float gpuTime = 0.0f;
125 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
136 CUDA_VERIFY(cudaProfilerStop());
137 printf(
"GPU time %.3f ms\n", gpuTime);
139 compareLists(cpuDistances.data(), cpuIndices.data(),
140 gpuDistances.data(), gpuIndices.data(),
142 "",
true, FLAGS_diff,
false);
144 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Index * read_index(FILE *f, bool try_mmap)
CPU wallclock elapsed timer.