12 #include "../../IndexIVFPQ.h"
13 #include "../../index_io.h"
14 #include "../../utils.h"
16 #include "../GpuIndexIVFPQ.h"
17 #include "IndexWrapper.h"
18 #include "../test/TestUtils.h"
19 #include "../utils/DeviceTensor.cuh"
20 #include "../utils/DeviceUtils.h"
21 #include "../utils/HostTensor.cuh"
22 #include "../utils/Timer.h"
24 #include <cuda_profiler_api.h>
25 #include <gflags/gflags.h>
// Command-line flags (gflags). Each DEFINE_* below creates a FLAGS_<name>
// global that main() reads after google::ParseCommandLineFlags().

// Number of coarse centroids to probe. Applied to the CPU index
// (index->nprobe) and to the GPU index (setNumProbes).
30 DEFINE_int32(nprobe, 5,
"number of coarse centroids to probe");
// Number of nearest results per query; reported as "total k" in the run
// summary printed by main().
31 DEFINE_int32(k, 3,
"final number of closest results returned");
// Number of query vectors; passed as the query count to the CPU search.
32 DEFINE_int32(num_queries, 3,
"number of query vectors");
// Path of the index file to load.
// NOTE(review): the default is a machine-specific absolute path under one
// user's home directory; callers on other machines must pass --in explicitly.
33 DEFINE_string(in,
"/home/jhj/local/index.out",
"index file for input");
// Forwarded to compareLists() when CPU and GPU results are compared.
34 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
// When false, main() sets index->use_precomputed_table to 0 on the CPU index;
// the value is also handed to setPrecomputedCodes() on the GPU index.
35 DEFINE_bool(use_precomputed,
true,
"enable or disable precomputed codes");
// Captured by the GPU index init lambda as useFloat16Lookup.
// NOTE(review): how the lambda applies it is not visible here -- confirm.
36 DEFINE_bool(float16_lookup,
false,
"use float16 residual distance tables");
// Seed for the random query data (faiss::float_rand). The default -1 makes
// main() use time(nullptr) instead, so runs are not reproducible unless a
// seed is given.
37 DEFINE_int64(seed, -1,
"specify random seed");
// Number of GPUs; printed in the "Copying index to %d GPU(s)" message.
// NOTE(review): presumably also sizes the IndexWrapper replica set -- confirm.
38 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
// Cast directly to faiss::gpu::IndicesOptions in main(), so the accepted
// integers must stay in sync with that enum's values.
39 DEFINE_int32(index, 2,
"0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
41 using namespace faiss::gpu;
43 int main(
int argc,
char** argv) {
44 google::ParseCommandLineFlags(&argc, &argv,
true);
46 CUDA_VERIFY(cudaProfilerStop());
48 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
49 printf(
"using seed %ld\n", seed);
51 auto numQueries = FLAGS_num_queries;
53 auto index = std::unique_ptr<faiss::IndexIVFPQ>(
55 FAISS_ASSERT((
bool) index);
56 index->nprobe = FLAGS_nprobe;
58 if (!FLAGS_use_precomputed) {
59 index->use_precomputed_table = 0;
63 auto codes = index->pq.M;
64 auto bitsPerCode = index->pq.nbits;
66 printf(
"Database: dim %d num vecs %ld\n", dim, index->ntotal);
67 printf(
"Coarse centroids: %ld\n", index->quantizer->ntotal);
68 printf(
"PQ centroids: codes %ld bits per code %ld\n", codes, bitsPerCode);
69 printf(
"L2 lookup: %d queries, nprobe %d, total k %d, "
70 "precomputed codes %d\n\n",
71 numQueries, FLAGS_nprobe, FLAGS_k,
72 FLAGS_use_precomputed);
75 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
77 auto precomp = FLAGS_use_precomputed;
78 auto indicesOpt = (faiss::gpu::IndicesOptions) FLAGS_index;
79 auto useFloat16Lookup = FLAGS_float16_lookup;
81 auto initFn = [precomp, indicesOpt, useFloat16Lookup, &index]
83 std::unique_ptr<faiss::gpu::GpuIndexIVFPQ> {
84 auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFPQ>(
90 p->setPrecomputedCodes(precomp);
96 gpuIndex.setNumProbes(FLAGS_nprobe);
97 printf(
"copy done\n");
101 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
107 float cpuTime = 0.0f;
111 index->search(numQueries,
120 printf(
"CPU time %.3f ms\n", cpuTime);
125 CUDA_VERIFY(cudaProfilerStart());
126 faiss::gpu::synchronizeAllDevices();
128 float gpuTime = 0.0f;
134 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
145 CUDA_VERIFY(cudaProfilerStop());
146 printf(
"GPU time %.3f ms\n", gpuTime);
148 compareLists(cpuDistances.data(), cpuIndices.data(),
149 gpuDistances.data(), gpuIndices.data(),
151 "",
true, FLAGS_diff,
false);
153 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Index * read_index(FILE *f, bool try_mmap)
CPU wallclock elapsed timer.