9 #include "../../IndexIVFPQ.h"
10 #include "../../index_io.h"
11 #include "../../utils.h"
13 #include "../GpuIndexIVFPQ.h"
14 #include "IndexWrapper.h"
15 #include "../test/TestUtils.h"
16 #include "../utils/DeviceTensor.cuh"
17 #include "../utils/DeviceUtils.h"
18 #include "../utils/HostTensor.cuh"
19 #include "../utils/Timer.h"
21 #include <cuda_profiler_api.h>
22 #include <gflags/gflags.h>
27 DEFINE_int32(nprobe, 5,
"number of coarse centroids to probe");
28 DEFINE_int32(k, 3,
"final number of closest results returned");
29 DEFINE_int32(num_queries, 3,
"number of query vectors");
30 DEFINE_string(in,
"/home/jhj/local/index.out",
"index file for input");
31 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
32 DEFINE_bool(use_precomputed,
true,
"enable or disable precomputed codes");
33 DEFINE_bool(float16_lookup,
false,
"use float16 residual distance tables");
34 DEFINE_int64(seed, -1,
"specify random seed");
35 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
36 DEFINE_int32(index, 2,
"0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
38 using namespace faiss::gpu;
40 int main(
int argc,
char** argv) {
41 gflags::ParseCommandLineFlags(&argc, &argv,
true);
43 CUDA_VERIFY(cudaProfilerStop());
45 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
46 printf(
"using seed %ld\n", seed);
48 auto numQueries = FLAGS_num_queries;
50 auto index = std::unique_ptr<faiss::IndexIVFPQ>(
52 FAISS_ASSERT((
bool) index);
53 index->nprobe = FLAGS_nprobe;
55 if (!FLAGS_use_precomputed) {
56 index->use_precomputed_table = 0;
60 auto codes = index->pq.M;
61 auto bitsPerCode = index->pq.nbits;
63 printf(
"Database: dim %d num vecs %ld\n", dim, index->ntotal);
64 printf(
"Coarse centroids: %ld\n", index->quantizer->ntotal);
65 printf(
"PQ centroids: codes %ld bits per code %ld\n", codes, bitsPerCode);
66 printf(
"L2 lookup: %d queries, nprobe %d, total k %d, "
67 "precomputed codes %d\n\n",
68 numQueries, FLAGS_nprobe, FLAGS_k,
69 FLAGS_use_precomputed);
72 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
74 auto precomp = FLAGS_use_precomputed;
75 auto indicesOpt = (faiss::gpu::IndicesOptions) FLAGS_index;
76 auto useFloat16Lookup = FLAGS_float16_lookup;
78 auto initFn = [precomp, indicesOpt, useFloat16Lookup, &index]
80 std::unique_ptr<faiss::gpu::GpuIndexIVFPQ> {
88 auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFPQ>(
95 gpuIndex.setNumProbes(FLAGS_nprobe);
96 printf(
"copy done\n");
100 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
106 float cpuTime = 0.0f;
110 index->search(numQueries,
119 printf(
"CPU time %.3f ms\n", cpuTime);
124 CUDA_VERIFY(cudaProfilerStart());
125 faiss::gpu::synchronizeAllDevices();
127 float gpuTime = 0.0f;
133 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
144 CUDA_VERIFY(cudaProfilerStop());
145 printf(
"GPU time %.3f ms\n", gpuTime);
147 compareLists(cpuDistances.data(), cpuIndices.data(),
148 gpuDistances.data(), gpuIndices.data(),
150 "",
true, FLAGS_diff,
false);
152 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
bool usePrecomputedTables
CPU wallclock elapsed timer.
bool useFloat16LookupTables
int device
GPU device on which the index is resident.
IndicesOptions indicesOptions
Index storage options for the GPU.