11 #include "../../IndexIVFPQ.h"
12 #include "../../index_io.h"
13 #include "../../utils.h"
15 #include "../GpuIndexIVFPQ.h"
16 #include "IndexWrapper.h"
17 #include "../test/TestUtils.h"
18 #include "../utils/DeviceTensor.cuh"
19 #include "../utils/DeviceUtils.h"
20 #include "../utils/HostTensor.cuh"
21 #include "../utils/Timer.h"
23 #include <cuda_profiler_api.h>
24 #include <gflags/gflags.h>
// Command-line flags (gflags) for this IVFPQ benchmark, which runs the same
// queries against the CPU index and a GPU copy and compares the results.

// --nprobe: assigned to the CPU index's `nprobe` and passed to
// gpuIndex.setNumProbes() in main.
29 DEFINE_int32(nprobe, 5,
"number of coarse centroids to probe");
// --k: reported as "total k" in the run summary printed by main.
// NOTE(review): presumably also the k passed to both search calls; their
// argument lists are not visible here -- confirm.
30 DEFINE_int32(k, 3,
"final number of closest results returned");
// --num_queries: read into `numQueries` and used as the query count for the
// CPU search.
31 DEFINE_int32(num_queries, 3,
"number of query vectors");
// --in: path of the index file to load. The default is an absolute path under
// one developer's home directory, so callers will normally need to override it.
32 DEFINE_string(in,
"/home/jhj/local/index.out",
"index file for input");
// --diff: forwarded to compareLists() when CPU and GPU results are compared.
33 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
// --use_precomputed: when false, main clears `use_precomputed_table` on the
// CPU index; the value is also captured for GPU index construction.
34 DEFINE_bool(use_precomputed,
true,
"enable or disable precomputed codes");
// --float16_lookup: captured as `useFloat16Lookup` for GPU index construction.
35 DEFINE_bool(float16_lookup,
false,
"use float16 residual distance tables");
// --seed: -1 is a sentinel meaning "seed from time(nullptr)"; the chosen seed
// is printed and used to generate the random query vectors.
36 DEFINE_int64(seed, -1,
"specify random seed");
// --num_gpus: number of GPUs the index is copied to (printed before the copy).
37 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
// --index: cast directly to faiss::gpu::IndicesOptions in main.
// NOTE(review): the numeric values in the help text must match that enum's
// enumerator values -- confirm against the enum definition.
38 DEFINE_int32(index, 2,
"0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
40 using namespace faiss::gpu;
42 int main(
int argc,
char** argv) {
43 gflags::ParseCommandLineFlags(&argc, &argv,
true);
45 CUDA_VERIFY(cudaProfilerStop());
47 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
48 printf(
"using seed %ld\n", seed);
50 auto numQueries = FLAGS_num_queries;
52 auto index = std::unique_ptr<faiss::IndexIVFPQ>(
54 FAISS_ASSERT((
bool) index);
55 index->nprobe = FLAGS_nprobe;
57 if (!FLAGS_use_precomputed) {
58 index->use_precomputed_table = 0;
62 auto codes = index->pq.M;
63 auto bitsPerCode = index->pq.nbits;
65 printf(
"Database: dim %d num vecs %ld\n", dim, index->ntotal);
66 printf(
"Coarse centroids: %ld\n", index->quantizer->ntotal);
67 printf(
"PQ centroids: codes %ld bits per code %ld\n", codes, bitsPerCode);
68 printf(
"L2 lookup: %d queries, nprobe %d, total k %d, "
69 "precomputed codes %d\n\n",
70 numQueries, FLAGS_nprobe, FLAGS_k,
71 FLAGS_use_precomputed);
74 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
76 auto precomp = FLAGS_use_precomputed;
77 auto indicesOpt = (faiss::gpu::IndicesOptions) FLAGS_index;
78 auto useFloat16Lookup = FLAGS_float16_lookup;
80 auto initFn = [precomp, indicesOpt, useFloat16Lookup, &index]
82 std::unique_ptr<faiss::gpu::GpuIndexIVFPQ> {
90 auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFPQ>(
97 gpuIndex.setNumProbes(FLAGS_nprobe);
98 printf(
"copy done\n");
102 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
108 float cpuTime = 0.0f;
112 index->search(numQueries,
121 printf(
"CPU time %.3f ms\n", cpuTime);
126 CUDA_VERIFY(cudaProfilerStart());
127 faiss::gpu::synchronizeAllDevices();
129 float gpuTime = 0.0f;
135 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
146 CUDA_VERIFY(cudaProfilerStop());
147 printf(
"GPU time %.3f ms\n", gpuTime);
149 compareLists(cpuDistances.data(), cpuIndices.data(),
150 gpuDistances.data(), gpuIndices.data(),
152 "",
true, FLAGS_diff,
false);
154 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
bool usePrecomputedTables
Index * read_index(FILE *f, bool try_mmap)
CPU wallclock elapsed timer.
bool useFloat16LookupTables
int device
GPU device on which the index is resident.
IndicesOptions indicesOptions
Index storage options for the GPU.