10 #include "../../IndexIVFFlat.h"
11 #include "../../index_io.h"
12 #include "../../utils.h"
14 #include "../GpuIndexIVFFlat.h"
15 #include "IndexWrapper.h"
16 #include "../test/TestUtils.h"
17 #include "../utils/DeviceTensor.cuh"
18 #include "../utils/DeviceUtils.h"
19 #include "../utils/HostTensor.cuh"
20 #include "../utils/Timer.h"
21 #include <gflags/gflags.h>
26 #include <cuda_profiler_api.h>
// Command-line flags for this benchmark, declared with gflags.
// Each DEFINE_<type>(name, default, help) is exposed to the program as
// FLAGS_<name> (e.g. FLAGS_nprobe, FLAGS_k, FLAGS_num_queries in main).

// Number of coarse centroids probed per query; main assigns it to
// index->nprobe and passes it to gpuIndex.setNumProbes().
28 DEFINE_int32(nprobe, 5,
"number of coarse centroids to probe");
// Number of nearest results requested per query (printed as "total k").
29 DEFINE_int32(k, 3,
"final number of closest results returned");
// Number of query vectors to search with.
30 DEFINE_int32(num_queries, 3,
"number of query vectors");
// Path of the index file to load.
// NOTE(review): the default is an absolute path under one developer's home
// directory, so --in presumably has to be passed explicitly elsewhere.
31 DEFINE_string(in,
"/home/jhj/local/index.out",
"index file for input");
// When true, discrepancies in distances/indices are shown; forwarded to
// compareLists() in main.
32 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
// Selects float16 encodings; main reports it as "float16 encoding".
33 DEFINE_bool(use_float16,
false,
"use encodings in float16");
// Selects a float16 coarse quantizer; main reports it as
// "float16 coarse quantizer".
34 DEFINE_bool(use_float16_coarse,
false,
"coarse quantizer in float16");
// Random seed. The default -1 is a sentinel: main substitutes time(nullptr)
// when the flag is left at -1.
35 DEFINE_int64(seed, -1,
"specify random seed");
// Number of GPUs the index is copied to.
36 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
// Where/how indices are stored: 0 = not on GPU, 1 = 32-bit on GPU,
// 2 = 64-bit on GPU (per the help string).
37 DEFINE_int32(index, 2,
"0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
39 using namespace faiss::gpu;
41 int main(
int argc,
char** argv) {
42 gflags::ParseCommandLineFlags(&argc, &argv,
true);
46 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
47 printf(
"using seed %ld\n", seed);
49 auto numQueries = FLAGS_num_queries;
51 auto index = std::unique_ptr<faiss::IndexIVFFlat>(
53 FAISS_ASSERT((
bool) index);
54 index->nprobe = FLAGS_nprobe;
58 printf(
"Database: dim %d num vecs %ld\n", dim, index->ntotal);
59 printf(
"Coarse centroids: %ld\n", index->quantizer->ntotal);
60 printf(
"L2 lookup: %d queries, nprobe %d, total k %d\n",
61 numQueries, FLAGS_nprobe, FLAGS_k);
62 printf(
"float16 coarse quantizer %s\n",
63 FLAGS_use_float16_coarse ?
"enabled" :
"disabled");
64 printf(
"float16 encoding %s\n",
65 FLAGS_use_float16 ?
"enabled" :
"disabled");
68 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
71 std::unique_ptr<faiss::gpu::GpuIndexIVFFlat> {
78 auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFFlat>(
84 p->copyFrom(index.get());
89 gpuIndex.setNumProbes(FLAGS_nprobe);
90 printf(
"copy done\n");
94 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
100 float cpuTime = 0.0f;
104 index->search(numQueries,
113 printf(
"CPU time %.3f ms\n", cpuTime);
118 CUDA_VERIFY(cudaProfilerStart());
119 faiss::gpu::synchronizeAllDevices();
121 float gpuTime = 0.0f;
127 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
138 CUDA_VERIFY(cudaProfilerStop());
139 printf(
"GPU time %.3f ms\n", gpuTime);
141 compareLists(cpuDistances.data(), cpuIndices.data(),
142 gpuDistances.data(), gpuIndices.data(),
144 "",
true, FLAGS_diff,
false);
146 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
bool useFloat16IVFStorage
CPU wallclock elapsed timer.
bool useFloat16
Whether or not data is stored as float16.
int device
GPU device on which the index is resident.
GpuIndexFlatConfig flatConfig
Configuration for the coarse quantizer object.
IndicesOptions indicesOptions
Index storage options for the GPU.