Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
CompareIVFFlat.cu
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #include "../../IndexIVF.h"
13 #include "../../index_io.h"
14 #include "../../utils.h"
15 
16 #include "../GpuIndexIVFFlat.h"
17 #include "IndexWrapper.h"
18 #include "../test/TestUtils.h"
19 #include "../utils/DeviceTensor.cuh"
20 #include "../utils/DeviceUtils.h"
21 #include "../utils/HostTensor.cuh"
22 #include "../utils/Timer.h"
23 #include <gflags/gflags.h>
24 #include <map>
25 #include <memory>
26 #include <vector>
27 
28 #include <cuda_profiler_api.h>
29 
30 DEFINE_int32(nprobe, 5, "number of coarse centroids to probe");
31 DEFINE_int32(k, 3, "final number of closest results returned");
32 DEFINE_int32(num_queries, 3, "number of query vectors");
33 DEFINE_string(in, "/home/jhj/local/index.out", "index file for input");
34 DEFINE_bool(diff, true, "show exact distance + index output discrepancies");
35 DEFINE_bool(use_float16, false, "use encodings in float16");
36 DEFINE_bool(use_float16_coarse, false, "coarse quantizer in float16");
37 DEFINE_int64(seed, -1, "specify random seed");
38 DEFINE_int32(num_gpus, 1, "number of gpus to use");
39 DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
40 
41 using namespace faiss::gpu;
42 
43 int main(int argc, char** argv) {
44  google::ParseCommandLineFlags(&argc, &argv, true);
45 
46  cudaProfilerStop();
47 
48  auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
49  printf("using seed %ld\n", seed);
50 
51  auto numQueries = FLAGS_num_queries;
52 
53  auto index = std::unique_ptr<faiss::IndexIVFFlat>(
54  dynamic_cast<faiss::IndexIVFFlat*>(faiss::read_index(FLAGS_in.c_str())));
55  FAISS_ASSERT((bool) index);
56  index->nprobe = FLAGS_nprobe;
57 
58  auto dim = index->d;
59 
60  printf("Database: dim %d num vecs %ld\n", dim, index->ntotal);
61  printf("Coarse centroids: %ld\n", index->quantizer->ntotal);
62  printf("L2 lookup: %d queries, nprobe %d, total k %d\n",
63  numQueries, FLAGS_nprobe, FLAGS_k);
64  printf("float16 coarse quantizer %s\n",
65  FLAGS_use_float16_coarse ? "enabled" : "disabled");
66  printf("float16 encoding %s\n",
67  FLAGS_use_float16 ? "enabled" : "disabled");
68 
69  // Convert to GPU index
70  printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
71 
72  auto initFn = [&index](faiss::gpu::GpuResources* res, int dev) ->
73  std::unique_ptr<faiss::gpu::GpuIndexIVFFlat> {
74  auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFFlat>(
76  dev,
77  FLAGS_use_float16_coarse,
78  FLAGS_use_float16,
79  index->d, index->nlist,
80  (faiss::gpu::IndicesOptions) FLAGS_index,
81  index->metric_type));
82  p->copyFrom(index.get());
83  return p;
84  };
85 
86  IndexWrapper<faiss::gpu::GpuIndexIVFFlat> gpuIndex(FLAGS_num_gpus, initFn);
87  gpuIndex.setNumProbes(FLAGS_nprobe);
88  printf("copy done\n");
89 
90  // Build query vectors
91  HostTensor<float, 2, true> cpuQuery({numQueries, dim});
92  faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
93 
94  // Time faiss CPU
95  HostTensor<float, 2, true> cpuDistances({numQueries, FLAGS_k});
96  HostTensor<faiss::Index::idx_t, 2, true> cpuIndices({numQueries, FLAGS_k});
97 
98  float cpuTime = 0.0f;
99 
100  {
101  CpuTimer timer;
102  index->search(numQueries,
103  cpuQuery.data(),
104  FLAGS_k,
105  cpuDistances.data(),
106  cpuIndices.data());
107 
108  cpuTime = timer.elapsedMilliseconds();
109  }
110 
111  printf("CPU time %.3f ms\n", cpuTime);
112 
113  HostTensor<float, 2, true> gpuDistances({numQueries, FLAGS_k});
114  HostTensor<faiss::Index::idx_t, 2, true> gpuIndices({numQueries, FLAGS_k});
115 
116  CUDA_VERIFY(cudaProfilerStart());
117  faiss::gpu::synchronizeAllDevices();
118 
119  float gpuTime = 0.0f;
120 
121  // Time GPU
122  {
123  CpuTimer timer;
124 
125  gpuIndex.getIndex()->search(cpuQuery.getSize(0),
126  cpuQuery.data(),
127  FLAGS_k,
128  gpuDistances.data(),
129  gpuIndices.data());
130 
131  // There is a device -> host copy above, so no need to time
132  // additional synchronization with the GPU
133  gpuTime = timer.elapsedMilliseconds();
134  }
135 
136  CUDA_VERIFY(cudaProfilerStop());
137  printf("GPU time %.3f ms\n", gpuTime);
138 
139  compareLists(cpuDistances.data(), cpuIndices.data(),
140  gpuDistances.data(), gpuIndices.data(),
141  numQueries, FLAGS_k,
142  "", true, FLAGS_diff, false);
143 
144  CUDA_VERIFY(cudaDeviceSynchronize());
145  // printf("\ncudaMalloc usage %zd\n",
146  // resources.getMemoryManager().getHighWaterCudaMalloc());
147 
148  return 0;
149 }
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Definition: Timer.cpp:53
Index * read_index(FILE *f, bool try_mmap)
Definition: index_io.cpp:476
CPU wallclock elapsed timer.
Definition: Timer.h:43