Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
CompareIVFFlat.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "../../IndexIVFFlat.h"
12 #include "../../index_io.h"
13 #include "../../utils.h"
14 
15 #include "../GpuIndexIVFFlat.h"
16 #include "IndexWrapper.h"
17 #include "../test/TestUtils.h"
18 #include "../utils/DeviceTensor.cuh"
19 #include "../utils/DeviceUtils.h"
20 #include "../utils/HostTensor.cuh"
21 #include "../utils/Timer.h"
22 #include <gflags/gflags.h>
23 #include <map>
24 #include <memory>
25 #include <vector>
26 
27 #include <cuda_profiler_api.h>
28 
29 DEFINE_int32(nprobe, 5, "number of coarse centroids to probe");
30 DEFINE_int32(k, 3, "final number of closest results returned");
31 DEFINE_int32(num_queries, 3, "number of query vectors");
32 DEFINE_string(in, "/home/jhj/local/index.out", "index file for input");
33 DEFINE_bool(diff, true, "show exact distance + index output discrepancies");
34 DEFINE_bool(use_float16, false, "use encodings in float16");
35 DEFINE_bool(use_float16_coarse, false, "coarse quantizer in float16");
36 DEFINE_int64(seed, -1, "specify random seed");
37 DEFINE_int32(num_gpus, 1, "number of gpus to use");
38 DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
39 
40 using namespace faiss::gpu;
41 
42 int main(int argc, char** argv) {
43  gflags::ParseCommandLineFlags(&argc, &argv, true);
44 
45  cudaProfilerStop();
46 
47  auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
48  printf("using seed %ld\n", seed);
49 
50  auto numQueries = FLAGS_num_queries;
51 
52  auto index = std::unique_ptr<faiss::IndexIVFFlat>(
53  dynamic_cast<faiss::IndexIVFFlat*>(faiss::read_index(FLAGS_in.c_str())));
54  FAISS_ASSERT((bool) index);
55  index->nprobe = FLAGS_nprobe;
56 
57  auto dim = index->d;
58 
59  printf("Database: dim %d num vecs %ld\n", dim, index->ntotal);
60  printf("Coarse centroids: %ld\n", index->quantizer->ntotal);
61  printf("L2 lookup: %d queries, nprobe %d, total k %d\n",
62  numQueries, FLAGS_nprobe, FLAGS_k);
63  printf("float16 coarse quantizer %s\n",
64  FLAGS_use_float16_coarse ? "enabled" : "disabled");
65  printf("float16 encoding %s\n",
66  FLAGS_use_float16 ? "enabled" : "disabled");
67 
68  // Convert to GPU index
69  printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
70 
71  auto initFn = [&index](faiss::gpu::GpuResources* res, int dev) ->
72  std::unique_ptr<faiss::gpu::GpuIndexIVFFlat> {
73  GpuIndexIVFFlatConfig config;
74  config.device = dev;
75  config.indicesOptions = (faiss::gpu::IndicesOptions) FLAGS_index;
76  config.flatConfig.useFloat16 = FLAGS_use_float16_coarse;
77  config.useFloat16IVFStorage = FLAGS_use_float16;
78 
79  auto p = std::unique_ptr<faiss::gpu::GpuIndexIVFFlat>(
81  index->d,
82  index->nlist,
83  index->metric_type,
84  config));
85  p->copyFrom(index.get());
86  return p;
87  };
88 
89  IndexWrapper<faiss::gpu::GpuIndexIVFFlat> gpuIndex(FLAGS_num_gpus, initFn);
90  gpuIndex.setNumProbes(FLAGS_nprobe);
91  printf("copy done\n");
92 
93  // Build query vectors
94  HostTensor<float, 2, true> cpuQuery({numQueries, dim});
95  faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
96 
97  // Time faiss CPU
98  HostTensor<float, 2, true> cpuDistances({numQueries, FLAGS_k});
99  HostTensor<faiss::Index::idx_t, 2, true> cpuIndices({numQueries, FLAGS_k});
100 
101  float cpuTime = 0.0f;
102 
103  {
104  CpuTimer timer;
105  index->search(numQueries,
106  cpuQuery.data(),
107  FLAGS_k,
108  cpuDistances.data(),
109  cpuIndices.data());
110 
111  cpuTime = timer.elapsedMilliseconds();
112  }
113 
114  printf("CPU time %.3f ms\n", cpuTime);
115 
116  HostTensor<float, 2, true> gpuDistances({numQueries, FLAGS_k});
117  HostTensor<faiss::Index::idx_t, 2, true> gpuIndices({numQueries, FLAGS_k});
118 
119  CUDA_VERIFY(cudaProfilerStart());
120  faiss::gpu::synchronizeAllDevices();
121 
122  float gpuTime = 0.0f;
123 
124  // Time GPU
125  {
126  CpuTimer timer;
127 
128  gpuIndex.getIndex()->search(cpuQuery.getSize(0),
129  cpuQuery.data(),
130  FLAGS_k,
131  gpuDistances.data(),
132  gpuIndices.data());
133 
134  // There is a device -> host copy above, so no need to time
135  // additional synchronization with the GPU
136  gpuTime = timer.elapsedMilliseconds();
137  }
138 
139  CUDA_VERIFY(cudaProfilerStop());
140  printf("GPU time %.3f ms\n", gpuTime);
141 
142  compareLists(cpuDistances.data(), cpuIndices.data(),
143  gpuDistances.data(), gpuIndices.data(),
144  numQueries, FLAGS_k,
145  "", true, FLAGS_diff, false);
146 
147  CUDA_VERIFY(cudaDeviceSynchronize());
148  // printf("\ncudaMalloc usage %zd\n",
149  // resources.getMemoryManager().getHighWaterCudaMalloc());
150 
151  return 0;
152 }
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Definition: Timer.cpp:52
CPU wallclock elapsed timer.
Definition: Timer.h:42
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:35
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:27
GpuIndexFlatConfig flatConfig
Configuration for the coarse quantizer object.
Definition: GpuIndexIVF.h:34
IndicesOptions indicesOptions
Index storage options for the GPU.
Definition: GpuIndexIVF.h:31