// Faiss
// (Doxygen navigation: All Classes, Namespaces, Functions, Variables, Typedefs, Enumerations, Enumerator, Friends)
// CompareFlat.cu
1 
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.
11 
#include "../../IndexFlat.h"
#include "../../utils.h"
#include "../GpuIndexFlat.h"
#include "IndexWrapper.h"
#include "../test/TestUtils.h"
#include "../utils/DeviceTensor.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/HostTensor.cuh"
#include "../utils/Timer.h"
#include <gflags/gflags.h>
#include <cstdio>
#include <ctime>
#include <map>
#include <memory>
#include <vector>

#include <cuda_profiler_api.h>
27 
28 DEFINE_int32(k, 3, "final number of closest results returned");
29 DEFINE_int32(num, 128, "# of vecs");
30 DEFINE_int32(dim, 128, "# of dimensions");
31 DEFINE_int32(num_queries, 3, "number of query vectors");
32 DEFINE_bool(diff, true, "show exact distance + index output discrepancies");
33 DEFINE_bool(use_float16, false, "use encodings in float16 instead of float32");
34 DEFINE_bool(transposed, false, "store vectors transposed");
35 DEFINE_int64(seed, -1, "specify random seed");
36 DEFINE_int32(num_gpus, 1, "number of gpus to use");
37 DEFINE_int64(pinned_mem, 0, "pinned memory allocation to use");
38 
39 using namespace faiss::gpu;
40 
41 int main(int argc, char** argv) {
42  gflags::ParseCommandLineFlags(&argc, &argv, true);
43 
44  cudaProfilerStop();
45 
46  auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
47  printf("using seed %ld\n", seed);
48 
49  auto numQueries = FLAGS_num_queries;
50 
51  auto index = std::unique_ptr<faiss::IndexFlatL2>(
52  new faiss::IndexFlatL2(FLAGS_dim));
53 
54  HostTensor<float, 2, true> vecs({FLAGS_num, FLAGS_dim});
55  faiss::float_rand(vecs.data(), vecs.numElements(), seed);
56 
57  index->add(FLAGS_num, vecs.data());
58 
59  printf("Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
60  printf("L2 lookup: %d queries, total k %d\n",
61  numQueries, FLAGS_k);
62  printf("float16 encoding %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
63  printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
64 
65  // Convert to GPU index
66  printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
67 
68  auto initFn = [&index](faiss::gpu::GpuResources* res, int dev) ->
69  std::unique_ptr<faiss::gpu::GpuIndexFlatL2> {
70  ((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
71  FLAGS_pinned_mem);
72 
73  GpuIndexFlatConfig config;
74  config.device = dev;
75  config.useFloat16 = FLAGS_use_float16;
76  config.storeTransposed = FLAGS_transposed;
77 
78  auto p = std::unique_ptr<faiss::gpu::GpuIndexFlatL2>(
79  new faiss::gpu::GpuIndexFlatL2(res, index.get(), config));
80  return p;
81  };
82 
83  IndexWrapper<faiss::gpu::GpuIndexFlatL2> gpuIndex(FLAGS_num_gpus, initFn);
84  printf("copy done\n");
85 
86  // Build query vectors
87  HostTensor<float, 2, true> cpuQuery({numQueries, FLAGS_dim});
88  faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
89 
90  // Time faiss CPU
91  HostTensor<float, 2, true> cpuDistances({numQueries, FLAGS_k});
92  HostTensor<faiss::Index::idx_t, 2, true> cpuIndices({numQueries, FLAGS_k});
93 
94  float cpuTime = 0.0f;
95 
96  {
97  CpuTimer timer;
98  index->search(numQueries,
99  cpuQuery.data(),
100  FLAGS_k,
101  cpuDistances.data(),
102  cpuIndices.data());
103 
104  cpuTime = timer.elapsedMilliseconds();
105  }
106 
107  printf("CPU time %.3f ms\n", cpuTime);
108 
109  HostTensor<float, 2, true> gpuDistances({numQueries, FLAGS_k});
110  HostTensor<faiss::Index::idx_t, 2, true> gpuIndices({numQueries, FLAGS_k});
111 
112  CUDA_VERIFY(cudaProfilerStart());
113  faiss::gpu::synchronizeAllDevices();
114 
115  float gpuTime = 0.0f;
116 
117  // Time GPU
118  {
119  CpuTimer timer;
120 
121  gpuIndex.getIndex()->search(cpuQuery.getSize(0),
122  cpuQuery.data(),
123  FLAGS_k,
124  gpuDistances.data(),
125  gpuIndices.data());
126 
127  // There is a device -> host copy above, so no need to time
128  // additional synchronization with the GPU
129  gpuTime = timer.elapsedMilliseconds();
130  }
131 
132  CUDA_VERIFY(cudaProfilerStop());
133  printf("GPU time %.3f ms\n", gpuTime);
134 
135  compareLists(cpuDistances.data(), cpuIndices.data(),
136  gpuDistances.data(), gpuIndices.data(),
137  numQueries, FLAGS_k,
138  "", true, FLAGS_diff, false);
139 
140  CUDA_VERIFY(cudaDeviceSynchronize());
141  // printf("\ncudaMalloc usage %zd\n",
142  // resources.getMemoryManager().getHighWaterCudaMalloc());
143 
144  return 0;
145 }
// Doxygen cross-references for symbols used above:
//   float elapsedMilliseconds() -- returns elapsed time in milliseconds
//     (definition: Timer.cpp:53).
//   CpuTimer -- CPU wallclock elapsed timer (definition: Timer.h:43).