Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PerfClustering.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "../../utils.h"
12 #include "../../Clustering.h"
13 #include "../GpuIndexFlat.h"
14 #include "../StandardGpuResources.h"
15 #include "IndexWrapper.h"
16 #include "../utils/DeviceUtils.h"
17 #include "../utils/Timer.h"
18 #include <gflags/gflags.h>
19 #include <memory>
20 #include <vector>
21 
22 #include <cuda_profiler_api.h>
23 
// Command-line options (gflags) for the k-means clustering benchmark.
// Sentinel handling as applied by main():
//   - seed == -1 selects time(nullptr) as the random seed;
//   - min_paging_size and pinned_mem are applied only when >= 0;
//   - max_points is applied only when > 0.
24 DEFINE_int32(num, 10000, "# of vecs");
25 DEFINE_int32(k, 100, "# of clusters");
26 DEFINE_int32(dim, 128, "# of dimensions");
27 DEFINE_int32(niter, 10, "# of iterations");
28 DEFINE_bool(L2_metric, true, "If true, use L2 metric. If false, use IP metric");
29 DEFINE_bool(use_float16, false, "use float16 vectors and math");
30 DEFINE_bool(transposed, false, "transposed vector storage");
31 DEFINE_bool(verbose, false, "turn on clustering logging");
32 DEFINE_int64(seed, -1, "specify random seed");
33 DEFINE_int32(num_gpus, 1, "number of gpus to use");
34 DEFINE_int64(min_paging_size, -1, "minimum size to use CPU -> GPU paged copies");
35 DEFINE_int64(pinned_mem, -1, "pinned memory allocation to use");
36 DEFINE_int32(max_points, -1, "max points per centroid");
37 
38 using namespace faiss::gpu;
39 
40 int main(int argc, char** argv) {
41  gflags::ParseCommandLineFlags(&argc, &argv, true);
42 
43  cudaProfilerStop();
44 
45  auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
46  printf("using seed %ld\n", seed);
47 
48  std::vector<float> vecs((size_t) FLAGS_num * FLAGS_dim);
49  faiss::float_rand(vecs.data(), vecs.size(), seed);
50 
51  printf("K-means metric %s dim %d centroids %d num train %d niter %d\n",
52  FLAGS_L2_metric ? "L2" : "IP",
53  FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter);
54  printf("float16 math %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
55  printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
56  printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
57 
58  auto initFn = [](faiss::gpu::GpuResources* res, int dev) ->
59  std::unique_ptr<faiss::gpu::GpuIndexFlat> {
60  if (FLAGS_pinned_mem >= 0) {
61  ((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
62  FLAGS_pinned_mem);
63  }
64 
65  GpuIndexFlatConfig config;
66  config.device = dev;
67  config.useFloat16 = FLAGS_use_float16;
68  config.storeTransposed = FLAGS_transposed;
69 
70  auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
71  FLAGS_L2_metric ?
73  new faiss::gpu::GpuIndexFlatL2(res, FLAGS_dim, config) :
74  (faiss::gpu::GpuIndexFlat*)
75  new faiss::gpu::GpuIndexFlatIP(res, FLAGS_dim, config));
76 
77  if (FLAGS_min_paging_size >= 0) {
78  p->setMinPagingSize(FLAGS_min_paging_size);
79  }
80  return p;
81  };
82 
83  IndexWrapper<faiss::gpu::GpuIndexFlat> gpuIndex(FLAGS_num_gpus, initFn);
84 
85  CUDA_VERIFY(cudaProfilerStart());
86  faiss::gpu::synchronizeAllDevices();
87 
88  float gpuTime = 0.0f;
89 
91  cp.niter = FLAGS_niter;
92  cp.verbose = FLAGS_verbose;
93 
94  if (FLAGS_max_points > 0) {
95  cp.max_points_per_centroid = FLAGS_max_points;
96  }
97 
98  faiss::Clustering kmeans(FLAGS_dim, FLAGS_k, cp);
99 
100  // Time k-means
101  {
102  CpuTimer timer;
103 
104  kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
105 
106  // There is a device -> host copy above, so no need to time
107  // additional synchronization with the GPU
108  gpuTime = timer.elapsedMilliseconds();
109  }
110 
111  CUDA_VERIFY(cudaProfilerStop());
112  printf("k-means time %.3f ms\n", gpuTime);
113 
114  CUDA_VERIFY(cudaDeviceSynchronize());
115 
116  return 0;
117 }
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
Definition: Timer.cpp:52
int niter
clustering iterations
Definition: Clustering.h:25
CpuTimer
CPU wallclock elapsed timer.
Definition: Timer.h:42
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:35
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:27
int max_points_per_centroid
to limit size of dataset
Definition: Clustering.h:33