12 #include "../../utils.h"
13 #include "../../Clustering.h"
14 #include "../GpuIndexFlat.h"
15 #include "../StandardGpuResources.h"
16 #include "IndexWrapper.h"
17 #include "../utils/DeviceUtils.h"
18 #include "../utils/Timer.h"
19 #include <gflags/gflags.h>
23 #include <cuda_profiler_api.h>
25 DEFINE_int32(num, 10000,
"# of vecs");
26 DEFINE_int32(k, 100,
"# of clusters");
27 DEFINE_int32(dim, 128,
"# of dimensions");
28 DEFINE_int32(niter, 10,
"# of iterations");
29 DEFINE_bool(L2_metric,
true,
"If true, use L2 metric. If false, use IP metric");
30 DEFINE_bool(use_float16,
false,
"use float16 vectors and math");
31 DEFINE_bool(transposed,
false,
"transposed vector storage");
32 DEFINE_bool(verbose,
false,
"turn on clustering logging");
33 DEFINE_int64(seed, -1,
"specify random seed");
34 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
35 DEFINE_int64(min_paging_size, -1,
"minimum size to use CPU -> GPU paged copies");
36 DEFINE_int64(pinned_mem, -1,
"pinned memory allocation to use");
37 DEFINE_int32(max_points, -1,
"max points per centroid");
39 using namespace faiss::gpu;
// Benchmark entry point: parses the command-line flags, builds a random
// training set, prints the run configuration, runs k-means training
// against a GpuIndexFlat-backed index and prints the measured time.
//
// NOTE(review): this listing appears to omit a number of original lines
// (the declarations of `config`, `cp`, `kmeans`, `gpuIndex` and `gpuTime`
// are not visible, and several braces are never closed) -- confirm
// against the complete file before changing any code here.
41 int main(
int argc,
char** argv) {
// Parse the gflags definitions above; the trailing `true` is gflags'
// remove_flags argument.
42 gflags::ParseCommandLineFlags(&argc, &argv,
true);
// Use the seed given on the command line unless it is -1, in which case
// fall back to the current time.
46 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
47 printf(
"using seed %ld\n", seed);
// Training vectors: FLAGS_num * FLAGS_dim floats. The size_t cast binds
// to FLAGS_num, so the multiplication is carried out in size_t.
49 std::vector<float> vecs((
size_t) FLAGS_num * FLAGS_dim);
// Presumably fills the buffer with pseudo-random floats derived from
// `seed` -- verify against faiss::float_rand.
50 faiss::float_rand(vecs.data(), vecs.size(), seed);
// Echo the run configuration.
52 printf(
"K-means metric %s dim %d centroids %d num train %d niter %d\n",
53 FLAGS_L2_metric ?
"L2" :
"IP",
54 FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter);
55 printf(
"float16 math %s\n", FLAGS_use_float16 ?
"enabled" :
"disabled");
56 printf(
"transposed storage %s\n", FLAGS_transposed ?
"enabled" :
"disabled");
57 printf(
"verbose %s\n", FLAGS_verbose ?
"enabled" :
"disabled");
// Start of the code that produces a std::unique_ptr<GpuIndexFlat>.
// NOTE(review): presumably the tail of an index-factory lambda whose
// header and resource setup are missing from this listing -- confirm.
60 std::unique_ptr<faiss::gpu::GpuIndexFlat> {
// The pinned-memory option is only considered when the flag is >= 0; the
// body of this branch is not visible here.
61 if (FLAGS_pinned_mem >= 0) {
// Forward the float16 and transposed-storage flags into the index config.
68 config.useFloat16 = FLAGS_use_float16;
69 config.storeTransposed = FLAGS_transposed;
// Constructor arguments for the index are not visible in this listing.
71 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
// A non-negative min_paging_size flag is passed to the index.
78 if (FLAGS_min_paging_size >= 0) {
79 p->setMinPagingSize(FLAGS_min_paging_size);
// Start the CUDA profiler, then synchronize all devices.
86 CUDA_VERIFY(cudaProfilerStart());
87 faiss::gpu::synchronizeAllDevices();
// Copy the iteration count and verbosity from the flags into `cp`.
92 cp.
niter = FLAGS_niter;
93 cp.verbose = FLAGS_verbose;
// Only a positive max_points flag is acted on; the body of this branch
// is not visible here.
95 if (FLAGS_max_points > 0) {
// Train on the FLAGS_num vectors in `vecs`, using the index obtained
// from gpuIndex.
105 kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
// Stop the profiler and report `gpuTime` (printed as milliseconds).
112 CUDA_VERIFY(cudaProfilerStop());
113 printf(
"k-means time %.3f ms\n", gpuTime);
// Final device synchronization, checked by CUDA_VERIFY.
115 CUDA_VERIFY(cudaDeviceSynchronize());
// Reference notes (declaration -- description):
//   float elapsedMilliseconds() -- Returns elapsed time in milliseconds.
//   int niter -- clustering iterations.
//   CPU wallclock elapsed timer.
//   int max_points_per_centroid -- to limit size of dataset.