12 #include "../../utils.h"
13 #include "../../Clustering.h"
14 #include "../GpuIndexFlat.h"
15 #include "../StandardGpuResources.h"
16 #include "IndexWrapper.h"
17 #include "../utils/DeviceUtils.h"
18 #include "../utils/Timer.h"
19 #include <gflags/gflags.h>
23 #include <cuda_profiler_api.h>
// Command-line flags for the k-means benchmark. Each DEFINE_* entry is read
// below in main() through the matching FLAGS_<name> variable.

// Number of training vectors. main() allocates num * dim floats for the input
// and passes num to kmeans.train().
25 DEFINE_int32(num, 10000,
"# of vecs");
// Number of clusters; reported as "centroids" in the configuration printout.
26 DEFINE_int32(k, 100,
"# of clusters");
// Vector dimensionality; used to size the input buffer and passed to the GPU
// flat index constructor.
27 DEFINE_int32(dim, 128,
"# of dimensions");
// Number of clustering iterations; copied into cp.niter.
28 DEFINE_int32(niter, 10,
"# of iterations");
// Metric selector: true means L2, false means inner product (IP).
29 DEFINE_bool(L2_metric,
true,
"If true, use L2 metric. If false, use IP metric");
// Passed to the GPU flat index constructor to request float16 vectors and math.
30 DEFINE_bool(use_float16,
false,
"use float16 vectors and math");
// Copied into cp.verbose to turn on clustering logging.
31 DEFINE_bool(verbose,
false,
"turn on clustering logging");
// Random seed for generating the input vectors. The default -1 means
// "not specified": main() then falls back to time(nullptr).
32 DEFINE_int64(seed, -1,
"specify random seed");
// Number of GPUs to use.
// NOTE(review): no use of this flag is visible in this excerpt - confirm how
// it is consumed.
33 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
// Minimum size at which CPU -> GPU paged copies are used. Only applied (via
// setMinPagingSize) when >= 0; the default -1 leaves the index setting alone.
34 DEFINE_int64(min_paging_size, -1,
"minimum size to use CPU -> GPU paged copies");
// Pinned memory allocation to use. main() only acts on it when >= 0.
// NOTE(review): presumably a size in bytes handed to the GPU resources object;
// the code that consumes it is not visible here - confirm.
35 DEFINE_int64(pinned_mem, -1,
"pinned memory allocation to use");
// Maximum points per centroid. main() only acts on it when > 0.
// NOTE(review): presumably forwarded to the clustering parameters'
// max_points_per_centroid - confirm against the full source.
36 DEFINE_int32(max_points, -1,
"max points per centroid");
38 using namespace faiss::gpu;
40 int main(
int argc,
char** argv) {
41 google::ParseCommandLineFlags(&argc, &argv,
true);
45 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
46 printf(
"using seed %ld\n", seed);
48 std::vector<float> vecs((
size_t) FLAGS_num * FLAGS_dim);
49 faiss::float_rand(vecs.data(), vecs.size(), seed);
51 printf(
"K-means metric %s dim %d centroids %d num train %d niter %d\n",
52 FLAGS_L2_metric ?
"L2" :
"IP",
53 FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter);
54 printf(
"float16 math %s\n", FLAGS_use_float16 ?
"enabled" :
"disabled");
55 printf(
"verbose %s\n", FLAGS_verbose ?
"enabled" :
"disabled");
58 std::unique_ptr<faiss::gpu::GpuIndexFlat> {
59 if (FLAGS_pinned_mem >= 0) {
64 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
69 new faiss::gpu::
GpuIndexFlatIP(res, dev, FLAGS_dim, FLAGS_use_float16));
71 if (FLAGS_min_paging_size >= 0) {
72 p->setMinPagingSize(FLAGS_min_paging_size);
79 CUDA_VERIFY(cudaProfilerStart());
80 faiss::gpu::synchronizeAllDevices();
85 cp.
niter = FLAGS_niter;
86 cp.verbose = FLAGS_verbose;
88 if (FLAGS_max_points > 0) {
98 kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
105 CUDA_VERIFY(cudaProfilerStop());
106 printf(
"k-means time %.3f ms\n", gpuTime);
108 CUDA_VERIFY(cudaDeviceSynchronize());
// float elapsedMilliseconds(): returns the elapsed time in milliseconds.
// int niter: number of clustering iterations.
// CPU wallclock elapsed timer.
// int max_points_per_centroid: used to limit the size of the dataset.