11 #include "../../utils.h"
12 #include "../../Clustering.h"
13 #include "../GpuIndexFlat.h"
14 #include "../StandardGpuResources.h"
15 #include "IndexWrapper.h"
16 #include "../utils/DeviceUtils.h"
17 #include "../utils/Timer.h"
18 #include <gflags/gflags.h>
22 #include <cuda_profiler_api.h>
// Command-line flags (gflags) that configure this k-means benchmark.
// Each flag is read in main() through its generated FLAGS_<name> variable.

// Number of training vectors; main() allocates num * dim floats for them.
24 DEFINE_int32(num, 10000,
"# of vecs");
// Number of clusters (reported as "centroids" in main()'s summary line).
25 DEFINE_int32(k, 100,
"# of clusters");
// Dimensionality of each vector.
26 DEFINE_int32(dim, 128,
"# of dimensions");
// Number of clustering iterations; copied into cp.niter in main().
27 DEFINE_int32(niter, 10,
"# of iterations");
// Metric selection: true -> L2, false -> IP (as printed by main()).
28 DEFINE_bool(L2_metric,
true,
"If true, use L2 metric. If false, use IP metric");
// Requests float16 vectors and math; main() reports it as enabled/disabled.
29 DEFINE_bool(use_float16,
false,
"use float16 vectors and math");
// Requests transposed vector storage; main() reports it as enabled/disabled.
30 DEFINE_bool(transposed,
false,
"transposed vector storage");
// Turns on clustering logging; copied into cp.verbose in main().
31 DEFINE_bool(verbose,
false,
"turn on clustering logging");
// Random seed for the generated vectors. The default of -1 makes main()
// fall back to time(nullptr).
32 DEFINE_int64(seed, -1,
"specify random seed");
// Number of GPUs to use.
// NOTE(review): its use is not visible in this excerpt -- confirm in main().
33 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
// Minimum size for CPU -> GPU paged copies. Only applied (through
// setMinPagingSize) when >= 0, so the default of -1 leaves the index's own
// setting untouched.
34 DEFINE_int64(min_paging_size, -1,
"minimum size to use CPU -> GPU paged copies");
// Pinned memory allocation to use. main() only enters its pinned-memory
// branch when this is >= 0; the default of -1 skips that branch.
// NOTE(review): units are presumably bytes -- confirm against the branch body.
35 DEFINE_int64(pinned_mem, -1,
"pinned memory allocation to use");
// Max points per centroid. main() only acts on this when it is > 0; the
// default of -1 skips that branch.
36 DEFINE_int32(max_points, -1,
"max points per centroid");
38 using namespace faiss::gpu;
40 int main(
int argc,
char** argv) {
41 gflags::ParseCommandLineFlags(&argc, &argv,
true);
45 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
46 printf(
"using seed %ld\n", seed);
48 std::vector<float> vecs((
size_t) FLAGS_num * FLAGS_dim);
49 faiss::float_rand(vecs.data(), vecs.size(), seed);
51 printf(
"K-means metric %s dim %d centroids %d num train %d niter %d\n",
52 FLAGS_L2_metric ?
"L2" :
"IP",
53 FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter);
54 printf(
"float16 math %s\n", FLAGS_use_float16 ?
"enabled" :
"disabled");
55 printf(
"transposed storage %s\n", FLAGS_transposed ?
"enabled" :
"disabled");
56 printf(
"verbose %s\n", FLAGS_verbose ?
"enabled" :
"disabled");
59 std::unique_ptr<faiss::gpu::GpuIndexFlat> {
60 if (FLAGS_pinned_mem >= 0) {
70 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
77 if (FLAGS_min_paging_size >= 0) {
78 p->setMinPagingSize(FLAGS_min_paging_size);
85 CUDA_VERIFY(cudaProfilerStart());
86 faiss::gpu::synchronizeAllDevices();
91 cp.
niter = FLAGS_niter;
92 cp.verbose = FLAGS_verbose;
94 if (FLAGS_max_points > 0) {
104 kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
111 CUDA_VERIFY(cudaProfilerStop());
112 printf(
"k-means time %.3f ms\n", gpuTime);
114 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
int niter
clustering iterations
CPU wallclock elapsed timer.
bool useFloat16
Whether or not data is stored as float16.
int device
GPU device on which the index is resident.
int max_points_per_centroid
to limit size of dataset