faiss/gpu/perf/PerfClustering.cpp

117 lines
3.5 KiB
C++

/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "../../utils.h"
#include "../../Clustering.h"
#include "../GpuIndexFlat.h"
#include "../StandardGpuResources.h"
#include "IndexWrapper.h"
#include "../utils/DeviceUtils.h"
#include "../utils/Timer.h"
#include <gflags/gflags.h>
#include <memory>
#include <vector>
#include <cuda_profiler_api.h>
DEFINE_int32(num, 10000, "# of vecs");
DEFINE_int32(k, 100, "# of clusters");
DEFINE_int32(dim, 128, "# of dimensions");
DEFINE_int32(niter, 10, "# of iterations");
DEFINE_bool(L2_metric, true, "If true, use L2 metric. If false, use IP metric");
DEFINE_bool(use_float16, false, "use float16 vectors and math");
DEFINE_bool(transposed, false, "transposed vector storage");
DEFINE_bool(verbose, false, "turn on clustering logging");
DEFINE_int64(seed, -1, "specify random seed");
DEFINE_int32(num_gpus, 1, "number of gpus to use");
DEFINE_int64(min_paging_size, -1, "minimum size to use CPU -> GPU paged copies");
DEFINE_int64(pinned_mem, -1, "pinned memory allocation to use");
DEFINE_int32(max_points, -1, "max points per centroid");
using namespace faiss::gpu;
int main(int argc, char** argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
cudaProfilerStop();
auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
printf("using seed %ld\n", seed);
std::vector<float> vecs((size_t) FLAGS_num * FLAGS_dim);
faiss::float_rand(vecs.data(), vecs.size(), seed);
printf("K-means metric %s dim %d centroids %d num train %d niter %d\n",
FLAGS_L2_metric ? "L2" : "IP",
FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter);
printf("float16 math %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
auto initFn = [](faiss::gpu::GpuResources* res, int dev) ->
std::unique_ptr<faiss::gpu::GpuIndexFlat> {
if (FLAGS_pinned_mem >= 0) {
((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
FLAGS_pinned_mem);
}
GpuIndexFlatConfig config;
config.device = dev;
config.useFloat16 = FLAGS_use_float16;
config.storeTransposed = FLAGS_transposed;
auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
FLAGS_L2_metric ?
(faiss::gpu::GpuIndexFlat*)
new faiss::gpu::GpuIndexFlatL2(res, FLAGS_dim, config) :
(faiss::gpu::GpuIndexFlat*)
new faiss::gpu::GpuIndexFlatIP(res, FLAGS_dim, config));
if (FLAGS_min_paging_size >= 0) {
p->setMinPagingSize(FLAGS_min_paging_size);
}
return p;
};
IndexWrapper<faiss::gpu::GpuIndexFlat> gpuIndex(FLAGS_num_gpus, initFn);
CUDA_VERIFY(cudaProfilerStart());
faiss::gpu::synchronizeAllDevices();
float gpuTime = 0.0f;
faiss::ClusteringParameters cp;
cp.niter = FLAGS_niter;
cp.verbose = FLAGS_verbose;
if (FLAGS_max_points > 0) {
cp.max_points_per_centroid = FLAGS_max_points;
}
faiss::Clustering kmeans(FLAGS_dim, FLAGS_k, cp);
// Time k-means
{
CpuTimer timer;
kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
// There is a device -> host copy above, so no need to time
// additional synchronization with the GPU
gpuTime = timer.elapsedMilliseconds();
}
CUDA_VERIFY(cudaProfilerStop());
printf("k-means time %.3f ms\n", gpuTime);
CUDA_VERIFY(cudaDeviceSynchronize());
return 0;
}