11 #include "../utils/DeviceUtils.h"
12 #include "../utils/BlockSelectKernel.cuh"
13 #include "../utils/WarpSelectKernel.cuh"
14 #include "../utils/HostTensor.cuh"
15 #include "../utils/DeviceTensor.cuh"
16 #include "../test/TestUtils.h"
18 #include <gflags/gflags.h>
19 #include <gtest/gtest.h>
21 #include <unordered_map>
// Command-line flags (gflags) for this select driver. They are parsed at the
// top of main() by gflags::ParseCommandLineFlags and read via FLAGS_<name>.

// --rows (default 10000): row count of the input matrix. main() passes it to
// faiss::gpu::randVecs and uses it as the outer bound when filling hostVal.
24 DEFINE_int32(rows, 10000,
"rows in matrix");
// --cols (default 40000): column count of the input matrix. main() passes it
// to faiss::gpu::randVecs and uses it as the row stride (r * cols + c) when
// copying the generated data into hostVal.
25 DEFINE_int32(cols, 40000,
"cols in matrix");
// --k (default 100): the "k" for the selection.
// NOTE(review): presumably the number of elements selected per row; the code
// that consumes FLAGS_k is not visible here -- confirm.
26 DEFINE_int32(k, 100,
"k");
// --dir (default false): direction of the sort/selection.
// NOTE(review): which of true/false means ascending vs. descending is not
// visible in this part of the file -- confirm against the select call sites.
27 DEFINE_bool(dir,
false,
"direction of sort");
// --warp (default false): "warp select".
// NOTE(review): presumably switches main() between runWarpSelect and
// runBlockSelect; the branch itself is not visible here -- confirm.
28 DEFINE_bool(warp,
false,
"warp select");
// --iter (default 5): number of times main() repeats the select call for each
// value of k (bound of the inner loop).
29 DEFINE_int32(iter, 5,
"iterations to run");
// --k_powers (default false): per the help text, run over k = powers of 2 from
// 1 to 1024. main() iterates k from startK to limitK, doubling each step.
// NOTE(review): how this flag sets startK/limitK is not visible -- confirm.
30 DEFINE_bool(k_powers,
false,
"test k powers of 2 from 1 -> 1024");
32 int main(
int argc,
char** argv) {
33 gflags::ParseCommandLineFlags(&argc, &argv,
true);
35 std::vector<float> v = faiss::gpu::randVecs(FLAGS_rows, FLAGS_cols);
38 for (
int r = 0; r < FLAGS_rows; ++r) {
39 for (
int c = 0; c < FLAGS_cols; ++c) {
40 hostVal[r][c] = v[r * FLAGS_cols + c];
59 for (
int k = startK; k <= limitK; k *= 2) {
60 for (
int i = 0; i < FLAGS_iter; ++i) {
62 faiss::gpu::runWarpSelect(gpuVal, gpuOutVal, gpuOutInd,
65 faiss::gpu::runBlockSelect(gpuVal, gpuOutVal, gpuOutInd,
71 cudaDeviceSynchronize();