Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
TestGpuSelect.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "../utils/DeviceUtils.h"
12 #include "../utils/BlockSelectKernel.cuh"
13 #include "../utils/WarpSelectKernel.cuh"
14 #include "../utils/HostTensor.cuh"
15 #include "../utils/DeviceTensor.cuh"
16 #include "../test/TestUtils.h"
17 #include <algorithm>
18 #include <gtest/gtest.h>
19 #include <sstream>
20 #include <unordered_map>
21 #include <vector>
22 
/// Entry point of the test binary: hands the command line to GoogleTest
/// and reports the overall result of the registered tests as the exit code.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);

  const int exitCode = RUN_ALL_TESTS();
  return exitCode;
}
27 
/// Runs one GPU k-selection over a random matrix and checks the result
/// against a reference computed on the host.
///
/// Each row of a rows x cols matrix (filled from faiss::gpu::randVecs) is
/// fully sorted on the CPU; the first k sorted values of every row must match
/// the k values returned by the GPU, position by position. Indices are allowed
/// to differ between CPU and GPU only when they refer to equal values, and the
/// GPU must never return the same index twice within a row.
///
/// @param rows number of rows selected independently
/// @param cols number of candidate values per row
/// @param k    number of results requested per row; must not exceed cols
/// @param dir  forwarded to the GPU select; the CPU reference sorts in
///             descending order when true and ascending order when false
/// @param warp true to exercise runWarpSelect, false for runBlockSelect
void testForSize(int rows, int cols, int k, bool dir, bool warp) {
  // The reference comparison reads the first k sorted entries of every row,
  // so a larger k would index past the end of the reference data.
  ASSERT_LE(k, cols) << "k " << k << " exceeds cols " << cols;

  std::vector<float> v = faiss::gpu::randVecs(rows, cols);
  faiss::gpu::HostTensor<float, 2, true> hostVal({rows, cols});

  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      hostVal[r][c] = v[r * cols + c];
    }
  }

  // Orders (index, value) pairs by value only; ties are left in whatever
  // order std::sort produces.
  const auto comesFirst =
      [dir](const std::pair<int, float>& a, const std::pair<int, float>& b) {
        return dir ? a.second > b.second : a.second < b.second;
      };

  // Per row: every (column index, value) pair, sorted in selection order
  std::vector<std::vector<std::pair<int, float>>> hostOutValAndInd;
  hostOutValAndInd.reserve(rows);

  for (int r = 0; r < rows; ++r) {
    hostOutValAndInd.emplace_back();
    auto& closest = hostOutValAndInd.back();
    closest.reserve(cols);

    for (int c = 0; c < cols; ++c) {
      closest.emplace_back(c, (float) hostVal[r][c]);
    }

    std::sort(closest.begin(), closest.end(), comesFirst);
  }

  // Select top-k on GPU. The input is copied to the device first; the
  // trailing 0 is presumably the CUDA stream, as in the copies back to the
  // host below -- TODO confirm against DeviceTensor.cuh.
  faiss::gpu::DeviceTensor<float, 2, true> gpuVal(hostVal, 0);
  faiss::gpu::DeviceTensor<float, 2, true> gpuOutVal({rows, k});
  faiss::gpu::DeviceTensor<int, 2, true> gpuOutInd({rows, k});

  if (warp) {
    faiss::gpu::runWarpSelect(gpuVal, gpuOutVal, gpuOutInd, dir, k, 0);
  } else {
    faiss::gpu::runBlockSelect(gpuVal, gpuOutVal, gpuOutInd, dir, k, 0);
  }

  // Copy back to CPU
  faiss::gpu::HostTensor<float, 2, true> outVal(gpuOutVal, 0);
  faiss::gpu::HostTensor<int, 2, true> outInd(gpuOutInd, 0);

  for (int r = 0; r < rows; ++r) {
    // GPU index -> result position at which it was first returned
    std::unordered_map<int, int> seenIndices;

    for (int i = 0; i < k; ++i) {
      float gpuV = outVal[r][i];
      float cpuV = hostOutValAndInd[r][i].second;

      // Selection only moves values around, so exact equality is expected
      EXPECT_EQ(gpuV, cpuV) <<
        "rows " << rows << " cols " << cols << " k " << k << " dir " << dir
                << " row " << r << " ind " << i;

      // If there are identical elements in a row that should be
      // within the top-k, then it is possible that the index can
      // differ, because the order in which the GPU will see the
      // equivalent values is different than the CPU (and will remain
      // unspecified, since this is affected by the choice of
      // k-selection algorithm that we use)
      int gpuInd = outInd[r][i];
      int cpuInd = hostOutValAndInd[r][i].first;

      // A returned index outside the row cannot be used to gather from the
      // input; report it and move on instead of reading out of bounds.
      if (gpuInd < 0 || gpuInd >= cols) {
        ADD_FAILURE() <<
          "rows " << rows << " cols " << cols << " k " << k << " dir " << dir
                  << " row " << r << " ind " << i
                  << " out-of-range index " << gpuInd;
        continue;
      }

      // We should never see duplicate indices, however
      auto itSeenIndex = seenIndices.find(gpuInd);

      if (itSeenIndex != seenIndices.end()) {
        ADD_FAILURE() <<
          "Row " << r << " user index " << gpuInd << " was seen at both " <<
          itSeenIndex->second << " and " << i;
      }

      seenIndices[gpuInd] = i;

      if (gpuInd != cpuInd) {
        // Gather the values from the original data via index; the
        // values should be the same
        float gpuGatherV = hostVal[r][gpuInd];
        float cpuGatherV = hostVal[r][cpuInd];

        EXPECT_EQ(gpuGatherV, cpuGatherV) <<
          "rows " << rows << " cols " << cols << " k " << k << " dir " << dir
                  << " row " << r << " ind " << i << " source ind "
                  << gpuInd << " " << cpuInd;
      }
    }
  }
}
118 
// Block-select over randomly shaped inputs; k is a random value capped at
// the number of columns
TEST(TestGpuSelect, test) {
  for (int remaining = 10; remaining > 0; --remaining) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 30000);
    const int requestedK = faiss::gpu::randVal(1, 1024);
    const int numSelect = std::min(numCols, requestedK);
    const bool direction = faiss::gpu::randBool();

    testForSize(numRows, numCols, numSelect, direction, /*warp=*/false);
  }
}
130 
// Block-select where only a single element per row is requested (k = 1)
TEST(TestGpuSelect, test1) {
  for (int remaining = 5; remaining > 0; --remaining) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 30000);
    const bool direction = faiss::gpu::randBool();

    testForSize(numRows, numCols, /*k=*/1, direction, /*warp=*/false);
  }
}
141 
// Block-select with k equal to the number of columns: every value of a row
// is returned, just in sorted order
TEST(TestGpuSelect, testExact) {
  for (int remaining = 5; remaining > 0; --remaining) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 1024);
    const bool direction = faiss::gpu::randBool();

    testForSize(numRows, numCols, /*k=*/numCols, direction, /*warp=*/false);
  }
}
153 
// Warp-select over randomly shaped inputs; k is a random value capped at
// the number of columns
TEST(TestGpuSelect, testWarp) {
  for (int remaining = 10; remaining > 0; --remaining) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 30000);
    const int requestedK = faiss::gpu::randVal(1, 1024);
    const int numSelect = std::min(numCols, requestedK);
    const bool direction = faiss::gpu::randBool();

    testForSize(numRows, numCols, numSelect, direction, /*warp=*/true);
  }
}
165 
// Warp-select where only a single element per row is requested (k = 1)
TEST(TestGpuSelect, test1Warp) {
  for (int remaining = 5; remaining > 0; --remaining) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 30000);
    const bool direction = faiss::gpu::randBool();

    testForSize(numRows, numCols, /*k=*/1, direction, /*warp=*/true);
  }
}
176 
// Warp-select with k equal to the number of columns: every value of a row
// is returned, just in sorted order
TEST(TestGpuSelect, testExactWarp) {
  for (int remaining = 5; remaining > 0; --remaining) {
    const int numRows = faiss::gpu::randVal(10, 100);
    const int numCols = faiss::gpu::randVal(1, 1024);
    const bool direction = faiss::gpu::randBool();

    testForSize(numRows, numCols, /*k=*/numCols, direction, /*warp=*/true);
  }
}