Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
TestUtils.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #include "../test/TestUtils.h"
11 #include "../../utils.h"
12 #include <cmath>
13 #include <gtest/gtest.h>
14 #include <set>
15 #include <sstream>
16 #include <time.h>
17 #include <unordered_map>
18 
19 namespace faiss { namespace gpu {
20 
/// Symmetric relative error between two values: |a - b| divided by the mean
/// of |a| and |b|.
///
/// Inputs that compare equal report an error of exactly 0. Without this
/// guard the 0 vs. 0 case evaluates 0 / 0 and yields NaN, which fails any
/// tolerance comparison (`NaN <= tol` is false) even though the two values
/// match exactly.
///
/// @param a first value
/// @param b second value
/// @return 0 when a == b, otherwise |a - b| / (0.5 * (|a| + |b|))
inline float relativeError(float a, float b) {
  if (a == b) {
    return 0.0f;
  }

  return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
}
24 
// Seed shared by the random helpers in this file. The same value is handed
// to the faiss float_rand API; a test runs entirely within a single thread,
// so keeping it in a plain global is ok.
long s_seed{1};
28 
29 void newTestSeed() {
30  struct timespec t;
31  clock_gettime(CLOCK_REALTIME, &t);
32 
33  setTestSeed(t.tv_nsec);
34 }
35 
36 void setTestSeed(long seed) {
37  printf("testing with random seed %ld\n", seed);
38 
39  srand48(seed);
40  s_seed = seed;
41 }
42 
43 int randVal(int a, int b) {
44  EXPECT_GE(a, 0);
45  EXPECT_LE(a, b);
46 
47  return a + (lrand48() % (b + 1 - a));
48 }
49 
50 bool randBool() {
51  return randSelect<bool>({true, false});
52 }
53 
54 std::vector<float> randVecs(size_t num, size_t dim) {
55  std::vector<float> v(num * dim);
56 
57  faiss::float_rand(v.data(), v.size(), s_seed);
58  // unfortunately we generate separate sets of vectors, and don't
59  // want the same values
60  ++s_seed;
61 
62  return v;
63 }
64 
65 std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
66  std::vector<unsigned char> v(num * (dim / 8));
67 
68  faiss::byte_rand(v.data(), v.size(), s_seed);
69  // unfortunately we generate separate sets of vectors, and don't
70  // want the same values
71  ++s_seed;
72 
73  return v;
74 }
75 
76 void compareIndices(
77  const std::vector<float>& queryVecs,
78  faiss::Index& refIndex,
79  faiss::Index& testIndex,
80  int numQuery,
81  int /*dim*/,
82  int k,
83  const std::string& configMsg,
84  float maxRelativeError,
85  float pctMaxDiff1,
86  float pctMaxDiffN) {
87  // Compare
88  std::vector<float> refDistance(numQuery * k, 0);
89  std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
90  refIndex.search(numQuery, queryVecs.data(),
91  k, refDistance.data(), refIndices.data());
92 
93  std::vector<float> testDistance(numQuery * k, 0);
94  std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
95  testIndex.search(numQuery, queryVecs.data(),
96  k, testDistance.data(), testIndices.data());
97 
98  faiss::gpu::compareLists(refDistance.data(),
99  refIndices.data(),
100  testDistance.data(),
101  testIndices.data(),
102  numQuery, k,
103  configMsg,
104  true, false, true,
105  maxRelativeError, pctMaxDiff1, pctMaxDiffN);
106 }
107 
108 void compareIndices(faiss::Index& refIndex,
109  faiss::Index& testIndex,
110  int numQuery, int dim, int k,
111  const std::string& configMsg,
112  float maxRelativeError,
113  float pctMaxDiff1,
114  float pctMaxDiffN) {
115  auto queryVecs = faiss::gpu::randVecs(numQuery, dim);
116 
117  compareIndices(queryVecs,
118  refIndex,
119  testIndex,
120  numQuery, dim, k,
121  configMsg,
122  maxRelativeError,
123  pctMaxDiff1,
124  pctMaxDiffN);
125 }
126 
/// Reads element (i, j) of a 2-D array stored flat in row-major order at p,
/// where each row holds dim2 entries. The first dimension is not needed for
/// addressing and is ignored.
template <typename T>
inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
  const int offset = i * dim2 + j;
  return p[offset];
}
131 
132 void compareLists(const float* refDist,
133  const faiss::Index::idx_t* refInd,
134  const float* testDist,
135  const faiss::Index::idx_t* testInd,
136  int dim1, int dim2,
137  const std::string& configMsg,
138  bool printBasicStats, bool printDiffs, bool assertOnErr,
139  float maxRelativeError,
140  float pctMaxDiff1,
141  float pctMaxDiffN) {
142 
143  float maxAbsErr = 0.0f;
144  for (int i = 0; i < dim1 * dim2; ++i) {
145  maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
146  }
147  int numResults = dim1 * dim2;
148 
149  // query -> {index -> result position}
150  std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
151 
152  for (int query = 0; query < dim1; ++query) {
153  std::unordered_map<faiss::Index::idx_t, int> indices;
154 
155  for (int result = 0; result < dim2; ++result) {
156  indices[lookup(refInd, query, result, dim1, dim2)] = result;
157  }
158 
159  refIndexMap.emplace_back(std::move(indices));
160  }
161 
162  // See how far off the indices are
163  // Keep track of the difference for each entry
164  std::vector<std::vector<int>> indexDiffs;
165 
166  int diff1 = 0; // index differs by 1
167  int diffN = 0; // index differs by >1
168  int diffInf = 0; // index not found in the other
169  int nonUniqueIndices = 0;
170 
171  double avgDiff = 0.0;
172  int maxDiff = 0;
173  float maxRelErr = 0.0f;
174 
175  for (int query = 0; query < dim1; ++query) {
176  std::vector<int> diffs;
177  std::set<faiss::Index::idx_t> uniqueIndices;
178 
179  auto& indices = refIndexMap[query];
180 
181  for (int result = 0; result < dim2; ++result) {
182  auto t = lookup(testInd, query, result, dim1, dim2);
183 
184  // All indices reported within a query should be unique; this is
185  // a serious error if is otherwise the case
186  bool uniqueIndex = uniqueIndices.count(t) == 0;
187  if (assertOnErr) {
188  EXPECT_TRUE(uniqueIndex) << configMsg
189  << " " << query
190  << " " << result
191  << " " << t;
192  }
193 
194  if (!uniqueIndex) {
195  ++nonUniqueIndices;
196  } else {
197  uniqueIndices.insert(t);
198  }
199 
200  auto it = indices.find(t);
201  if (it != indices.end()) {
202  int diff = std::abs(result - it->second);
203  diffs.push_back(diff);
204 
205  if (diff == 1) {
206  ++diff1;
207  maxDiff = std::max(diff, maxDiff);
208  } else if (diff > 1) {
209  ++diffN;
210  maxDiff = std::max(diff, maxDiff);
211  }
212 
213  avgDiff += (double) diff;
214  } else {
215  ++diffInf;
216  diffs.push_back(-1);
217  // don't count this for maxDiff
218  }
219 
220  auto refD = lookup(refDist, query, result, dim1, dim2);
221  auto testD = lookup(testDist, query, result, dim1, dim2);
222 
223  float relErr = relativeError(refD, testD);
224 
225  if (assertOnErr) {
226  EXPECT_LE(relErr, maxRelativeError) << configMsg
227  << " (" << query << ", " << result
228  << ") refD: " << refD
229  << " testD: " << testD;
230  }
231 
232  maxRelErr = std::max(maxRelErr, relErr);
233  }
234 
235  indexDiffs.emplace_back(std::move(diffs));
236  }
237 
238  if (assertOnErr) {
239  EXPECT_LE((float) (diff1 + diffN + diffInf),
240  (float) numResults * pctMaxDiff1) << configMsg;
241 
242  // Don't count diffInf because that could be diff1 as far as we
243  // know
244  EXPECT_LE((float) diffN, (float) numResults * pctMaxDiffN) << configMsg;
245  }
246 
247  avgDiff /= (double) numResults;
248 
249  if (printBasicStats) {
250  if (!configMsg.empty()) {
251  printf("Config\n"
252  "----------------------------\n"
253  "%s\n",
254  configMsg.c_str());
255  }
256 
257  printf("Result error and differences\n"
258  "----------------------------\n"
259  "max abs diff %.7f rel diff %.7f\n"
260  "idx diff avg: %.5g max: %d\n"
261  "idx diff of 1: %d (%.3f%% of queries)\n"
262  "idx diff of >1: %d (%.3f%% of queries)\n"
263  "idx diff not found: %d (%.3f%% of queries)"
264  " [typically a last element inversion]\n"
265  "non-unique indices: %d (a serious error if >0)\n",
266  maxAbsErr, maxRelErr,
267  avgDiff, maxDiff,
268  diff1, 100.0f * (float) diff1 / (float) numResults,
269  diffN, 100.0f * (float) diffN / (float) numResults,
270  diffInf, 100.0f * (float) diffInf / (float) numResults,
271  nonUniqueIndices);
272  }
273 
274  if (printDiffs) {
275  printf("differences:\n");
276  printf("==================\n");
277  for (int query = 0; query < dim1; ++query) {
278  for (int result = 0; result < dim2; ++result) {
279  long refI = lookup(refInd, query, result, dim1, dim2);
280  long testI = lookup(testInd, query, result, dim1, dim2);
281 
282  if (refI != testI) {
283  float refD = lookup(refDist, query, result, dim1, dim2);
284  float testD = lookup(testDist, query, result, dim1, dim2);
285 
286  float maxDist = std::max(refD, testD);
287  float delta = std::abs(refD - testD);
288 
289  float relErr = delta / maxDist;
290 
291  if (refD == testD) {
292  printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
293  query, result,
294  indexDiffs[query][result],
295  refI, testI);
296  } else {
297  printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
298  "rel %.8f ref %a tst %a)\n",
299  query, result,
300  indexDiffs[query][result],
301  refI, testI, delta, relErr, refD, testD);
302  }
303  }
304  }
305  }
306  }
307 }
308 
309 } }
long idx_t
all indices are this type
Definition: Index.h:64
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0