Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
TestUtils.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "../test/TestUtils.h"
12 #include "../../utils.h"
13 #include <cmath>
14 #include <gtest/gtest.h>
15 #include <set>
16 #include <sstream>
17 #include <time.h>
18 #include <unordered_map>
19 
20 namespace faiss { namespace gpu {
21 
// Symmetric relative error between two values: |a - b| divided by the
// mean of |a| and |b|.
//
// Returns 0 when the two values compare equal. Without that guard,
// a == b == 0 evaluates 0 / 0 and yields NaN, which fails every
// EXPECT_LE comparison made on the result.
inline float relativeError(float a, float b) {
  // Equal inputs carry no error; if a != b at least one of them is
  // non-zero, so the denominator below cannot be zero
  if (a == b) {
    return 0.0f;
  }

  return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
}
25 
// Current seed shared by the test helpers: setTestSeed() stores the seed
// here and randVecs() passes it to the faiss float_rand API, incrementing
// it after each call. Access is unsynchronized, which is fine because a
// test uses these helpers from a single thread.
long s_seed{1};
29 
30 void newTestSeed() {
31  struct timespec t;
32  clock_gettime(CLOCK_REALTIME, &t);
33 
34  setTestSeed(t.tv_nsec);
35 }
36 
37 void setTestSeed(long seed) {
38  printf("testing with random seed %ld\n", seed);
39 
40  srand48(seed);
41  s_seed = seed;
42 }
43 
44 int randVal(int a, int b) {
45  EXPECT_GE(a, 0);
46  EXPECT_LE(a, b);
47 
48  return a + (lrand48() % (b + 1 - a));
49 }
50 
51 bool randBool() {
52  return randSelect<bool>({true, false});
53 }
54 
55 std::vector<float> randVecs(size_t num, size_t dim) {
56  std::vector<float> v(num * dim);
57  static bool first = true;
58 
59  faiss::float_rand(v.data(), v.size(), s_seed);
60  // unfortunately we generate separate sets of vectors, and don't
61  // want the same values
62  ++s_seed;
63 
64  return v;
65 }
66 
67 void compareIndices(const std::vector<float>& queryVecs,
68  faiss::Index& refIndex,
69  faiss::Index& testIndex,
70  int numQuery, int dim, int k,
71  const std::string& configMsg,
72  float maxRelativeError,
73  float pctMaxDiff1,
74  float pctMaxDiffN) {
75  // Compare
76  std::vector<float> refDistance(numQuery * k, 0);
77  std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
78  refIndex.search(numQuery, queryVecs.data(),
79  k, refDistance.data(), refIndices.data());
80 
81  std::vector<float> testDistance(numQuery * k, 0);
82  std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
83  testIndex.search(numQuery, queryVecs.data(),
84  k, testDistance.data(), testIndices.data());
85 
86  faiss::gpu::compareLists(refDistance.data(),
87  refIndices.data(),
88  testDistance.data(),
89  testIndices.data(),
90  numQuery, k,
91  configMsg,
92  true, false, true,
93  maxRelativeError, pctMaxDiff1, pctMaxDiffN);
94 }
95 
96 void compareIndices(faiss::Index& refIndex,
97  faiss::Index& testIndex,
98  int numQuery, int dim, int k,
99  const std::string& configMsg,
100  float maxRelativeError,
101  float pctMaxDiff1,
102  float pctMaxDiffN) {
103  auto queryVecs = faiss::gpu::randVecs(numQuery, dim);
104 
105  compareIndices(queryVecs,
106  refIndex,
107  testIndex,
108  numQuery, dim, k,
109  configMsg,
110  maxRelativeError,
111  pctMaxDiff1,
112  pctMaxDiffN);
113 }
114 
// Reads element (i, j) from a row-major 2-d array stored flat in `p`,
// where every row holds dim2 entries. The first dimension is not needed
// for addressing and is ignored.
template <typename T>
inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
  const int flatIndex = i * dim2 + j;
  return p[flatIndex];
}
119 
// Compares two sets of search results (reference vs. test) and reports how
// far they disagree, both in distance values and in result ordering.
//
// refDist / refInd   : reference distances and indices, each a flat
//                      dim1 x dim2 array addressed as [query * dim2 + result]
// testDist / testInd : results under test, same layout
// dim1, dim2         : number of queries and number of results per query
// configMsg          : description attached to every failure message and
//                      printed ahead of the statistics when non-empty
// printBasicStats    : print the summary statistics to stdout
// printDiffs         : print every entry whose index differs from the
//                      reference entry at the same position
// assertOnErr        : enable the (non-fatal) gtest expectations
// maxRelativeError   : upper bound on relativeError() for each entry
// pctMaxDiff1        : upper bound, as a fraction of dim1 * dim2 (it is
//                      multiplied directly, not divided by 100), on the
//                      number of entries that are displaced or missing
// pctMaxDiffN        : upper bound, as a fraction of dim1 * dim2, on the
//                      number of entries displaced by more than one position
void compareLists(const float* refDist,
                  const faiss::Index::idx_t* refInd,
                  const float* testDist,
                  const faiss::Index::idx_t* testInd,
                  int dim1, int dim2,
                  const std::string& configMsg,
                  bool printBasicStats, bool printDiffs, bool assertOnErr,
                  float maxRelativeError,
                  float pctMaxDiff1,
                  float pctMaxDiffN) {

  // Largest absolute distance difference over all entries, compared
  // position by position
  float maxAbsErr = 0.0f;
  for (int i = 0; i < dim1 * dim2; ++i) {
    maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
  }
  int numResults = dim1 * dim2;

  // query -> {index -> result position}
  // If an index appears more than once in a reference row, the last
  // position wins because the assignment below overwrites earlier ones
  std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;

  for (int query = 0; query < dim1; ++query) {
    std::unordered_map<faiss::Index::idx_t, int> indices;

    for (int result = 0; result < dim2; ++result) {
      indices[lookup(refInd, query, result, dim1, dim2)] = result;
    }

    refIndexMap.emplace_back(std::move(indices));
  }

  // See how far off the indices are
  // Keep track of the difference for each entry
  // (one row per query; -1 marks a test index absent from the reference row)
  std::vector<std::vector<int>> indexDiffs;

  int diff1 = 0; // index differs by 1
  int diffN = 0; // index differs by >1
  int diffInf = 0; // index not found in the other
  int nonUniqueIndices = 0;

  double avgDiff = 0.0;
  int maxDiff = 0;
  float maxRelErr = 0.0f;

  for (int query = 0; query < dim1; ++query) {
    std::vector<int> diffs;
    // Test indices already seen for this query, used to detect duplicates
    std::set<faiss::Index::idx_t> uniqueIndices;

    auto& indices = refIndexMap[query];

    for (int result = 0; result < dim2; ++result) {
      auto t = lookup(testInd, query, result, dim1, dim2);

      // All indices reported within a query should be unique; this is
      // a serious error if is otherwise the case
      bool uniqueIndex = uniqueIndices.count(t) == 0;
      if (assertOnErr) {
        EXPECT_TRUE(uniqueIndex) << configMsg
                                 << " " << query
                                 << " " << result
                                 << " " << t;
      }

      if (!uniqueIndex) {
        ++nonUniqueIndices;
      } else {
        uniqueIndices.insert(t);
      }

      // Positional displacement: distance between where the test placed
      // this index and where the reference placed it
      auto it = indices.find(t);
      if (it != indices.end()) {
        int diff = std::abs(result - it->second);
        diffs.push_back(diff);

        if (diff == 1) {
          ++diff1;
          maxDiff = std::max(diff, maxDiff);
        } else if (diff > 1) {
          ++diffN;
          maxDiff = std::max(diff, maxDiff);
        }

        avgDiff += (double) diff;
      } else {
        ++diffInf;
        diffs.push_back(-1);
        // don't count this for maxDiff
      }

      // Distances are compared at the same position, regardless of whether
      // the indices at that position agree
      auto refD = lookup(refDist, query, result, dim1, dim2);
      auto testD = lookup(testDist, query, result, dim1, dim2);

      float relErr = relativeError(refD, testD);

      if (assertOnErr) {
        EXPECT_LE(relErr, maxRelativeError) << configMsg
                                            << " (" << query << ", " << result
                                            << ") refD: " << refD
                                            << " testD: " << testD;
      }

      maxRelErr = std::max(maxRelErr, relErr);
    }

    indexDiffs.emplace_back(std::move(diffs));
  }

  if (assertOnErr) {
    // Every displaced or missing entry counts against pctMaxDiff1
    EXPECT_LE((float) (diff1 + diffN + diffInf),
              (float) numResults * pctMaxDiff1) << configMsg;

    // Don't count diffInf because that could be diff1 as far as we
    // know
    EXPECT_LE((float) diffN, (float) numResults * pctMaxDiffN) << configMsg;
  }

  // The sum only includes entries found in the reference row, but the
  // average is taken over all entries
  avgDiff /= (double) numResults;

  if (printBasicStats) {
    if (!configMsg.empty()) {
      printf("Config\n"
             "----------------------------\n"
             "%s\n",
             configMsg.c_str());
    }

    // NOTE: the percentages labelled "of queries" are computed relative to
    // numResults (dim1 * dim2), not relative to the number of queries
    printf("Result error and differences\n"
           "----------------------------\n"
           "max abs diff %.7f rel diff %.7f\n"
           "idx diff avg: %.5g max: %d\n"
           "idx diff of 1: %d (%.3f%% of queries)\n"
           "idx diff of >1: %d (%.3f%% of queries)\n"
           "idx diff not found: %d (%.3f%% of queries)"
           " [typically a last element inversion]\n"
           "non-unique indices: %d (a serious error if >0)\n",
           maxAbsErr, maxRelErr,
           avgDiff, maxDiff,
           diff1, 100.0f * (float) diff1 / (float) numResults,
           diffN, 100.0f * (float) diffN / (float) numResults,
           diffInf, 100.0f * (float) diffInf / (float) numResults,
           nonUniqueIndices);
  }

  if (printDiffs) {
    printf("differences:\n");
    printf("==================\n");
    for (int query = 0; query < dim1; ++query) {
      for (int result = 0; result < dim2; ++result) {
        long refI = lookup(refInd, query, result, dim1, dim2);
        long testI = lookup(testInd, query, result, dim1, dim2);

        // Only positions where the two lists hold different indices are
        // printed
        if (refI != testI) {
          float refD = lookup(refDist, query, result, dim1, dim2);
          float testD = lookup(testDist, query, result, dim1, dim2);

          float maxDist = std::max(refD, testD);
          float delta = std::abs(refD - testD);

          // This relative error is normalized by the larger distance, unlike
          // relativeError() above which normalizes by the mean magnitude; it
          // is only printed when the distances differ
          float relErr = delta / maxDist;

          if (refD == testD) {
            printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
                   query, result,
                   indexDiffs[query][result],
                   refI, testI);
          } else {
            printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
                   "rel %.8f ref %a tst %a)\n",
                   query, result,
                   indexDiffs[query][result],
                   refI, testI, delta, relErr, refD, testD);
          }
        }
      }
    }
  }
}
296 
297 } }
long idx_t
all indices are this type
Definition: Index.h:62
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0