Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
TestUtils.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #include "../test/TestUtils.h"
13 #include "../../utils.h"
14 #include <cmath>
15 #include <gtest/gtest.h>
16 #include <set>
17 #include <sstream>
18 #include <time.h>
19 #include <unordered_map>
20 
21 namespace faiss { namespace gpu {
22 
// Symmetric relative error between two values: |a - b| divided by the
// mean of |a| and |b|.
//
// Identical inputs (including a == b == 0) yield exactly 0. Without the
// early return, two zero values would evaluate 0 / 0 = NaN, and a NaN
// compared against a tolerance always fails even though the values
// match exactly.
inline float relativeError(float a, float b) {
  const float delta = std::abs(a - b);
  if (delta == 0.0f) {
    return 0.0f;
  }

  return delta / (0.5f * (std::abs(a) + std::abs(b)));
}
26 
// Current test seed. The same value is handed to the faiss float_rand
// API; a test runs entirely within a single thread, so sharing one
// unsynchronized global is acceptable here.
long s_seed{1};
30 
31 void newTestSeed() {
32  struct timespec t;
33  clock_gettime(CLOCK_REALTIME, &t);
34 
35  setTestSeed(t.tv_nsec);
36 }
37 
38 void setTestSeed(long seed) {
39  printf("testing with random seed %ld\n", seed);
40 
41  srand48(seed);
42  s_seed = seed;
43 }
44 
45 int randVal(int a, int b) {
46  EXPECT_GE(a, 0);
47  EXPECT_LE(a, b);
48 
49  return a + (lrand48() % (b + 1 - a));
50 }
51 
52 bool randBool() {
53  return randSelect<bool>({true, false});
54 }
55 
56 std::vector<float> randVecs(int num, int dim) {
57  std::vector<float> v(num * dim);
58  static bool first = true;
59 
60  faiss::float_rand(v.data(), v.size(), s_seed);
61  // unfortunately we generate separate sets of vectors, and don't
62  // want the same values
63  ++s_seed;
64 
65  return v;
66 }
67 
68 void compareIndices(faiss::Index& refIndex,
69  faiss::Index& testIndex,
70  int numQuery, int dim, int k,
71  const std::string& configMsg,
72  float maxRelativeError,
73  float pctMaxDiff1,
74  float pctMaxDiffN) {
75  auto queries = faiss::gpu::randVecs(numQuery, dim);
76 
77  // Compare
78  std::vector<float> refDistance(numQuery * k, 0);
79  std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
80  refIndex.search(numQuery, queries.data(),
81  k, refDistance.data(), refIndices.data());
82 
83  std::vector<float> testDistance(numQuery * k, 0);
84  std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
85  testIndex.search(numQuery, queries.data(),
86  k, testDistance.data(), testIndices.data());
87 
88  faiss::gpu::compareLists(refDistance.data(),
89  refIndices.data(),
90  testDistance.data(),
91  testIndices.data(),
92  numQuery, k,
93  configMsg,
94  true, false, true,
95  maxRelativeError, pctMaxDiff1, pctMaxDiffN);
96 }
97 
// Reads element (i, j) of a row-major 2-d array whose rows hold dim2
// entries each. dim1 is accepted for symmetry with the callers but is
// not needed to compute the offset.
template <typename T>
inline T lookup(const T* p, int i, int j, int dim1, int dim2) {
  (void) dim1;

  const int flatOffset = i * dim2 + j;
  return *(p + flatOffset);
}
102 
103 void compareLists(const float* refDist,
104  const faiss::Index::idx_t* refInd,
105  const float* testDist,
106  const faiss::Index::idx_t* testInd,
107  int dim1, int dim2,
108  const std::string& configMsg,
109  bool printBasicStats, bool printDiffs, bool assertOnErr,
110  float maxRelativeError,
111  float pctMaxDiff1,
112  float pctMaxDiffN) {
113 
114  float maxAbsErr = 0.0f;
115  for (int i = 0; i < dim1 * dim2; ++i) {
116  maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
117  }
118  int numResults = dim1 * dim2;
119 
120  // query -> {index -> result position}
121  std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
122 
123  for (int query = 0; query < dim1; ++query) {
124  std::unordered_map<faiss::Index::idx_t, int> indices;
125 
126  for (int result = 0; result < dim2; ++result) {
127  indices[lookup(refInd, query, result, dim1, dim2)] = result;
128  }
129 
130  refIndexMap.emplace_back(std::move(indices));
131  }
132 
133  // See how far off the indices are
134  // Keep track of the difference for each entry
135  std::vector<std::vector<int>> indexDiffs;
136 
137  int diff1 = 0; // index differs by 1
138  int diffN = 0; // index differs by >1
139  int diffInf = 0; // index not found in the other
140  int nonUniqueIndices = 0;
141 
142  double avgDiff = 0.0;
143  int maxDiff = 0;
144  float maxRelErr = 0.0f;
145 
146  for (int query = 0; query < dim1; ++query) {
147  std::vector<int> diffs;
148  std::set<faiss::Index::idx_t> uniqueIndices;
149 
150  auto& indices = refIndexMap[query];
151 
152  for (int result = 0; result < dim2; ++result) {
153  auto t = lookup(testInd, query, result, dim1, dim2);
154 
155  // All indices reported within a query should be unique; this is
156  // a serious error if is otherwise the case
157  bool uniqueIndex = uniqueIndices.count(t) == 0;
158  if (assertOnErr) {
159  EXPECT_TRUE(uniqueIndex) << configMsg
160  << " " << query
161  << " " << result
162  << " " << t;
163  }
164 
165  if (!uniqueIndex) {
166  ++nonUniqueIndices;
167  } else {
168  uniqueIndices.insert(t);
169  }
170 
171  auto it = indices.find(t);
172  if (it != indices.end()) {
173  int diff = std::abs(result - it->second);
174  diffs.push_back(diff);
175 
176  if (diff == 1) {
177  ++diff1;
178  maxDiff = std::max(diff, maxDiff);
179  } else if (diff > 1) {
180  ++diffN;
181  maxDiff = std::max(diff, maxDiff);
182  }
183 
184  avgDiff += (double) diff;
185  } else {
186  ++diffInf;
187  diffs.push_back(-1);
188  // don't count this for maxDiff
189  }
190 
191  auto refD = lookup(refDist, query, result, dim1, dim2);
192  auto testD = lookup(testDist, query, result, dim1, dim2);
193 
194  float relErr = relativeError(refD, testD);
195 
196  if (assertOnErr) {
197  EXPECT_LE(relErr, maxRelativeError) << configMsg
198  << " " << query << " " << result;
199  }
200 
201  maxRelErr = std::max(maxRelErr, relErr);
202  }
203 
204  indexDiffs.emplace_back(std::move(diffs));
205  }
206 
207  if (assertOnErr) {
208  EXPECT_LE((float) (diff1 + diffN + diffInf),
209  (float) numResults * pctMaxDiff1) << configMsg;
210 
211  // Don't count diffInf because that could be diff1 as far as we
212  // know
213  EXPECT_LE((float) diffN, (float) numResults * pctMaxDiffN) << configMsg;
214  }
215 
216  avgDiff /= (double) numResults;
217 
218  if (printBasicStats) {
219  if (!configMsg.empty()) {
220  printf("Config\n"
221  "----------------------------\n"
222  "%s\n",
223  configMsg.c_str());
224  }
225 
226  printf("Result error and differences\n"
227  "----------------------------\n"
228  "max abs diff %.7f rel diff %.7f\n"
229  "idx diff avg: %.5g max: %d\n"
230  "idx diff of 1: %d (%.3f%% of queries)\n"
231  "idx diff of >1: %d (%.3f%% of queries)\n"
232  "idx diff not found: %d (%.3f%% of queries)"
233  " [typically a last element inversion]\n"
234  "non-unique indices: %d (a serious error if >0)\n",
235  maxAbsErr, maxRelErr,
236  avgDiff, maxDiff,
237  diff1, 100.0f * (float) diff1 / (float) numResults,
238  diffN, 100.0f * (float) diffN / (float) numResults,
239  diffInf, 100.0f * (float) diffInf / (float) numResults,
240  nonUniqueIndices);
241  }
242 
243  if (printDiffs) {
244  printf("differences:\n");
245  printf("==================\n");
246  for (int query = 0; query < dim1; ++query) {
247  for (int result = 0; result < dim2; ++result) {
248  long refI = lookup(refInd, query, result, dim1, dim2);
249  long testI = lookup(testInd, query, result, dim1, dim2);
250 
251  if (refI != testI) {
252  float refD = lookup(refDist, query, result, dim1, dim2);
253  float testD = lookup(testDist, query, result, dim1, dim2);
254 
255  float maxDist = std::max(refD, testD);
256  float delta = std::abs(refD - testD);
257 
258  float relErr = delta / maxDist;
259 
260  if (refD == testD) {
261  printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
262  query, result,
263  indexDiffs[query][result],
264  refI, testI);
265  } else {
266  printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
267  "rel %.8f ref %a tst %a)\n",
268  query, result,
269  indexDiffs[query][result],
270  refI, testI, delta, relErr, refD, testD);
271  }
272  }
273  }
274  }
275  }
276 }
277 
278 } }
long idx_t
all indices are this type
Definition: Index.h:64
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const =0