Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/Heap.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 /* Copyright 2004-present Facebook. All Rights Reserved.
11  *
12  * C++ support for heaps. The set of functions is tailored for
13  * efficient similarity search.
14  *
15  * There is no specific object for a heap, and the functions that
16  * operate on a single heap are inlined, because heaps are often
17  * small. More complex functions are implemented in Heap.cpp
18  *
19  */
20 
21 
22 #ifndef FAISS_Heap_h
23 #define FAISS_Heap_h
24 
25 #include <climits>
26 #include <cstring>
27 #include <cmath>
28 
29 #include <cassert>
30 #include <cstdio>
31 
32 #include <limits>
33 
34 
35 
36 namespace faiss {
37 
38 /*******************************************************************
39  * C object: uniform handling of min and max heap
40  *******************************************************************/
41 
42 /** The C object gives the type T of the values in the heap, the type TI
43  * of the ids, and the comparison cmp(a, b) that is done: a < b for the
44  * minheap (CMin) and a > b for the maxheap (CMax). The neutral value will
45  * always be dropped in favor of any other value in the heap.
46  */
47 
template <typename T_, typename TI_>
struct CMax;

/** Traits of min-heaps, i.e. heaps where the minimum value is stored on
 * top. Useful to find the *max* values of an array.
 */
template <typename T_, typename TI_>
struct CMin {
    using T = T_;                ///< type of the values
    using TI = TI_;              ///< type of the ids
    using Crev = CMax<T_, TI_>;  ///< traits with the reversed comparison

    /// Comparison used by the heap functions: true iff a < b.
    static bool cmp (T a, T b) {
        return a < b;
    }

    /// Value that will be popped first -> must be smaller than all others.
    /// For int types this is not strictly the smallest value (-max - 1).
    static T neutral () {
        return -std::numeric_limits<T>::max();
    }
};
67 
68 
69 template <typename T_, typename TI_>
70 struct CMax {
71  typedef T_ T;
72  typedef TI_ TI;
73  typedef CMin<T_, TI_> Crev;
74  inline static bool cmp (T a, T b) {
75  return a > b;
76  }
77  inline static T neutral () {
78  return std::numeric_limits<T>::max();
79  }
80 };
81 
82 
83 /*******************************************************************
84  * Basic heap ops: push and pop
85  *******************************************************************/
86 
/** Pops the top element from the heap defined by bh_val[0..k-1] and
 * bh_ids[0..k-1]. On output the heap occupies entries 0..k-2 and the
 * entry at k-1 must be considered undefined.
 *
 * @param k       number of elements in the heap before the pop
 * @param bh_val  heap values
 * @param bh_ids  heap ids, moved together with the values
 */
template <class C> inline
void heap_pop (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
{
    /* Use 1-based indexing for easier node->child translation */
    typename C::T * vals = bh_val - 1;
    typename C::TI * labels = bh_ids - 1;

    /* the last element is the one that has to find a new position */
    const typename C::T last = vals[k];
    size_t hole = 1;

    for (;;) {
        const size_t left = hole << 1;
        if (left > k)
            break;
        const size_t right = left + 1;

        /* pick the child that must move up: the left one when there is
         * no right child, otherwise the one that wins the comparison */
        const size_t child =
            (right == k + 1 || C::cmp (vals[left], vals[right]))
            ? left : right;

        /* the last element fits in the hole: stop sifting down */
        if (C::cmp (last, vals[child]))
            break;

        vals[hole] = vals[child];
        labels[hole] = labels[child];
        hole = child;
    }

    vals[hole] = vals[k];
    labels[hole] = labels[k];
}
120 
121 
122 
/** Pushes the element (val, ids) into the heap bh_val[0..k-2] and
 * bh_ids[0..k-2]. On output the element at k-1 is defined.
 *
 * @param k       number of elements in the heap after the push
 * @param bh_val  heap values
 * @param bh_ids  heap ids, moved together with the values
 * @param val     value to insert
 * @param ids     id stored alongside val
 */
template <class C> inline
void heap_push (size_t k,
                typename C::T * bh_val, typename C::TI * bh_ids,
                typename C::T val, typename C::TI ids)
{
    /* Use 1-based indexing for easier node->child translation */
    typename C::T * vals = bh_val - 1;
    typename C::TI * labels = bh_ids - 1;

    /* start from the new last slot and move parents down until the
     * new value no longer wins the comparison against its parent */
    size_t slot = k;
    while (slot > 1) {
        const size_t parent = slot >> 1;
        if (C::cmp (val, vals[parent])) {
            vals[slot] = vals[parent];
            labels[slot] = labels[parent];
            slot = parent;
        } else {
            break; /* the heap structure is ok */
        }
    }

    vals[slot] = val;
    labels[slot] = ids;
}
145 
146 
147 
148 /* Partial instantiation for heaps with TI = long */
149 
150 template <typename T> inline
151 void minheap_pop (size_t k, T * bh_val, long * bh_ids)
152 {
153  heap_pop<CMin<T, long> > (k, bh_val, bh_ids);
154 }
155 
156 
157 template <typename T> inline
158 void minheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids)
159 {
160  heap_push<CMin<T, long> > (k, bh_val, bh_ids, val, ids);
161 }
162 
163 
164 template <typename T> inline
165 void maxheap_pop (size_t k, T * bh_val, long * bh_ids)
166 {
167  heap_pop<CMax<T, long> > (k, bh_val, bh_ids);
168 }
169 
170 
171 template <typename T> inline
172 void maxheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids)
173 {
174  heap_push<CMax<T, long> > (k, bh_val, bh_ids, val, ids);
175 }
176 
177 
178 
179 /*******************************************************************
180  * Heap initialization
181  *******************************************************************/
182 
183 /* Initialization phase for the heap (with inconditionnal pushes).
184  * Store k0 elements in a heap containing up to k values. Note that
185  * (bh_val, bh_ids) can be the same as (x, ids) */
186 template <class C> inline
187 void heap_heapify (
188  size_t k,
189  typename C::T * bh_val,
190  typename C::TI * bh_ids,
191  const typename C::T * x = nullptr,
192  const typename C::TI * ids = nullptr,
193  size_t k0 = 0)
194 {
195  if (k0 > 0) assert (x);
196 
197  if (ids) {
198  for (size_t i = 0; i < k0; i++)
199  heap_push<C> (i+1, bh_val, bh_ids, x[i], ids[i]);
200  } else {
201  for (size_t i = 0; i < k0; i++)
202  heap_push<C> (i+1, bh_val, bh_ids, x[i], i);
203  }
204 
205  for (size_t i = k0; i < k; i++) {
206  bh_val[i] = C::neutral();
207  bh_ids[i] = -1;
208  }
209 
210 }
211 
212 template <typename T> inline
213 void minheap_heapify (
214  size_t k, T * bh_val,
215  long * bh_ids,
216  const T * x = nullptr,
217  const long * ids = nullptr,
218  size_t k0 = 0)
219 {
220  heap_heapify< CMin<T, long> > (k, bh_val, bh_ids, x, ids, k0);
221 }
222 
223 
224 template <typename T> inline
225 void maxheap_heapify (
226  size_t k,
227  T * bh_val,
228  long * bh_ids,
229  const T * x = nullptr,
230  const long * ids = nullptr,
231  size_t k0 = 0)
232 {
233  heap_heapify< CMax<T, long> > (k, bh_val, bh_ids, x, ids, k0);
234 }
235 
236 
237 
238 /*******************************************************************
239  * Add n elements to the heap
240  *******************************************************************/
241 
242 
243 /* Add some elements to the heap */
244 template <class C> inline
245 void heap_addn (size_t k,
246  typename C::T * bh_val, typename C::TI * bh_ids,
247  const typename C::T * x,
248  const typename C::TI * ids,
249  size_t n)
250 {
251  size_t i;
252  if (ids)
253  for (i = 0; i < n; i++) {
254  if (C::cmp (bh_val[0], x[i])) {
255  heap_pop<C> (k, bh_val, bh_ids);
256  heap_push<C> (k, bh_val, bh_ids, x[i], ids[i]);
257  }
258  }
259  else
260  for (i = 0; i < n; i++) {
261  if (C::cmp (bh_val[0], x[i])) {
262  heap_pop<C> (k, bh_val, bh_ids);
263  heap_push<C> (k, bh_val, bh_ids, x[i], i);
264  }
265  }
266 }
267 
268 
269 /* Partial instantiation for heaps with TI = long */
270 
271 template <typename T> inline
272 void minheap_addn (size_t k, T * bh_val, long * bh_ids,
273  const T * x, const long * ids, size_t n)
274 {
275  heap_addn<CMin<T, long> > (k, bh_val, bh_ids, x, ids, n);
276 }
277 
278 template <typename T> inline
279 void maxheap_addn (size_t k, T * bh_val, long * bh_ids,
280  const T * x, const long * ids, size_t n)
281 {
282  heap_addn<CMax<T, long> > (k, bh_val, bh_ids, x, ids, n);
283 }
284 
285 
286 
287 
288 
289 
290 /*******************************************************************
291  * Heap finalization (reorder elements)
292  *******************************************************************/
293 
294 
295 /* This function maps a binary heap into an sorted structure.
296  It returns the number */
297 template <typename C> inline
298 size_t heap_reorder (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
299 {
300  size_t i, ii;
301 
302  for (i = 0, ii = 0; i < k; i++) {
303  /* top element should be put at the end of the list */
304  typename C::T val = bh_val[0];
305  typename C::TI id = bh_ids[0];
306 
307  /* boundary case: we will over-ride this value if not a true element */
308  heap_pop<C> (k-i, bh_val, bh_ids);
309  bh_val[k-ii-1] = val;
310  bh_ids[k-ii-1] = id;
311  if (id != -1) ii++;
312  }
313  /* Count the number of elements which are effectively returned */
314  size_t nel = ii;
315 
316  memmove (bh_val, bh_val+k-ii, ii * sizeof(*bh_val));
317  memmove (bh_ids, bh_ids+k-ii, ii * sizeof(*bh_ids));
318 
319  for (; ii < k; ii++) {
320  bh_val[ii] = C::neutral();
321  bh_ids[ii] = -1;
322  }
323  return nel;
324 }
325 
326 template <typename T> inline
327 size_t minheap_reorder (size_t k, T * bh_val, long * bh_ids)
328 {
329  return heap_reorder< CMin<T, long> > (k, bh_val, bh_ids);
330 }
331 
332 template <typename T> inline
333 size_t maxheap_reorder (size_t k, T * bh_val, long * bh_ids)
334 {
335  return heap_reorder< CMax<T, long> > (k, bh_val, bh_ids);
336 }
337 
338 
339 
340 
341 
342 /*******************************************************************
343  * Operations on heap arrays
344  *******************************************************************/
345 
/** A template structure for a set of [min|max]-heaps. It is tailored
 * so that the actual data of the heaps can just live in compact
 * arrays: heap number i uses the entries i * k .. (i + 1) * k - 1 of
 * val and ids.
 */
template <typename C>
struct HeapArray {
    using TI = typename C::TI;
    using T = typename C::T;

    size_t nh;   ///< number of heaps
    size_t k;    ///< allocated size per heap
    TI * ids;    ///< identifiers (size nh * k)
    T * val;     ///< values (distances or similarities), size nh * k

    /// Return the list of values for a heap
    T * get_val (size_t key) {
        const size_t offset = key * k;
        return val + offset;
    }

    /// Corresponding identifiers
    TI * get_ids (size_t key) {
        const size_t offset = key * k;
        return ids + offset;
    }

    /// prepare all the heaps before adding
    void heapify ();

    /** add nj elements to heaps i0:i0+ni, with sequential ids
     *
     * @param nj    nb of elements to add to each heap
     * @param vin   elements to add, size ni * nj
     * @param j0    add this to the ids that are added
     * @param i0    first heap to update
     * @param ni    nb of heaps to update (-1 = use nh)
     */
    void addn (size_t nj, const T *vin, TI j0 = 0,
               size_t i0 = 0, long ni = -1);

    /** same as addn, with explicit ids
     *
     * @param id_in     ids of the elements to add, size ni * nj
     * @param id_stride stride for id_in
     */
    void addn_with_ids (
        size_t nj, const T *vin, const TI *id_in = nullptr,
        long id_stride = 0, size_t i0 = 0, long ni = -1);

    /// reorder all the heaps
    void reorder ();

    /** this is not really a heap function. It just finds the per-line
     * extrema of each line of array D (D is not defined in this
     * header; presumably the nh * k array val -- confirm in Heap.cpp)
     * @param vals_out  extreme value of each line (size nh, or NULL)
     * @param idx_out   index of extreme value (size nh or NULL)
     */
    void per_line_extrema (T *vals_out, TI *idx_out) const;

};
400 
401 
402 /* Define useful heaps */
403 typedef HeapArray<CMin<float, long> > float_minheap_array_t;
404 typedef HeapArray<CMin<int, long> > int_minheap_array_t;
405 
406 typedef HeapArray<CMax<float, long> > float_maxheap_array_t;
407 typedef HeapArray<CMax<int, long> > int_maxheap_array_t;
408 
409 // The heap templates are instantiated explicitly in Heap.cpp
410 
411 
412 
413 
414 
415 
416 
417 
418 
419 
420 
421 
422 
423 
424 
425 
426 
427 
428 
429 /*********************************************************************
430  * Indirect heaps: instead of having
431  *
432  * node i = (bh_ids[i], bh_val[i]),
433  *
434  * in indirect heaps,
435  *
436  * node i = (bh_ids[i], bh_val[bh_ids[i]]),
437  *
438  *********************************************************************/
439 
440 
/** Pops the top element from the indirect heap defined by
 * bh_ids[0..k-1]. The value of node i is looked up as
 * bh_val[bh_ids[i]]; bh_val itself is never modified. On output the
 * heap occupies entries 0..k-2 of bh_ids and the entry at k-1 must be
 * considered undefined.
 *
 * @param k       number of elements in the heap before the pop
 * @param bh_val  table of values, indexed by the ids stored in bh_ids
 * @param bh_ids  heap ids (size k)
 */
template <class C>
static inline
void indirect_heap_pop (
        size_t k,
        const typename C::T * bh_val,
        typename C::TI * bh_ids)
{
    bh_ids--; /* Use 1-based indexing for easier node->child translation */
    typename C::T val = bh_val[bh_ids[k]];
    size_t i = 1;
    while (1) {
        size_t i1 = i << 1;
        size_t i2 = i1 + 1;
        if (i1 > k)
            break;
        const typename C::TI id1 = bh_ids[i1];
        /* i1 <= k here, so i2 <= k + 1. When i2 == k + 1 there is no
         * right child: bh_ids[i2] would be one past the end of the id
         * array, so it is only read when the right child exists. */
        if (i2 == k + 1 || C::cmp(bh_val[id1], bh_val[bh_ids[i2]])) {
            if (C::cmp(val, bh_val[id1]))
                break;
            bh_ids[i] = id1;
            i = i1;
        } else {
            const typename C::TI id2 = bh_ids[i2];
            if (C::cmp(val, bh_val[id2]))
                break;
            bh_ids[i] = id2;
            i = i2;
        }
    }
    bh_ids[i] = bh_ids[k];
}
471 
472 
473 
/** Pushes the element id into the indirect heap bh_ids[0..k-2], where
 * the value of node i is looked up as bh_val[bh_ids[i]]. On output the
 * element at k-1 is defined.
 *
 * @param k       number of elements in the heap after the push
 * @param bh_val  table of values, indexed by the ids stored in bh_ids
 * @param bh_ids  heap ids
 * @param id      id to insert; its value is bh_val[id]
 */
template <class C>
static inline
void indirect_heap_push (size_t k,
                         const typename C::T * bh_val, typename C::TI * bh_ids,
                         typename C::TI id)
{
    /* Use 1-based indexing for easier node->child translation */
    typename C::TI * labels = bh_ids - 1;
    const typename C::T new_val = bh_val[id];

    /* start from the new last slot and move parents down until the
     * new value no longer wins the comparison against its parent */
    size_t slot = k;
    while (slot > 1) {
        const size_t parent = slot >> 1;
        if (C::cmp (new_val, bh_val[labels[parent]])) {
            labels[slot] = labels[parent];
            slot = parent;
        } else {
            break;
        }
    }
    labels[slot] = id;
}
492 
493 
494 
495 
496 } // namespace faiss
497 
498 #endif /* FAISS_Heap_h */
T * get_val(size_t key)
Return the list of values for a heap.
Definition: Heap.h:361
void heap_pop(size_t k, typename C::T *bh_val, typename C::TI *bh_ids)
Definition: Heap.h:91
size_t k
allocated size per heap
Definition: Heap.h:356
void reorder()
reorder all the heaps
Definition: Heap.cpp:35
void per_line_extrema(T *vals_out, TI *idx_out) const
Definition: Heap.cpp:93
TI * ids
identifiers (size nh * k)
Definition: Heap.h:357
void addn_with_ids(size_t nj, const T *vin, const TI *id_in=nullptr, long id_stride=0, size_t i0=0, long ni=-1)
Definition: Heap.cpp:65
void heapify()
prepare all the heaps before adding
Definition: Heap.cpp:27
TI * get_ids(size_t key)
Corresponding identifiers.
Definition: Heap.h:364
T * val
values (distances or similarities), size nh * k
Definition: Heap.h:358
size_t nh
number of heaps
Definition: Heap.h:355
void heap_push(size_t k, typename C::T *bh_val, typename C::TI *bh_ids, typename C::T val, typename C::TI ids)
Definition: Heap.h:127
void addn(size_t nj, const T *vin, TI j0=0, size_t i0=0, long ni=-1)
Definition: Heap.cpp:43