Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/Heap.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 /* Copyright 2004-present Facebook. All Rights Reserved.
10  *
11  * C++ support for heaps. The set of functions is tailored for
12  * efficient similarity search.
13  *
14  * There is no specific object for a heap, and the functions that
15  * operate on a single heap are inlined, because heaps are often
16  * small. More complex functions are implemented in Heap.cpp
17  *
18  */
19 
20 
21 #ifndef FAISS_Heap_h
22 #define FAISS_Heap_h
23 
24 #include <climits>
25 #include <cstring>
26 #include <cmath>
27 
28 #include <cassert>
29 #include <cstdio>
30 
31 #include <limits>
32 
33 
34 
35 namespace faiss {
36 
37 /*******************************************************************
38  * C object: uniform handling of min and max heap
39  *******************************************************************/
40 
41 /** The C object gives the type T of the values in the heap, the type
42  * of the keys, TI and the comparison that is done: cmp(a, b) is a < b
43  * for the minheap (CMin) and a > b for the maxheap (CMax). The neutral
44  * value will always be dropped in favor of any other value in the heap.
45  */
46 
// CMax is declared ahead of its definition because CMin names it as its
// reversed comparator (Crev).
template <typename T_, typename TI_>
struct CMax;

/** Traits of min-heaps, i.e. heaps where the minimum value is stored on
 * top. Such a heap is useful to find the *max* values of an array.
 *
 * T is the value type, TI the identifier type and Crev the traits of
 * the heap with the opposite ordering.
 */
template <typename T_, typename TI_>
struct CMin {
    using T = T_;
    using TI = TI_;
    using Crev = CMax<T_, TI_>;

    /// ordering used by the heap functions: true when a is strictly below b
    static bool cmp (T a, T b)
    {
        return a < b;
    }

    /// Value that will be popped first, so it must be smaller than all
    /// others. For int types this is not strictly the smallest value
    /// (which would be -max - 1).
    static T neutral ()
    {
        return -std::numeric_limits<T>::max();
    }
};
66 
67 
68 template <typename T_, typename TI_>
69 struct CMax {
70  typedef T_ T;
71  typedef TI_ TI;
72  typedef CMin<T_, TI_> Crev;
73  inline static bool cmp (T a, T b) {
74  return a > b;
75  }
76  inline static T neutral () {
77  return std::numeric_limits<T>::max();
78  }
79 };
80 
81 
82 /*******************************************************************
83  * Basic heap ops: push and pop
84  *******************************************************************/
85 
/** Pops the top element from the heap defined by bh_val[0..k-1] and
 * bh_ids[0..k-1]. On output the heap occupies the first k-1 slots and
 * the element at k-1 must be treated as undefined.
 *
 * @param k       number of elements in the heap before the pop; a call
 *                with k == 0 does nothing
 * @param bh_val  heap values, size k
 * @param bh_ids  identifiers stored alongside the values, size k
 */
template <class C> inline
void heap_pop (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
{
    if (k == 0)
        return; /* empty heap: nothing to pop, and no slot may be touched */

    /* Node numbers are 1-based so that the children of node i are 2*i and
     * 2*i+1. The "- 1" is applied at each access instead of decrementing
     * the array pointers, which would form a pointer before the start of
     * the arrays. */
    const typename C::T val = bh_val[k - 1];
    const typename C::TI id = bh_ids[k - 1];

    size_t i = 1;
    while (true) {
        const size_t i1 = i << 1;
        const size_t i2 = i1 + 1;
        if (i1 > k)
            break;

        /* pick the child that has to move up; the right child does not
         * exist when i2 == k + 1 */
        const size_t child =
            (i2 == k + 1 || C::cmp (bh_val[i1 - 1], bh_val[i2 - 1]))
            ? i1 : i2;

        if (C::cmp (val, bh_val[child - 1]))
            break; /* the last element can sit at node i */

        bh_val[i - 1] = bh_val[child - 1];
        bh_ids[i - 1] = bh_ids[child - 1];
        i = child;
    }

    /* the last element of the heap fills the hole left at node i */
    bh_val[i - 1] = val;
    bh_ids[i - 1] = id;
}
119 
120 
121 
/** Pushes the element (val, ids) into the heap bh_val[0..k-2] and
 * bh_ids[0..k-2]. On output the element at k-1 is defined.
 *
 * @param k       number of elements in the heap after the push; a call
 *                with k == 0 does nothing because there is no slot to
 *                write to
 * @param bh_val  heap values, room for k elements
 * @param bh_ids  identifiers stored alongside the values, room for k
 * @param val     value to insert
 * @param ids     identifier to insert
 */
template <class C> inline
void heap_push (size_t k,
                typename C::T * bh_val, typename C::TI * bh_ids,
                typename C::T val, typename C::TI ids)
{
    if (k == 0)
        return;

    /* Node numbers are 1-based so that the father of node i is i / 2. The
     * "- 1" is applied at each access instead of decrementing the array
     * pointers, which would form a pointer before the start of the
     * arrays. */
    size_t i = k;
    while (i > 1) {
        const size_t i_father = i >> 1;
        if (!C::cmp (val, bh_val[i_father - 1])) /* the heap structure is ok */
            break;
        /* move the father down and continue one level up */
        bh_val[i - 1] = bh_val[i_father - 1];
        bh_ids[i - 1] = bh_ids[i_father - 1];
        i = i_father;
    }
    bh_val[i - 1] = val;
    bh_ids[i - 1] = ids;
}
144 
145 
146 
147 /* Partial instantiation for heaps with TI = long */
148 
149 template <typename T> inline
150 void minheap_pop (size_t k, T * bh_val, long * bh_ids)
151 {
152  heap_pop<CMin<T, long> > (k, bh_val, bh_ids);
153 }
154 
155 
156 template <typename T> inline
157 void minheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids)
158 {
159  heap_push<CMin<T, long> > (k, bh_val, bh_ids, val, ids);
160 }
161 
162 
163 template <typename T> inline
164 void maxheap_pop (size_t k, T * bh_val, long * bh_ids)
165 {
166  heap_pop<CMax<T, long> > (k, bh_val, bh_ids);
167 }
168 
169 
170 template <typename T> inline
171 void maxheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids)
172 {
173  heap_push<CMax<T, long> > (k, bh_val, bh_ids, val, ids);
174 }
175 
176 
177 
178 /*******************************************************************
179  * Heap initialization
180  *******************************************************************/
181 
182 /* Initialization phase for the heap (with inconditionnal pushes).
183  * Store k0 elements in a heap containing up to k values. Note that
184  * (bh_val, bh_ids) can be the same as (x, ids) */
185 template <class C> inline
186 void heap_heapify (
187  size_t k,
188  typename C::T * bh_val,
189  typename C::TI * bh_ids,
190  const typename C::T * x = nullptr,
191  const typename C::TI * ids = nullptr,
192  size_t k0 = 0)
193 {
194  if (k0 > 0) assert (x);
195 
196  if (ids) {
197  for (size_t i = 0; i < k0; i++)
198  heap_push<C> (i+1, bh_val, bh_ids, x[i], ids[i]);
199  } else {
200  for (size_t i = 0; i < k0; i++)
201  heap_push<C> (i+1, bh_val, bh_ids, x[i], i);
202  }
203 
204  for (size_t i = k0; i < k; i++) {
205  bh_val[i] = C::neutral();
206  bh_ids[i] = -1;
207  }
208 
209 }
210 
211 template <typename T> inline
212 void minheap_heapify (
213  size_t k, T * bh_val,
214  long * bh_ids,
215  const T * x = nullptr,
216  const long * ids = nullptr,
217  size_t k0 = 0)
218 {
219  heap_heapify< CMin<T, long> > (k, bh_val, bh_ids, x, ids, k0);
220 }
221 
222 
223 template <typename T> inline
224 void maxheap_heapify (
225  size_t k,
226  T * bh_val,
227  long * bh_ids,
228  const T * x = nullptr,
229  const long * ids = nullptr,
230  size_t k0 = 0)
231 {
232  heap_heapify< CMax<T, long> > (k, bh_val, bh_ids, x, ids, k0);
233 }
234 
235 
236 
237 /*******************************************************************
238  * Add n elements to the heap
239  *******************************************************************/
240 
241 
242 /* Add some elements to the heap */
243 template <class C> inline
244 void heap_addn (size_t k,
245  typename C::T * bh_val, typename C::TI * bh_ids,
246  const typename C::T * x,
247  const typename C::TI * ids,
248  size_t n)
249 {
250  size_t i;
251  if (ids)
252  for (i = 0; i < n; i++) {
253  if (C::cmp (bh_val[0], x[i])) {
254  heap_pop<C> (k, bh_val, bh_ids);
255  heap_push<C> (k, bh_val, bh_ids, x[i], ids[i]);
256  }
257  }
258  else
259  for (i = 0; i < n; i++) {
260  if (C::cmp (bh_val[0], x[i])) {
261  heap_pop<C> (k, bh_val, bh_ids);
262  heap_push<C> (k, bh_val, bh_ids, x[i], i);
263  }
264  }
265 }
266 
267 
268 /* Partial instantiation for heaps with TI = long */
269 
270 template <typename T> inline
271 void minheap_addn (size_t k, T * bh_val, long * bh_ids,
272  const T * x, const long * ids, size_t n)
273 {
274  heap_addn<CMin<T, long> > (k, bh_val, bh_ids, x, ids, n);
275 }
276 
277 template <typename T> inline
278 void maxheap_addn (size_t k, T * bh_val, long * bh_ids,
279  const T * x, const long * ids, size_t n)
280 {
281  heap_addn<CMax<T, long> > (k, bh_val, bh_ids, x, ids, n);
282 }
283 
284 
285 
286 
287 
288 
289 /*******************************************************************
290  * Heap finalization (reorder elements)
291  *******************************************************************/
292 
293 
294 /* This function maps a binary heap into an sorted structure.
295  It returns the number */
296 template <typename C> inline
297 size_t heap_reorder (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
298 {
299  size_t i, ii;
300 
301  for (i = 0, ii = 0; i < k; i++) {
302  /* top element should be put at the end of the list */
303  typename C::T val = bh_val[0];
304  typename C::TI id = bh_ids[0];
305 
306  /* boundary case: we will over-ride this value if not a true element */
307  heap_pop<C> (k-i, bh_val, bh_ids);
308  bh_val[k-ii-1] = val;
309  bh_ids[k-ii-1] = id;
310  if (id != -1) ii++;
311  }
312  /* Count the number of elements which are effectively returned */
313  size_t nel = ii;
314 
315  memmove (bh_val, bh_val+k-ii, ii * sizeof(*bh_val));
316  memmove (bh_ids, bh_ids+k-ii, ii * sizeof(*bh_ids));
317 
318  for (; ii < k; ii++) {
319  bh_val[ii] = C::neutral();
320  bh_ids[ii] = -1;
321  }
322  return nel;
323 }
324 
325 template <typename T> inline
326 size_t minheap_reorder (size_t k, T * bh_val, long * bh_ids)
327 {
328  return heap_reorder< CMin<T, long> > (k, bh_val, bh_ids);
329 }
330 
331 template <typename T> inline
332 size_t maxheap_reorder (size_t k, T * bh_val, long * bh_ids)
333 {
334  return heap_reorder< CMax<T, long> > (k, bh_val, bh_ids);
335 }
336 
337 
338 
339 
340 
341 /*******************************************************************
342  * Operations on heap arrays
343  *******************************************************************/
344 
/** A template structure for a set of [min|max]-heaps. It is tailored so
 * that the actual data of the heaps can just live in compact arrays:
 * heap number `key` occupies the slots key * k .. key * k + k - 1 of
 * `val` and `ids`.
 */
template <typename C>
struct HeapArray {
    using TI = typename C::TI;
    using T = typename C::T;

    size_t nh;    ///< number of heaps
    size_t k;     ///< allocated size per heap
    TI * ids;     ///< identifiers (size nh * k)
    T * val;      ///< values (distances or similarities), size nh * k

    /// Return the list of values for a heap
    T * get_val (size_t key)
    {
        const size_t offset = key * k;
        return val + offset;
    }

    /// Corresponding identifiers
    TI * get_ids (size_t key)
    {
        const size_t offset = key * k;
        return ids + offset;
    }

    /// prepare all the heaps before adding
    void heapify ();

    /** add nj elements to heaps i0:i0+ni, with sequential ids
     *
     * @param nj    nb of elements to add to each heap
     * @param vin   elements to add, size ni * nj
     * @param j0    add this to the ids that are added
     * @param i0    first heap to update
     * @param ni    nb of heaps to update (-1 = use nh)
     */
    void addn (size_t nj, const T *vin, TI j0 = 0,
               size_t i0 = 0, long ni = -1);

    /** same as addn, with explicit ids instead of sequential ones
     *
     * @param id_in      ids of the elements to add, size ni * nj
     * @param id_stride  stride for id_in
     */
    void addn_with_ids (
        size_t nj, const T *vin, const TI *id_in = nullptr,
        long id_stride = 0, size_t i0 = 0, long ni = -1);

    /// reorder all the heaps
    void reorder ();

    /** this is not really a heap function. It just finds the per-line
     * extrema of each line of array D
     * @param vals_out    extreme value of each line (size nh, or NULL)
     * @param idx_out     index of extreme value (size nh or NULL)
     */
    void per_line_extrema (T *vals_out, TI *idx_out) const;

};
399 
400 
401 /* Define useful heaps */
402 typedef HeapArray<CMin<float, long> > float_minheap_array_t;
403 typedef HeapArray<CMin<int, long> > int_minheap_array_t;
404 
405 typedef HeapArray<CMax<float, long> > float_maxheap_array_t;
406 typedef HeapArray<CMax<int, long> > int_maxheap_array_t;
407 
408 // The heap templates are instantiated explicitly in Heap.cpp
409 
410 
411 
412 
413 
414 
415 
416 
417 
418 
419 
420 
421 
422 
423 
424 
425 
426 
427 
428 /*********************************************************************
429  * Indirect heaps: instead of having
430  *
431  * node i = (bh_ids[i], bh_val[i]),
432  *
433  * in indirect heaps,
434  *
435  * node i = (bh_ids[i], bh_val[bh_ids[i]]),
436  *
437  *********************************************************************/
438 
439 
/** Pops the top element of an indirect heap, where node i is
 * (bh_ids[i], bh_val[bh_ids[i]]): only bh_ids is reorganized, the values
 * are looked up through the identifiers and never modified.
 *
 * On output the heap occupies bh_ids[0..k-2]; the slot at k-1 must be
 * treated as undefined.
 *
 * @param k       number of elements in the heap before the pop; a call
 *                with k == 0 does nothing
 * @param bh_val  table of values, indexed by identifier
 * @param bh_ids  heap of identifiers, size k
 */
template <class C>
inline
void indirect_heap_pop (
    size_t k,
    const typename C::T * bh_val,
    typename C::TI * bh_ids)
{
    if (k == 0)
        return; /* empty heap: nothing to pop, and no slot may be touched */

    /* Node numbers are 1-based so that the children of node i are 2*i and
     * 2*i+1. The "- 1" is applied at each access instead of decrementing
     * the array pointer, which would form a pointer before the start of
     * the array. */
    const typename C::TI last_id = bh_ids[k - 1];
    const typename C::T val = bh_val[last_id];

    size_t i = 1;
    while (true) {
        const size_t i1 = i << 1;
        const size_t i2 = i1 + 1;
        if (i1 > k)
            break;

        /* The right child only exists when i2 <= k: its identifier is
         * read only in that case, so that no slot past the end of the
         * heap is accessed. */
        size_t child = i1;
        if (i2 <= k &&
            !C::cmp (bh_val[bh_ids[i1 - 1]], bh_val[bh_ids[i2 - 1]])) {
            child = i2;
        }

        const typename C::TI child_id = bh_ids[child - 1];
        if (C::cmp (val, bh_val[child_id]))
            break; /* the last element can sit at node i */

        bh_ids[i - 1] = child_id;
        i = child;
    }

    /* the last element of the heap fills the hole left at node i */
    bh_ids[i - 1] = last_id;
}
470 
471 
472 
/** Pushes the identifier id into an indirect heap, where node i is
 * (bh_ids[i], bh_val[bh_ids[i]]): the value of the new element is
 * bh_val[id], and only bh_ids is modified.
 *
 * @param k       number of elements in the heap after the push; a call
 *                with k == 0 does nothing because there is no slot to
 *                write to
 * @param bh_val  table of values, indexed by identifier
 * @param bh_ids  heap of identifiers, room for k elements
 * @param id      identifier to insert
 */
template <class C>
inline
void indirect_heap_push (size_t k,
                         const typename C::T * bh_val, typename C::TI * bh_ids,
                         typename C::TI id)
{
    if (k == 0)
        return;

    /* Node numbers are 1-based so that the father of node i is i / 2. The
     * "- 1" is applied at each access instead of decrementing the array
     * pointer, which would form a pointer before the start of the
     * array. */
    const typename C::T val = bh_val[id];
    size_t i = k;
    while (i > 1) {
        const size_t i_father = i >> 1;
        const typename C::TI father_id = bh_ids[i_father - 1];
        if (!C::cmp (val, bh_val[father_id])) /* the heap structure is ok */
            break;
        /* move the father down and continue one level up */
        bh_ids[i - 1] = father_id;
        i = i_father;
    }
    bh_ids[i - 1] = id;
}
491 
492 
493 
494 
495 } // namespace faiss
496 
497 #endif /* FAISS_Heap_h */
T * get_val(size_t key)
Return the list of values for a heap.
Definition: Heap.h:360
void heap_pop(size_t k, typename C::T *bh_val, typename C::TI *bh_ids)
Definition: Heap.h:90
size_t k
allocated size per heap
Definition: Heap.h:355
void reorder()
reorder all the heaps
Definition: Heap.cpp:34
void per_line_extrema(T *vals_out, TI *idx_out) const
Definition: Heap.cpp:92
TI * ids
identifiers (size nh * k)
Definition: Heap.h:356
void addn_with_ids(size_t nj, const T *vin, const TI *id_in=nullptr, long id_stride=0, size_t i0=0, long ni=-1)
Definition: Heap.cpp:64
void heapify()
prepare all the heaps before adding
Definition: Heap.cpp:26
TI * get_ids(size_t key)
Correspponding identifiers.
Definition: Heap.h:363
T * val
values (distances or similarities), size nh * k
Definition: Heap.h:357
size_t nh
number of heaps
Definition: Heap.h:354
void heap_push(size_t k, typename C::T *bh_val, typename C::TI *bh_ids, typename C::T val, typename C::TI ids)
Definition: Heap.h:126
void addn(size_t nj, const T *vin, TI j0=0, size_t i0=0, long ni=-1)
Definition: Heap.cpp:42