Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/Heap.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 /*
11  * C++ support for heaps. The set of functions is tailored for
12  * efficient similarity search.
13  *
14  * There is no specific object for a heap, and the functions that
15  * operate on a single heap are inlined, because heaps are often
16  * small. More complex functions are implemented in Heap.cpp
17  *
18  */
19 
20 
21 #ifndef FAISS_Heap_h
22 #define FAISS_Heap_h
23 
24 #include <climits>
25 #include <cstring>
26 #include <cmath>
27 
28 #include <cassert>
29 #include <cstdio>
30 
31 #include <limits>
32 
33 
34 namespace faiss {
35 
36 /*******************************************************************
37  * C object: uniform handling of min and max heap
38  *******************************************************************/
39 
40 /** The C object gives the type T of the values in the heap, the type
41  * TI of the keys, and the comparison cmp(a, b) that is done: a < b
42  * for the minheap and a > b for the maxheap. The neutral value will
43  * always be dropped in favor of any other value in the heap.
44  */
45 
template <typename T_, typename TI_>
struct CMax;

/** Traits of min-heaps, i.e. heaps where the minimum value is stored on
 * top. Useful to find the *max* values of an array.
 *
 * T is the value type, TI the identifier type and Crev the traits of
 * the heap with the opposite ordering.
 */
template <typename T_, typename TI_>
struct CMin {
    typedef T_ T;
    typedef TI_ TI;
    typedef CMax<T_, TI_> Crev;

    /// ordering used by the heap functions: true when a < b
    inline static bool cmp (T a, T b) {
        return a < b;
    }

    /** Value that will be popped first -> must be smaller than all others.
     *
     * For signed and floating-point types this is -max(); for signed
     * integers that is not strictly the smallest value (-max - 1).
     * For unsigned types -max() would wrap around to 1, so the true
     * minimum (lowest() == 0) is returned instead.
     */
    inline static T neutral () {
        return std::numeric_limits<T>::is_signed
            ? static_cast<T> (-std::numeric_limits<T>::max())
            : std::numeric_limits<T>::lowest();
    }
};
65 
66 
67 template <typename T_, typename TI_>
68 struct CMax {
69  typedef T_ T;
70  typedef TI_ TI;
71  typedef CMin<T_, TI_> Crev;
72  inline static bool cmp (T a, T b) {
73  return a > b;
74  }
75  inline static T neutral () {
76  return std::numeric_limits<T>::max();
77  }
78 };
79 
80 
81 /*******************************************************************
82  * Basic heap ops: push and pop
83  *******************************************************************/
84 
/** Pops the top element from the heap defined by bh_val[0..k-1] and
 * bh_ids[0..k-1]. On output the first k-1 slots form a heap again and
 * the element at k-1 is undefined.
 *
 * @param k       number of elements in the heap before the pop;
 *                nothing is done when k == 0
 * @param bh_val  values of the heap
 * @param bh_ids  identifiers, moved together with their values
 */
template <class C> inline
void heap_pop (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
{
    if (k == 0)
        return; /* empty heap: there is nothing to pop */

    /* 0-based indexing: the children of node i are 2*i+1 and 2*i+2.
     * This avoids forming a pointer before the start of the arrays. */
    const size_t last = k - 1;
    /* the last element is re-inserted from the top of the heap */
    const typename C::T val = bh_val[last];
    size_t i = 0;
    for (;;) {
        const size_t i1 = 2 * i + 1;
        if (i1 > last)
            break; /* node i has no child */
        const size_t i2 = i1 + 1;
        /* select the child that has to move up; the right child is
         * only looked at when it exists */
        const size_t child =
            (i2 > last || C::cmp (bh_val[i1], bh_val[i2])) ? i1 : i2;
        if (C::cmp (val, bh_val[child]))
            break; /* val can be stored at node i */
        bh_val[i] = bh_val[child];
        bh_ids[i] = bh_ids[child];
        i = child;
    }
    bh_val[i] = val;
    bh_ids[i] = bh_ids[last];
}
118 
119 
120 
/** Pushes the element (val, ids) into the heap bh_val[0..k-2] and
 * bh_ids[0..k-2]. On output the element at k-1 is defined.
 *
 * @param k       number of elements in the heap after the push;
 *                nothing is stored when k == 0
 * @param bh_val  values of the heap
 * @param bh_ids  identifiers, moved together with their values
 * @param val     value to insert
 * @param ids     identifier to insert
 */
template <class C> inline
void heap_push (size_t k,
                typename C::T * bh_val, typename C::TI * bh_ids,
                typename C::T val, typename C::TI ids)
{
    if (k == 0)
        return; /* no slot available for the new element */

    /* 0-based indexing: the father of node i is (i-1)/2. This avoids
     * forming a pointer before the start of the arrays. */
    size_t i = k - 1;
    while (i > 0) {
        const size_t i_father = (i - 1) / 2;
        if (!C::cmp (val, bh_val[i_father])) /* the heap structure is ok */
            break;
        /* move the father down and continue one level up */
        bh_val[i] = bh_val[i_father];
        bh_ids[i] = bh_ids[i_father];
        i = i_father;
    }
    bh_val[i] = val;
    bh_ids[i] = ids;
}
143 
144 
145 
146 /* Partial instantiation for heaps with TI = long */
147 
148 template <typename T> inline
149 void minheap_pop (size_t k, T * bh_val, long * bh_ids)
150 {
151  heap_pop<CMin<T, long> > (k, bh_val, bh_ids);
152 }
153 
154 
155 template <typename T> inline
156 void minheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids)
157 {
158  heap_push<CMin<T, long> > (k, bh_val, bh_ids, val, ids);
159 }
160 
161 
162 template <typename T> inline
163 void maxheap_pop (size_t k, T * bh_val, long * bh_ids)
164 {
165  heap_pop<CMax<T, long> > (k, bh_val, bh_ids);
166 }
167 
168 
169 template <typename T> inline
170 void maxheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids)
171 {
172  heap_push<CMax<T, long> > (k, bh_val, bh_ids, val, ids);
173 }
174 
175 
176 
177 /*******************************************************************
178  * Heap initialization
179  *******************************************************************/
180 
181 /* Initialization phase for the heap (with unconditionnal pushes).
182  * Store k0 elements in a heap containing up to k values. Note that
183  * (bh_val, bh_ids) can be the same as (x, ids) */
184 template <class C> inline
185 void heap_heapify (
186  size_t k,
187  typename C::T * bh_val,
188  typename C::TI * bh_ids,
189  const typename C::T * x = nullptr,
190  const typename C::TI * ids = nullptr,
191  size_t k0 = 0)
192 {
193  if (k0 > 0) assert (x);
194 
195  if (ids) {
196  for (size_t i = 0; i < k0; i++)
197  heap_push<C> (i+1, bh_val, bh_ids, x[i], ids[i]);
198  } else {
199  for (size_t i = 0; i < k0; i++)
200  heap_push<C> (i+1, bh_val, bh_ids, x[i], i);
201  }
202 
203  for (size_t i = k0; i < k; i++) {
204  bh_val[i] = C::neutral();
205  bh_ids[i] = -1;
206  }
207 
208 }
209 
210 template <typename T> inline
211 void minheap_heapify (
212  size_t k, T * bh_val,
213  long * bh_ids,
214  const T * x = nullptr,
215  const long * ids = nullptr,
216  size_t k0 = 0)
217 {
218  heap_heapify< CMin<T, long> > (k, bh_val, bh_ids, x, ids, k0);
219 }
220 
221 
222 template <typename T> inline
223 void maxheap_heapify (
224  size_t k,
225  T * bh_val,
226  long * bh_ids,
227  const T * x = nullptr,
228  const long * ids = nullptr,
229  size_t k0 = 0)
230 {
231  heap_heapify< CMax<T, long> > (k, bh_val, bh_ids, x, ids, k0);
232 }
233 
234 
235 
236 /*******************************************************************
237  * Add n elements to the heap
238  *******************************************************************/
239 
240 
241 /* Add some elements to the heap */
242 template <class C> inline
243 void heap_addn (size_t k,
244  typename C::T * bh_val, typename C::TI * bh_ids,
245  const typename C::T * x,
246  const typename C::TI * ids,
247  size_t n)
248 {
249  size_t i;
250  if (ids)
251  for (i = 0; i < n; i++) {
252  if (C::cmp (bh_val[0], x[i])) {
253  heap_pop<C> (k, bh_val, bh_ids);
254  heap_push<C> (k, bh_val, bh_ids, x[i], ids[i]);
255  }
256  }
257  else
258  for (i = 0; i < n; i++) {
259  if (C::cmp (bh_val[0], x[i])) {
260  heap_pop<C> (k, bh_val, bh_ids);
261  heap_push<C> (k, bh_val, bh_ids, x[i], i);
262  }
263  }
264 }
265 
266 
267 /* Partial instantiation for heaps with TI = long */
268 
269 template <typename T> inline
270 void minheap_addn (size_t k, T * bh_val, long * bh_ids,
271  const T * x, const long * ids, size_t n)
272 {
273  heap_addn<CMin<T, long> > (k, bh_val, bh_ids, x, ids, n);
274 }
275 
276 template <typename T> inline
277 void maxheap_addn (size_t k, T * bh_val, long * bh_ids,
278  const T * x, const long * ids, size_t n)
279 {
280  heap_addn<CMax<T, long> > (k, bh_val, bh_ids, x, ids, n);
281 }
282 
283 
284 
285 
286 
287 
288 /*******************************************************************
289  * Heap finalization (reorder elements)
290  *******************************************************************/
291 
292 
293 /* This function maps a binary heap into an sorted structure.
294  It returns the number */
295 template <typename C> inline
296 size_t heap_reorder (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
297 {
298  size_t i, ii;
299 
300  for (i = 0, ii = 0; i < k; i++) {
301  /* top element should be put at the end of the list */
302  typename C::T val = bh_val[0];
303  typename C::TI id = bh_ids[0];
304 
305  /* boundary case: we will over-ride this value if not a true element */
306  heap_pop<C> (k-i, bh_val, bh_ids);
307  bh_val[k-ii-1] = val;
308  bh_ids[k-ii-1] = id;
309  if (id != -1) ii++;
310  }
311  /* Count the number of elements which are effectively returned */
312  size_t nel = ii;
313 
314  memmove (bh_val, bh_val+k-ii, ii * sizeof(*bh_val));
315  memmove (bh_ids, bh_ids+k-ii, ii * sizeof(*bh_ids));
316 
317  for (; ii < k; ii++) {
318  bh_val[ii] = C::neutral();
319  bh_ids[ii] = -1;
320  }
321  return nel;
322 }
323 
324 template <typename T> inline
325 size_t minheap_reorder (size_t k, T * bh_val, long * bh_ids)
326 {
327  return heap_reorder< CMin<T, long> > (k, bh_val, bh_ids);
328 }
329 
330 template <typename T> inline
331 size_t maxheap_reorder (size_t k, T * bh_val, long * bh_ids)
332 {
333  return heap_reorder< CMax<T, long> > (k, bh_val, bh_ids);
334 }
335 
336 
337 
338 
339 
340 /*******************************************************************
341  * Operations on heap arrays
342  *******************************************************************/
343 
/** A template structure for a set of [min|max]-heaps. It is tailored
 * so that the actual data of the heaps can just live in compact
 * arrays: heap number key uses the k entries that start at offset
 * key * k of ids and val.
 */
template <typename C>
struct HeapArray {
    using TI = typename C::TI;
    using T = typename C::T;

    size_t nh;    ///< number of heaps
    size_t k;     ///< allocated size per heap
    TI * ids;     ///< identifiers (size nh * k)
    T * val;      ///< values (distances or similarities), size nh * k

    /// Return the list of values for a heap
    T * get_val (size_t key) {
        const size_t offset = key * k;
        return val + offset;
    }

    /// Return the list of identifiers that corresponds to get_val (key)
    TI * get_ids (size_t key) {
        const size_t offset = key * k;
        return ids + offset;
    }

    /// prepare all the heaps before adding
    void heapify ();

    /** add nj elements to heaps i0:i0+ni, with sequential ids
     *
     * @param nj    nb of elements to add to each heap
     * @param vin   elements to add, size ni * nj
     * @param j0    add this to the ids that are added
     * @param i0    first heap to update
     * @param ni    nb of heaps to update (-1 = use nh)
     */
    void addn (size_t nj, const T *vin, TI j0 = 0,
               size_t i0 = 0, long ni = -1);

    /** same as addn, with explicit ids
     *
     * @param id_in      ids of the elements to add, size ni * nj
     * @param id_stride  stride for id_in
     */
    void addn_with_ids (
        size_t nj, const T *vin, const TI *id_in = nullptr,
        long id_stride = 0, size_t i0 = 0, long ni = -1);

    /// reorder all the heaps
    void reorder ();

    /** this is not really a heap function. It just finds the per-line
     * extrema of each line of array D
     * @param vals_out    extreme value of each line (size nh, or NULL)
     * @param idx_out     index of extreme value (size nh or NULL)
     */
    void per_line_extrema (T *vals_out, TI *idx_out) const;

};
398 
399 
400 /* Define useful heaps */
401 typedef HeapArray<CMin<float, long> > float_minheap_array_t;
402 typedef HeapArray<CMin<int, long> > int_minheap_array_t;
403 
404 typedef HeapArray<CMax<float, long> > float_maxheap_array_t;
405 typedef HeapArray<CMax<int, long> > int_maxheap_array_t;
406 
407 // The heap templates are instantiated explicitly in Heap.cpp
408 
409 
410 
411 
412 
413 
414 
415 
416 
417 
418 
419 
420 
421 
422 
423 
424 
425 
426 
427 /*********************************************************************
428  * Indirect heaps: instead of having
429  *
430  * node i = (bh_ids[i], bh_val[i]),
431  *
432  * in indirect heaps,
433  *
434  * node i = (bh_ids[i], bh_val[bh_ids[i]]),
435  *
436  *********************************************************************/
437 
438 
/** Pops the top element of the indirect heap bh_ids[0..k-1], where
 * the value of node i is bh_val[bh_ids[i]]. Only bh_ids is modified;
 * on output the first k-1 slots form a heap again and the slot k-1 is
 * left unchanged.
 *
 * @param k       number of elements in the heap before the pop;
 *                nothing is done when k == 0
 * @param bh_val  table of values, indexed by the ids stored in the heap
 * @param bh_ids  identifiers that make up the heap
 */
template <class C>
inline
void indirect_heap_pop (
    size_t k,
    const typename C::T * bh_val,
    typename C::TI * bh_ids)
{
    if (k == 0)
        return; /* empty heap: there is nothing to pop */

    /* 0-based indexing: the children of node i are 2*i+1 and 2*i+2.
     * This avoids forming a pointer before the start of bh_ids. */
    const size_t last = k - 1;
    /* the last element is re-inserted from the top of the heap */
    const typename C::TI last_id = bh_ids[last];
    const typename C::T val = bh_val[last_id];
    size_t i = 0;
    for (;;) {
        const size_t i1 = 2 * i + 1;
        if (i1 > last)
            break; /* node i has no child */
        const size_t i2 = i1 + 1;
        /* bh_ids[i2] is read only when the right child exists, so no
         * entry beyond the k elements of the heap is accessed */
        size_t child = i1;
        if (i2 <= last && !C::cmp (bh_val[bh_ids[i1]], bh_val[bh_ids[i2]]))
            child = i2;
        const typename C::TI child_id = bh_ids[child];
        if (C::cmp (val, bh_val[child_id]))
            break; /* the re-inserted element can be stored at node i */
        bh_ids[i] = child_id;
        i = child;
    }
    bh_ids[i] = last_id;
}
469 
470 
471 
/** Pushes the identifier id into the indirect heap bh_ids[0..k-2],
 * where the value of node i is bh_val[bh_ids[i]]. Only bh_ids is
 * modified; on output the slot k-1 is defined.
 *
 * @param k       number of elements in the heap after the push;
 *                nothing is stored when k == 0
 * @param bh_val  table of values, indexed by the ids stored in the heap
 * @param bh_ids  identifiers that make up the heap
 * @param id      identifier to insert, its value is bh_val[id]
 */
template <class C>
inline
void indirect_heap_push (size_t k,
                         const typename C::T * bh_val, typename C::TI * bh_ids,
                         typename C::TI id)
{
    if (k == 0)
        return; /* no slot available for the new element */

    /* 0-based indexing: the father of node i is (i-1)/2. This avoids
     * forming a pointer before the start of bh_ids. */
    const typename C::T val = bh_val[id];
    size_t i = k - 1;
    while (i > 0) {
        const size_t i_father = (i - 1) / 2;
        if (!C::cmp (val, bh_val[bh_ids[i_father]])) /* heap structure ok */
            break;
        /* move the father down and continue one level up */
        bh_ids[i] = bh_ids[i_father];
        i = i_father;
    }
    bh_ids[i] = id;
}
490 
491 
492 } // namespace faiss
493 
494 #endif /* FAISS_Heap_h */
T * get_val(size_t key)
Return the list of values for a heap.
Definition: Heap.h:359
void heap_pop(size_t k, typename C::T *bh_val, typename C::TI *bh_ids)
Definition: Heap.h:89
size_t k
allocated size per heap
Definition: Heap.h:354
void reorder()
reorder all the heaps
Definition: Heap.cpp:27
void per_line_extrema(T *vals_out, TI *idx_out) const
Definition: Heap.cpp:85
TI * ids
identifiers (size nh * k)
Definition: Heap.h:355
void addn_with_ids(size_t nj, const T *vin, const TI *id_in=nullptr, long id_stride=0, size_t i0=0, long ni=-1)
Definition: Heap.cpp:57
void heapify()
prepare all the heaps before adding
Definition: Heap.cpp:19
TI * get_ids(size_t key)
Correspponding identifiers.
Definition: Heap.h:362
T * val
values (distances or similarities), size nh * k
Definition: Heap.h:356
size_t nh
number of heaps
Definition: Heap.h:353
void heap_push(size_t k, typename C::T *bh_val, typename C::TI *bh_ids, typename C::T val, typename C::TI ids)
Definition: Heap.h:125
void addn(size_t nj, const T *vin, TI j0=0, size_t i0=0, long ni=-1)
Definition: Heap.cpp:35