/** * Copyright (c) Facebook, Inc. and its affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ // -*- c++ -*- /* * C++ support for heaps. The set of functions is tailored for * efficient similarity search. * * There is no specific object for a heap, and the functions that * operate on a signle heap are inlined, because heaps are often * small. More complex functions are implemented in Heaps.cpp * */ #ifndef FAISS_Heap_h #define FAISS_Heap_h #include #include #include #include #include #include namespace faiss { /******************************************************************* * C object: uniform handling of min and max heap *******************************************************************/ /** The C object gives the type T of the values in the heap, the type * of the keys, TI and the comparison that is done: > for the minheap * and < for the maxheap. The neutral value will always be dropped in * favor of any other value in the heap. */ template struct CMax; // traits of minheaps = heaps where the minimum value is stored on top // useful to find the *max* values of an array template struct CMin { typedef T_ T; typedef TI_ TI; typedef CMax Crev; inline static bool cmp (T a, T b) { return a < b; } // value that will be popped first -> must be smaller than all others // for int types this is not strictly the smallest val (-max - 1) inline static T neutral () { return -std::numeric_limits::max(); } }; template struct CMax { typedef T_ T; typedef TI_ TI; typedef CMin Crev; inline static bool cmp (T a, T b) { return a > b; } inline static T neutral () { return std::numeric_limits::max(); } }; /******************************************************************* * Basic heap ops: push and pop *******************************************************************/ /** Pops the top element from the heap defined by bh_val[0..k-1] and * bh_ids[0..k-1]. on output the element at k-1 is undefined. */ template inline void heap_pop (size_t k, typename C::T * bh_val, typename C::TI * bh_ids) { bh_val--; /* Use 1-based indexing for easier node->child translation */ bh_ids--; typename C::T val = bh_val[k]; size_t i = 1, i1, i2; while (1) { i1 = i << 1; i2 = i1 + 1; if (i1 > k) break; if (i2 == k + 1 || C::cmp(bh_val[i1], bh_val[i2])) { if (C::cmp(val, bh_val[i1])) break; bh_val[i] = bh_val[i1]; bh_ids[i] = bh_ids[i1]; i = i1; } else { if (C::cmp(val, bh_val[i2])) break; bh_val[i] = bh_val[i2]; bh_ids[i] = bh_ids[i2]; i = i2; } } bh_val[i] = bh_val[k]; bh_ids[i] = bh_ids[k]; } /** Pushes the element (val, ids) into the heap bh_val[0..k-2] and * bh_ids[0..k-2]. on output the element at k-1 is defined. */ template inline void heap_push (size_t k, typename C::T * bh_val, typename C::TI * bh_ids, typename C::T val, typename C::TI ids) { bh_val--; /* Use 1-based indexing for easier node->child translation */ bh_ids--; size_t i = k, i_father; while (i > 1) { i_father = i >> 1; if (!C::cmp (val, bh_val[i_father])) /* the heap structure is ok */ break; bh_val[i] = bh_val[i_father]; bh_ids[i] = bh_ids[i_father]; i = i_father; } bh_val[i] = val; bh_ids[i] = ids; } /* Partial instanciation for heaps with TI = long */ template inline void minheap_pop (size_t k, T * bh_val, long * bh_ids) { heap_pop > (k, bh_val, bh_ids); } template inline void minheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids) { heap_push > (k, bh_val, bh_ids, val, ids); } template inline void maxheap_pop (size_t k, T * bh_val, long * bh_ids) { heap_pop > (k, bh_val, bh_ids); } template inline void maxheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids) { heap_push > (k, bh_val, bh_ids, val, ids); } /******************************************************************* * Heap initialization *******************************************************************/ /* Initialization phase for the heap (with unconditionnal pushes). * Store k0 elements in a heap containing up to k values. Note that * (bh_val, bh_ids) can be the same as (x, ids) */ template inline void heap_heapify ( size_t k, typename C::T * bh_val, typename C::TI * bh_ids, const typename C::T * x = nullptr, const typename C::TI * ids = nullptr, size_t k0 = 0) { if (k0 > 0) assert (x); if (ids) { for (size_t i = 0; i < k0; i++) heap_push (i+1, bh_val, bh_ids, x[i], ids[i]); } else { for (size_t i = 0; i < k0; i++) heap_push (i+1, bh_val, bh_ids, x[i], i); } for (size_t i = k0; i < k; i++) { bh_val[i] = C::neutral(); bh_ids[i] = -1; } } template inline void minheap_heapify ( size_t k, T * bh_val, long * bh_ids, const T * x = nullptr, const long * ids = nullptr, size_t k0 = 0) { heap_heapify< CMin > (k, bh_val, bh_ids, x, ids, k0); } template inline void maxheap_heapify ( size_t k, T * bh_val, long * bh_ids, const T * x = nullptr, const long * ids = nullptr, size_t k0 = 0) { heap_heapify< CMax > (k, bh_val, bh_ids, x, ids, k0); } /******************************************************************* * Add n elements to the heap *******************************************************************/ /* Add some elements to the heap */ template inline void heap_addn (size_t k, typename C::T * bh_val, typename C::TI * bh_ids, const typename C::T * x, const typename C::TI * ids, size_t n) { size_t i; if (ids) for (i = 0; i < n; i++) { if (C::cmp (bh_val[0], x[i])) { heap_pop (k, bh_val, bh_ids); heap_push (k, bh_val, bh_ids, x[i], ids[i]); } } else for (i = 0; i < n; i++) { if (C::cmp (bh_val[0], x[i])) { heap_pop (k, bh_val, bh_ids); heap_push (k, bh_val, bh_ids, x[i], i); } } } /* Partial instanciation for heaps with TI = long */ template inline void minheap_addn (size_t k, T * bh_val, long * bh_ids, const T * x, const long * ids, size_t n) { heap_addn > (k, bh_val, bh_ids, x, ids, n); } template inline void maxheap_addn (size_t k, T * bh_val, long * bh_ids, const T * x, const long * ids, size_t n) { heap_addn > (k, bh_val, bh_ids, x, ids, n); } /******************************************************************* * Heap finalization (reorder elements) *******************************************************************/ /* This function maps a binary heap into an sorted structure. It returns the number */ template inline size_t heap_reorder (size_t k, typename C::T * bh_val, typename C::TI * bh_ids) { size_t i, ii; for (i = 0, ii = 0; i < k; i++) { /* top element should be put at the end of the list */ typename C::T val = bh_val[0]; typename C::TI id = bh_ids[0]; /* boundary case: we will over-ride this value if not a true element */ heap_pop (k-i, bh_val, bh_ids); bh_val[k-ii-1] = val; bh_ids[k-ii-1] = id; if (id != -1) ii++; } /* Count the number of elements which are effectively returned */ size_t nel = ii; memmove (bh_val, bh_val+k-ii, ii * sizeof(*bh_val)); memmove (bh_ids, bh_ids+k-ii, ii * sizeof(*bh_ids)); for (; ii < k; ii++) { bh_val[ii] = C::neutral(); bh_ids[ii] = -1; } return nel; } template inline size_t minheap_reorder (size_t k, T * bh_val, long * bh_ids) { return heap_reorder< CMin > (k, bh_val, bh_ids); } template inline size_t maxheap_reorder (size_t k, T * bh_val, long * bh_ids) { return heap_reorder< CMax > (k, bh_val, bh_ids); } /******************************************************************* * Operations on heap arrays *******************************************************************/ /** a template structure for a set of [min|max]-heaps it is tailored * so that the actual data of the heaps can just live in compact * arrays. */ template struct HeapArray { typedef typename C::TI TI; typedef typename C::T T; size_t nh; ///< number of heaps size_t k; ///< allocated size per heap TI * ids; ///< identifiers (size nh * k) T * val; ///< values (distances or similarities), size nh * k /// Return the list of values for a heap T * get_val (size_t key) { return val + key * k; } /// Correspponding identifiers TI * get_ids (size_t key) { return ids + key * k; } /// prepare all the heaps before adding void heapify (); /** add nj elements to heaps i0:i0+ni, with sequential ids * * @param nj nb of elements to add to each heap * @param vin elements to add, size ni * nj * @param j0 add this to the ids that are added * @param i0 first heap to update * @param ni nb of elements to update (-1 = use nh) */ void addn (size_t nj, const T *vin, TI j0 = 0, size_t i0 = 0, long ni = -1); /** same as addn * * @param id_in ids of the elements to add, size ni * nj * @param id_stride stride for id_in */ void addn_with_ids ( size_t nj, const T *vin, const TI *id_in = nullptr, long id_stride = 0, size_t i0 = 0, long ni = -1); /// reorder all the heaps void reorder (); /** this is not really a heap function. It just finds the per-line * extrema of each line of array D * @param vals_out extreme value of each line (size nh, or NULL) * @param idx_out index of extreme value (size nh or NULL) */ void per_line_extrema (T *vals_out, TI *idx_out) const; }; /* Define useful heaps */ typedef HeapArray > float_minheap_array_t; typedef HeapArray > int_minheap_array_t; typedef HeapArray > float_maxheap_array_t; typedef HeapArray > int_maxheap_array_t; // The heap templates are instanciated explicitly in Heap.cpp /********************************************************************* * Indirect heaps: instead of having * * node i = (bh_ids[i], bh_val[i]), * * in indirect heaps, * * node i = (bh_ids[i], bh_val[bh_ids[i]]), * *********************************************************************/ template inline void indirect_heap_pop ( size_t k, const typename C::T * bh_val, typename C::TI * bh_ids) { bh_ids--; /* Use 1-based indexing for easier node->child translation */ typename C::T val = bh_val[bh_ids[k]]; size_t i = 1; while (1) { size_t i1 = i << 1; size_t i2 = i1 + 1; if (i1 > k) break; typename C::TI id1 = bh_ids[i1], id2 = bh_ids[i2]; if (i2 == k + 1 || C::cmp(bh_val[id1], bh_val[id2])) { if (C::cmp(val, bh_val[id1])) break; bh_ids[i] = id1; i = i1; } else { if (C::cmp(val, bh_val[id2])) break; bh_ids[i] = id2; i = i2; } } bh_ids[i] = bh_ids[k]; } template inline void indirect_heap_push (size_t k, const typename C::T * bh_val, typename C::TI * bh_ids, typename C::TI id) { bh_ids--; /* Use 1-based indexing for easier node->child translation */ typename C::T val = bh_val[id]; size_t i = k; while (i > 1) { size_t i_father = i >> 1; if (!C::cmp (val, bh_val[bh_ids[i_father]])) break; bh_ids[i] = bh_ids[i_father]; i = i_father; } bh_ids[i] = id; } } // namespace faiss #endif /* FAISS_Heap_h */