# Copyright (C) 2022-2025 Exaloop Inc.

from .ndarray import ndarray
from .routines import asarray, broadcast_to, full, copyto, round, moveaxis, concatenate, take, atleast_1d
from .ndmath import isnan as math_isnan, floor, ceil, subtract, add, true_divide
from .lib.arraysetops import unique
import util

newaxis = None

def _check_out(out, shape):
    if not isinstance(out, ndarray):
        compile_error("output must be an array")

    if out.ndim != staticlen(shape):
        compile_error("output parameter has the wrong number of dimensions")

    if out.shape != shape:
        raise ValueError("output parameter has incorrect shape")

def _float(x):
    T = type(x)
    if (T is float or T is float32 or T is float16 or T is complex
            or T is complex64):
        return x
    else:
        return util.cast(x, float)

def _nan(T: type):
    if T is float or T is float32 or T is float16:
        return util.nan(T)
    elif T is complex:
        return complex(util.nan64(), util.nan64())
    elif T is complex64:
        return complex64(util.nan32(), util.nan32())
    else:
        compile_error("[internal error] no nan for type " + T.__name__)

def _isnan(x):
    T = type(x)
    if T is float or T is float32 or T is float16:
        return util.isnan(x)
    else:
        return math_isnan(x)

def _supports_nan(T: type):
    return (T is float or T is float32 or T is float16 or T is float128
            or T is bfloat16 or T is complex or T is complex64)

def _nan_to_back(v: Ptr[T], n: int, T: type):
    if _supports_nan(T):
        fill_index = 0
        while fill_index < n and not _isnan(v[fill_index]):
            fill_index += 1

        for i in range(fill_index + 1, n):
            e = v[i]
            if not _isnan(e):
                v[fill_index] = e
                fill_index += 1

        return fill_index
    else:
        return n

def _make_reducer(R, ans_type: type, dtype: type, conv_to_float: Static[int],
                  bool_to_int: Static[int], **kwargs):
    if dtype is NoneType:
        if conv_to_float:
            ftype = type(_float(ans_type()))
            return R(ftype, **kwargs)
        elif bool_to_int and ans_type is bool:
            return R(int, **kwargs)
        else:
            return R(ans_type, **kwargs)
    else:
        if conv_to_float:
            ftype = type(_float(dtype()))
            return R(ftype, **kwargs)
        else:
            return R(dtype, **kwargs)

def _cast_elem(e0, dtype: type, conv_to_float: Static[int]):
    if dtype is not NoneType:
        e1 = util.cast(e0, dtype)
    else:
        e1 = e0

    if conv_to_float:
        e2 = _float(e1)
    else:
        e2 = e1

    return e2

def _increment_ptr(p: Ptr[T], stride: int, T: type):
    return Ptr[T](p.as_byte() + stride)

def _where_to_array(where, arr):
    if where is None or isinstance(where, util._NoValue):
        return None
    else:
        return broadcast_to(asarray(where), arr.shape)

def _pairwise_sum_complex(a: Ptr[C], n: int, stride: int, C: type):
    PW_BLOCKSIZE: Static[int] = 128
    T = type(C().real)
    sz = T.__elemsize__
    p = a.as_byte()

    if n < 8:
        rr = T(-0.0)
        ri = T(-0.0)

        for i in range(0, n, 2):
            rr += Ptr[T](p + i * stride + 0)[0]
            ri += Ptr[T](p + i * stride + sz)[0]

        return C(rr, ri)
    elif n <= PW_BLOCKSIZE:
        r0 = Ptr[T](p + 0 * stride)[0]
        r1 = Ptr[T](p + 0 * stride + sz)[0]
        r2 = Ptr[T](p + 2 * stride)[0]
        r3 = Ptr[T](p + 2 * stride + sz)[0]
        r4 = Ptr[T](p + 4 * stride)[0]
        r5 = Ptr[T](p + 4 * stride + sz)[0]
        r6 = Ptr[T](p + 6 * stride)[0]
        r7 = Ptr[T](p + 6 * stride + sz)[0]
        i = 8

        while i < n - (n & 7):
            (p + (i + 512//sz)*stride).__prefetch_r3__()
            r0 += Ptr[T](p + (i + 0) * stride)[0]
            r1 += Ptr[T](p + (i + 0) * stride + sz)[0]
            r2 += Ptr[T](p + (i + 2) * stride)[0]
            r3 += Ptr[T](p + (i + 2) * stride + sz)[0]
            r4 += Ptr[T](p + (i + 4) * stride)[0]
            r5 += Ptr[T](p + (i + 4) * stride + sz)[0]
            r6 += Ptr[T](p + (i + 6) * stride)[0]
            r7 += Ptr[T](p + (i + 6) * stride + sz)[0]
            i += 8

        rr = (r0 + r2) + (r4 + r6)
        ri = (r1 + r3) + (r5 + r7)

        while i < n:
            rr += Ptr[T](p + i * stride + 0)[0]
            ri += Ptr[T](p + i * stride + sz)[0]
            i += 2

        return C(rr, ri)
    else:
        n2 = n >> 1
        n2 -= n2 & 7
        return (_pairwise_sum_complex(a, n2, stride) +
                _pairwise_sum_complex(Ptr[C](p + n2 * stride), n - n2,
                                      stride))
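# Why pairwise summation: recursively splitting the sum keeps floating-point
# rounding error growth at O(log n) instead of the O(n) of naive
# left-to-right accumulation. A minimal sketch of the same idea (illustrative
# only; plain recursion, independent of the pointer-based code above):
#
#     def pairwise(xs):
#         if len(xs) <= 8:                # small base case: sum directly
#             return builtins.sum(xs)
#         mid = len(xs) // 2
#         return pairwise(xs[:mid]) + pairwise(xs[mid:])
#
# The complex variant above tracks the real and imaginary scalar lanes
# separately and unrolls the block loop by 8 for speed.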
def _pairwise_sum(a: Ptr[T], n: int, stride: int, T: type,
                  dtype: type = NoneType):
    if T is complex or T is complex64:
        return _pairwise_sum_complex(a, n << 1, stride >> 1)

    if dtype is NoneType:
        return _pairwise_sum(a, n, stride, dtype=T)

    PW_BLOCKSIZE: Static[int] = 128
    p = a.as_byte()

    if n < 8:
        res = util.cast(T(-0.0), dtype)

        for i in range(n):
            res += util.cast(Ptr[T](p + i * stride)[0], dtype)

        return res
    elif n <= PW_BLOCKSIZE:
        r0 = util.cast(Ptr[T](p + 0 * stride)[0], dtype)
        r1 = util.cast(Ptr[T](p + 1 * stride)[0], dtype)
        r2 = util.cast(Ptr[T](p + 2 * stride)[0], dtype)
        r3 = util.cast(Ptr[T](p + 3 * stride)[0], dtype)
        r4 = util.cast(Ptr[T](p + 4 * stride)[0], dtype)
        r5 = util.cast(Ptr[T](p + 5 * stride)[0], dtype)
        r6 = util.cast(Ptr[T](p + 6 * stride)[0], dtype)
        r7 = util.cast(Ptr[T](p + 7 * stride)[0], dtype)
        i = 8

        while i < n - (n & 7):
            (p + (i + 512//T.__elemsize__)*stride).__prefetch_r3__()
            r0 += util.cast(Ptr[T](p + (i + 0) * stride)[0], dtype)
            r1 += util.cast(Ptr[T](p + (i + 1) * stride)[0], dtype)
            r2 += util.cast(Ptr[T](p + (i + 2) * stride)[0], dtype)
            r3 += util.cast(Ptr[T](p + (i + 3) * stride)[0], dtype)
            r4 += util.cast(Ptr[T](p + (i + 4) * stride)[0], dtype)
            r5 += util.cast(Ptr[T](p + (i + 5) * stride)[0], dtype)
            r6 += util.cast(Ptr[T](p + (i + 6) * stride)[0], dtype)
            r7 += util.cast(Ptr[T](p + (i + 7) * stride)[0], dtype)
            i += 8

        res = ((r0 + r1) + (r2 + r3)) + ((r4 + r5) + (r6 + r7))

        while i < n:
            res += util.cast(Ptr[T](p + i * stride)[0], dtype)
            i += 1

        return res
    else:
        n2 = n >> 1
        n2 -= n2 & 7
        return (_pairwise_sum(a, n2, stride, dtype=dtype) +
                _pairwise_sum(Ptr[T](p + n2 * stride), n - n2, stride,
                              dtype=dtype))

def _empty_like(arr, shape, dtype: type):
    fcontig = arr._should_transpose()
    p = Ptr[dtype](util.count(shape))
    return ndarray(shape, p, fcontig=fcontig)
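# Reducer protocol: every *Redux class below implements
#     create(T, **kwargs) -> reducer  (factory; picks accumulator type T)
#     accept(item, index)             (consume one element)
#     result(count)                   (final value after `count` accepts)
#     empty(**kwargs)                 (raise if zero-size input has no identity)
#     done()                          (early exit, e.g. `any` after first True)
# and may optionally provide
#     loop(ptr, n, stride, partial)   (fast strided inner loop)
#     gradual_init / gradual_accept / gradual_result
#                                     (element-wise accumulation into an output
#                                      array; used by _reduce_gradual below).
# A hypothetical minimal reducer following this contract:
#
#     class CountRedux:
#         n: int
#         T: type
#         def create(T: type, **kwargs):
#             return CountRedux[T](**kwargs)
#         def __init__(self, **kwargs):
#             self.n = 0
#         def accept(self, item: T, index: int):
#             self.n += 1
#         def result(self, count: int):
#             return self.n
#         def empty(**kwargs):
#             pass
#         def done(self):
#             return False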
def _reduce_all(arr,
                R,
                empty,
                dtype: type = NoneType,
                out=None,
                keepdims: Static[int] = False,
                where=util._NoValue(),
                conv_to_float: Static[int] = False,
                bool_to_int: Static[int] = False,
                **kwargs):
    if out is not None:
        if keepdims:
            _check_out(out, (1, ) * arr.ndim)
        else:
            _check_out(out, ())

    n = arr.size
    p = arr.data
    shape = arr.shape
    strides = arr.strides

    if empty is not None:
        if n == 0:
            empty(**kwargs)

    where = _where_to_array(where, arr)
    redux = _make_reducer(R, arr.dtype, dtype, conv_to_float, bool_to_int,
                          **kwargs)
    i = 0

    if where is None:
        if arr.ndim == 1:
            stride = strides[0]

            if hasattr(redux, "loop"):
                redux.loop(p, n, stride, partial=False)
                i = n
            else:
                while i < n:
                    e = _cast_elem(p[0], dtype, conv_to_float)
                    redux.accept(e, i)
                    if redux.done():
                        break
                    p = _increment_ptr(p, stride)
                    i += 1
        else:
            if arr._is_contig:
                if hasattr(redux, "loop"):
                    redux.loop(p, n, arr.itemsize, partial=False)
                    i = n
                else:
                    while i < n:
                        e = _cast_elem(p[i], dtype, conv_to_float)
                        redux.accept(e, i)
                        if redux.done():
                            break
                        i += 1
            else:
                if hasattr(redux, "loop") and arr.ndim > 0:
                    # Loop along the axis with the smallest absolute stride
                    # for better cache locality.
                    loop_axis = -1
                    min_abs_stride = 0x7FFFFFFFFFFFFFFF

                    for j in staticrange(staticlen(shape)):
                        stride = strides[j]
                        if stride:
                            abs_stride = abs(stride)
                            if abs_stride < min_abs_stride:
                                loop_axis = j
                                min_abs_stride = abs_stride

                    if loop_axis == -1:
                        loop_axis = arr.ndim - 1

                    outer_loop_shape = util.tuple_delete(shape, loop_axis)
                    loop_size = util.tuple_get(shape, loop_axis)
                    loop_stride = util.tuple_get(strides, loop_axis)

                    for idx in util.multirange(outer_loop_shape):
                        idx1 = util.tuple_insert(idx, loop_axis, 0)
                        q = arr._ptr(idx1)
                        redux.loop(q, loop_size, loop_stride, partial=True)

                    i = loop_size * util.count(outer_loop_shape)
                else:
                    A = arr.T if arr._should_transpose() else arr

                    for idx in util.multirange(A.shape):
                        e = _cast_elem(A._ptr(idx)[0], dtype, conv_to_float)
                        redux.accept(e, i)
                        if redux.done():
                            break
                        i += 1
    else:
        if arr._contig_match(where):
            w = where.data

            for k in range(n):
                if not w[k]:
                    continue
                e = _cast_elem(p[k], dtype, conv_to_float)
                redux.accept(e, i)
                if redux.done():
                    break
                i += 1
        else:
            transpose = arr._should_transpose(where)
            A = arr
            W = where

            if transpose:
                A = A.T
                W = W.T

            for idx in util.multirange(A.shape):
                if not W._ptr(idx)[0]:
                    continue
                e = _cast_elem(A._ptr(idx)[0], dtype, conv_to_float)
                redux.accept(e, i)
                if redux.done():
                    break
                i += 1

    ans = redux.result(i)

    if out is not None:
        out.data[0] = util.cast(ans, out.dtype)
        return out
    else:
        if keepdims:
            return asarray(ans).reshape((1, ) * arr.ndim)
        else:
            return ans

@tuple
class _GradualFunctor:
    redux: R
    k: int
    kwargs: KW
    dtype: type
    conv_to_float: Static[int]
    R: type
    KW: type

    def __new__(redux: R, k: int, dtype: type, conv_to_float: Static[int],
                kwargs: KW, R: type,
                KW: type) -> _GradualFunctor[dtype, conv_to_float, R, KW]:
        return (redux, k, kwargs)

    def __call__(self, q, p):
        e = _cast_elem(p[0], self.dtype, self.conv_to_float)
        q[0] = util.cast(
            self.redux.gradual_accept(q[0], e, self.k, **self.kwargs),
            type(q[0]))
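# "Gradual" reduction strategy: instead of reducing each output cell to
# completion (which repeatedly walks the large reduction stride),
# _reduce_gradual iterates the reduction axis in the OUTER loop and folds
# one slice at a time into the whole output array via gradual_accept. This
# keeps the inner traversal on the smallest stride, which is far friendlier
# to the cache. Schematically, for a sum over axis 0 of a C-contiguous 2-D
# array:
#
#     out[:] = 0
#     for k in range(a.shape[0]):     # reduction axis outermost
#         out += a[k, :]              # contiguous inner traversal
#
# _reduce selects this path only when the output stride is smaller than the
# reduction stride (see the min_stride_shape < min_stride_bound test there).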
def _reduce_gradual(arr,
                    R,
                    empty,
                    axis=None,
                    dtype: type = NoneType,
                    out=None,
                    keepdims: Static[int] = False,
                    conv_to_float: Static[int] = False,
                    bool_to_int: Static[int] = False,
                    **kwargs):
    data = arr.data
    shape = arr.shape
    strides = arr.strides
    ax = axis
    redux = _make_reducer(R, arr.dtype, dtype, conv_to_float, bool_to_int,
                          **kwargs)

    if hasattr(redux, "gradual_init"):
        init_value = redux.gradual_init(**kwargs)
    else:
        init_value = None

    if staticlen(ax) == 1:
        ax0 = ax[0]
        length = util.tuple_get(shape, ax0)
        stride = util.tuple_get(arr.strides, ax0)
        iter_shape = util.tuple_delete(shape, ax0)
        sub_strides = util.tuple_delete(arr.strides, ax0)

        if keepdims:
            ans_shape = util.tuple_set(shape, ax0, 1)
        else:
            ans_shape = iter_shape

        if out is None:
            out_type = type(redux.result(0))
            ans = _empty_like(arr, iter_shape, out_type)
        else:
            _check_out(out, ans_shape)
            if keepdims:
                sub_ans_strides = util.tuple_delete(out.strides, ax0)
                ans = ndarray(iter_shape, sub_ans_strides, out.data)
            else:
                ans = out

        if init_value is not None:
            for i in range(ans.size):
                ans.data[i] = util.cast(init_value, ans.dtype)

        for k in range(length):
            sub_arr_ptr = _increment_ptr(arr.data, k * stride)
            sub_arr = ndarray(iter_shape, sub_strides, sub_arr_ptr)
            fn = _GradualFunctor(redux, k, dtype, conv_to_float, kwargs)
            ndarray._loop((ans, sub_arr), fn, broadcast='none')

        if hasattr(redux, "gradual_result"):
            ans.map(lambda e: redux.gradual_result(e, length), inplace=True)

        if out is not None:
            return out
        elif keepdims:
            return ans.reshape(ans_shape)
        elif ans.ndim == 0:
            return ans.item()
        else:
            return ans

    new_shape = (0, ) * (staticlen(shape) - staticlen(ax))
    idx_bound = (0, ) * staticlen(ax)
    out_strides = (0, ) * (staticlen(shape) - staticlen(ax))
    sub_strides = (0, ) * staticlen(ax)
    mask = (False, ) * staticlen(shape)

    ptr_new_shape = Ptr[int](__ptr__(new_shape).as_byte())
    ptr_idx_bound = Ptr[int](__ptr__(idx_bound).as_byte())
    ptr_out_strides = Ptr[int](__ptr__(out_strides).as_byte())
    ptr_sub_strides = Ptr[int](__ptr__(sub_strides).as_byte())
    ptr_mask = Ptr[bool](__ptr__(mask).as_byte())

    shape_size = 1
    bound_size = 1
    a = 0
    b = 0

    for i in staticrange(staticlen(shape)):
        s = shape[i]
        stride = strides[i]

        if i in ax:
            bound_size *= s
            ptr_idx_bound[a] = s
            ptr_sub_strides[a] = stride
            ptr_mask[i] = False
            a += 1
        else:
            shape_size *= s
            ptr_new_shape[b] = s
            ptr_out_strides[b] = stride
            ptr_mask[i] = True
            b += 1

    if keepdims:
        ones = (1, ) * staticlen(idx_bound)
        ans_shape = util.reconstruct_index(new_shape, ones, mask)
    else:
        ans_shape = new_shape

    redux = _make_reducer(R, arr.dtype, dtype, conv_to_float, bool_to_int,
                          **kwargs)
    k = 0

    if out is None:
        out_type = type(redux.result(0))
        ans = _empty_like(arr, new_shape, out_type)
    else:
        _check_out(out, ans_shape)
        if keepdims:
            sub_ans_strides = (0, ) * staticlen(new_shape)
            ptr_sub_ans_strides = Ptr[int](__ptr__(sub_ans_strides).as_byte())
            a = 0

            for i in staticrange(out.ndim):
                if i not in ax:
                    ptr_sub_ans_strides[a] = out.strides[i]
                    a += 1

            ans = ndarray(new_shape, sub_ans_strides, out.data)
        else:
            ans = out

    if init_value is not None:
        for i in range(ans.size):
            ans.data[i] = util.cast(init_value, ans.dtype)

    if arr._should_transpose():
        idx_bound = idx_bound[::-1]
        sub_strides = sub_strides[::-1]

    for t2 in util.multirange(idx_bound):
        offset = 0
        for i in staticrange(staticlen(sub_strides)):
            offset += sub_strides[i] * t2[i]

        sub_arr_ptr = _increment_ptr(arr.data, offset)
        sub_arr = ndarray(new_shape, out_strides, sub_arr_ptr)
        fn = _GradualFunctor(redux, k, dtype, conv_to_float, kwargs)
        ndarray._loop((ans, sub_arr), fn, broadcast='none')
        k += 1

    if hasattr(redux, "gradual_result"):
        ans.map(lambda e: redux.gradual_result(e, bound_size), inplace=True)

    if out is not None:
        return out
    elif keepdims:
        return ans.reshape(ans_shape)
    elif ans.ndim == 0:
        return ans.item()
    else:
        return ans
def _reduce(arr,
            R,
            empty,
            axis=None,
            dtype: type = NoneType,
            out=None,
            keepdims: Static[int] = False,
            where=util._NoValue(),
            conv_to_float: Static[int] = False,
            bool_to_int: Static[int] = False,
            **kwargs):
    data = arr.data
    shape = arr.shape
    strides = arr.strides

    # Strangely, NumPy supports this, so we do too...
    if arr.ndim == 0 and isinstance(axis, int):
        if axis != 0 and axis != -1:
            util.normalize_axis_index(axis=axis, ndim=0)  # raises error

        return _reduce(arr,
                       R=R,
                       empty=empty,
                       axis=None,
                       dtype=dtype,
                       out=out,
                       keepdims=keepdims,
                       where=where,
                       conv_to_float=conv_to_float,
                       bool_to_int=bool_to_int,
                       **kwargs)

    if axis is None:
        ax = util.tuple_range(arr.ndim)
    elif isinstance(axis, int):
        ax = (util.normalize_axis_index(axis, arr.ndim), )
    else:
        ax = util.normalize_axis_tuple(axis, arr.ndim)

    if staticlen(ax) == staticlen(shape):
        return _reduce_all(arr,
                           R=R,
                           empty=empty,
                           dtype=dtype,
                           out=out,
                           keepdims=keepdims,
                           where=where,
                           conv_to_float=conv_to_float,
                           bool_to_int=bool_to_int,
                           **kwargs)

    if empty is not None:
        if arr.size == 0:
            empty(**kwargs)

    new_shape = (0, ) * (staticlen(shape) - staticlen(ax))
    idx_bound = (0, ) * staticlen(ax)
    out_strides = (0, ) * (staticlen(shape) - staticlen(ax))
    sub_strides = (0, ) * staticlen(ax)
    mask = (False, ) * staticlen(shape)

    ptr_new_shape = Ptr[int](__ptr__(new_shape).as_byte())
    ptr_idx_bound = Ptr[int](__ptr__(idx_bound).as_byte())
    ptr_out_strides = Ptr[int](__ptr__(out_strides).as_byte())
    ptr_sub_strides = Ptr[int](__ptr__(sub_strides).as_byte())
    ptr_mask = Ptr[bool](__ptr__(mask).as_byte())

    shape_size = 1
    bound_size = 1
    min_stride_bound = -1
    min_stride_shape = -1
    a = 0
    b = 0

    for i in staticrange(staticlen(shape)):
        s = shape[i]
        stride = strides[i]

        if i in ax:
            bound_size *= s
            ptr_idx_bound[a] = s
            ptr_sub_strides[a] = stride
            ptr_mask[i] = False

            if (stride and (min_stride_bound == -1
                            or abs(stride) < min_stride_bound)):
                min_stride_bound = stride

            a += 1
        else:
            shape_size *= s
            ptr_new_shape[b] = s
            ptr_out_strides[b] = stride
            ptr_mask[i] = True

            if (stride and (min_stride_shape == -1
                            or abs(stride) < min_stride_shape)):
                min_stride_shape = stride

            b += 1

    if hasattr(
            type(
                _make_reducer(R, arr.dtype, dtype, conv_to_float, bool_to_int,
                              **kwargs)), "gradual_init"):
        if (out is None
                and (where is None or isinstance(where, util._NoValue))
                and arr.ndim > 1 and min_stride_shape > 0
                and min_stride_shape < min_stride_bound):
            return _reduce_gradual(arr,
                                   R=R,
                                   empty=empty,
                                   axis=ax,
                                   dtype=dtype,
                                   out=out,
                                   keepdims=keepdims,
                                   conv_to_float=conv_to_float,
                                   bool_to_int=bool_to_int,
                                   **kwargs)

    if keepdims:
        ones = (1, ) * staticlen(idx_bound)
        ans_shape = util.reconstruct_index(new_shape, ones, mask)
    else:
        ans_shape = new_shape

    if out is None:
        out_type = type(
            _make_reducer(R, arr.dtype, dtype, conv_to_float, bool_to_int,
                          **kwargs).result(0))
        ans = _empty_like(arr, new_shape, out_type)
    else:
        _check_out(out, ans_shape)
        if keepdims:
            if staticlen(ax) == 1:
                sub_ans_strides = util.tuple_delete(out.strides, ax[0])
            else:
                sub_ans_strides = (0, ) * staticlen(new_shape)
                ptr_sub_ans_strides = Ptr[int](
                    __ptr__(sub_ans_strides).as_byte())
                a = 0

                for i in staticrange(out.ndim):
                    if i not in ax:
                        ptr_sub_ans_strides[a] = out.strides[i]
                        a += 1

            ans = ndarray(new_shape, sub_ans_strides, out.data)
        else:
            ans = out

    where = _where_to_array(where, arr)
    where_out_shape = (0, ) * (staticlen(shape) - staticlen(ax))
    where_sub_shape = (0, ) * staticlen(ax)
    where_out_strides = (0, ) * (staticlen(shape) - staticlen(ax))
    where_sub_strides = (0, ) * staticlen(ax)

    if where is not None and staticlen(shape) > 0:
        ptr_where_out_shape = Ptr[int](__ptr__(where_out_shape).as_byte())
        ptr_where_sub_shape = Ptr[int](__ptr__(where_sub_shape).as_byte())
        ptr_where_out_strides = Ptr[int](__ptr__(where_out_strides).as_byte())
        ptr_where_sub_strides = Ptr[int](__ptr__(where_sub_strides).as_byte())
        a = 0
        b = 0

        for i in staticrange(staticlen(shape)):
            if i in ax:
                ptr_where_sub_shape[a] = where.shape[i]
                ptr_where_sub_strides[a] = where.strides[i]
                a += 1
            else:
                ptr_where_out_shape[b] = where.shape[i]
                ptr_where_out_strides[b] = where.strides[i]
                b += 1

    if arr._should_transpose():
        new_shape = new_shape[::-1]
        out_strides = out_strides[::-1]
        where_out_strides = where_out_strides[::-1]
        ans1 = ndarray(ans.shape, ans.strides[::-1], ans.data)
    else:
        ans1 = ans

    for idx in util.multirange(new_shape):
        offset = 0
        for i in staticrange(staticlen(out_strides)):
            offset += out_strides[i] * idx[i]

        sub_arr_ptr = _increment_ptr(arr.data, offset)
        sub_arr = ndarray(idx_bound, sub_strides, sub_arr_ptr)

        if where is None:
            sub_where = None
        else:
            offset = 0
            for i in staticrange(staticlen(where_out_strides)):
                offset += where_out_strides[i] * idx[i]

            sub_where_ptr = _increment_ptr(where.data, offset)
            sub_where = ndarray(where_sub_shape, where_sub_strides,
                                sub_where_ptr)

        sub_rdx = _reduce_all(sub_arr,
                              R=R,
                              empty=None,
                              dtype=dtype,
                              out=None,
                              keepdims=False,
                              where=sub_where,
                              conv_to_float=conv_to_float,
                              bool_to_int=bool_to_int,
                              **kwargs)
        ans1._ptr(idx)[0] = util.cast(sub_rdx, ans.dtype)

    if out is not None:
        return out
    elif keepdims:
        return ans.reshape(ans_shape)
    elif ans.ndim == 0:
        return ans.item()
    else:
        return ans
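# Dispatch summary for _reduce:
#   * axis=None, or an axis tuple covering all dims -> _reduce_all
#     (single scalar result)
#   * output stride smaller than reduction stride, and the reducer supports
#     gradual accumulation                           -> _reduce_gradual
#   * otherwise -> one _reduce_all call per output cell, iterating
#     util.multirange(new_shape).
# For example (hypothetical 2-D float array `a`), sum(a, axis=0) reduces
# axis 0 into an output of shape (a.shape[1],), while sum(a, axis=(0, 1))
# takes the _reduce_all path and returns a scalar.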
class _FlattenFunctor:
    buffer: Ptr[dtype]
    k: int
    dtype: type

    def __init__(self, buffer: Ptr[dtype]):
        self.buffer = buffer
        self.k = 0

    def __call__(self, x):
        self.buffer[self.k] = x[0]
        self.k += 1

class _FlattenWhereFunctor:
    buffer: Ptr[dtype]
    k: int
    dtype: type

    def __init__(self, buffer: Ptr[dtype]):
        self.buffer = buffer
        self.k = 0

    def __call__(self, x, w):
        if w[0]:
            self.buffer[self.k] = x[0]
            self.k += 1
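# Buffered reductions: order statistics (median, quantile) and two-pass
# formulas (var, std) cannot be computed with a streaming reducer, so
# _reduce_buffered below gathers each reduction slice into a contiguous
# scratch buffer (via the functors above) and hands it to a plain
# `reducer(ptr, stride, n, dtype, **kwargs)` callback. When the slice is
# already laid out suitably and overwrite_input is allowed, the copy is
# skipped and the reducer works on the array memory in place.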
def _reduce_buffered(arr,
                     reducer,
                     dtype: type,
                     axis=None,
                     out=None,
                     overwrite_input: bool = False,
                     force_contig: bool = True,
                     keepdims: Static[int] = False,
                     where=util._NoValue(),
                     **kwargs):
    data = arr.data
    shape = arr.shape
    strides = arr.strides
    where = _where_to_array(where, arr)

    if axis is None:
        ax = util.tuple_range(arr.ndim)
    elif isinstance(axis, int):
        ax = (util.normalize_axis_index(axis, arr.ndim), )
    else:
        ax = util.normalize_axis_tuple(axis, arr.ndim)

    if staticlen(ax) == staticlen(shape):
        sz = arr.size

        if arr._is_contig and overwrite_input and where is None:
            result = reducer(arr.data, util.sizeof(arr.dtype), sz, dtype,
                             **kwargs)
        else:
            buffer = Ptr[arr.dtype](sz)

            if where is None:
                fn = _FlattenFunctor(buffer)
                ndarray._loop((arr, ), fn, broadcast='none')
                n = sz
            else:
                fn = _FlattenWhereFunctor(buffer)
                ndarray._loop((arr, where), fn, broadcast='none')
                n = fn.k

            result = reducer(buffer, util.sizeof(arr.dtype), n, dtype,
                             **kwargs)
            util.free(buffer)

        if out is None:
            if keepdims:
                return asarray(result).reshape((1, ) * arr.ndim)
            else:
                return result
        else:
            if keepdims:
                _check_out(out, (1, ) * arr.ndim)
            else:
                _check_out(out, ())
            out.data[0] = util.cast(result, out.dtype)
            return out

    new_shape = (0, ) * (staticlen(shape) - staticlen(ax))
    idx_bound = (0, ) * staticlen(ax)
    out_strides = (0, ) * (staticlen(shape) - staticlen(ax))
    stride_bound = (0, ) * staticlen(ax)
    mask = (False, ) * staticlen(shape)

    ptr_new_shape = Ptr[int](__ptr__(new_shape).as_byte())
    ptr_idx_bound = Ptr[int](__ptr__(idx_bound).as_byte())
    ptr_out_strides = Ptr[int](__ptr__(out_strides).as_byte())
    ptr_stride_bound = Ptr[int](__ptr__(stride_bound).as_byte())
    ptr_mask = Ptr[bool](__ptr__(mask).as_byte())

    shape_size = 1
    bound_size = 1
    a = 0
    b = 0

    for i in staticrange(staticlen(shape)):
        s = shape[i]
        stride = strides[i]

        if i in ax:
            bound_size *= s
            ptr_idx_bound[a] = s
            ptr_stride_bound[a] = stride
            ptr_mask[i] = False
            a += 1
        else:
            shape_size *= s
            ptr_new_shape[b] = s
            ptr_out_strides[b] = stride
            ptr_mask[i] = True
            b += 1

    if keepdims:
        ones = (1, ) * staticlen(idx_bound)
        ans_shape = util.reconstruct_index(new_shape, ones, mask)
    else:
        ans_shape = new_shape

    if out is None:
        out_type = type(reducer(Ptr[arr.dtype](), 0, 0, dtype, **kwargs))
        ans = _empty_like(arr, new_shape, out_type)
    else:
        _check_out(out, ans_shape)
        if keepdims:
            if staticlen(ax) == 1:
                sub_ans_strides = util.tuple_delete(out.strides, ax[0])
            else:
                sub_ans_strides = (0, ) * staticlen(new_shape)
                ptr_sub_ans_strides = Ptr[int](
                    __ptr__(sub_ans_strides).as_byte())
                a = 0

                for i in staticrange(out.ndim):
                    if i not in ax:
                        ptr_sub_ans_strides[a] = out.strides[i]
                        a += 1

            ans = ndarray(new_shape, sub_ans_strides, out.data)
        else:
            ans = out

    inplace = False
    stride = 0

    if where is not None or not overwrite_input:
        inplace = False
        stride = util.sizeof(arr.dtype)
    else:
        if staticlen(ax) == 1:
            inplace = True
            stride = stride_bound[0]
        else:
            if stride_bound == util.strides(idx_bound, False, arr.dtype):
                inplace = True
                stride = stride_bound[-1]
            elif stride_bound == util.strides(idx_bound, True, arr.dtype):
                inplace = True
                stride = stride_bound[0]
            else:
                inplace = False
                stride = util.sizeof(arr.dtype)

    if force_contig and stride != util.sizeof(arr.dtype):
        inplace = False
        stride = util.sizeof(arr.dtype)

    where_out_shape = (0, ) * (staticlen(shape) - staticlen(ax))
    where_sub_shape = (0, ) * staticlen(ax)
    where_out_strides = (0, ) * (staticlen(shape) - staticlen(ax))
    where_sub_strides = (0, ) * staticlen(ax)

    if where is not None and staticlen(shape) > 0:
        ptr_where_out_shape = Ptr[int](__ptr__(where_out_shape).as_byte())
        ptr_where_sub_shape = Ptr[int](__ptr__(where_sub_shape).as_byte())
        ptr_where_out_strides = Ptr[int](__ptr__(where_out_strides).as_byte())
        ptr_where_sub_strides = Ptr[int](__ptr__(where_sub_strides).as_byte())
        a = 0
        b = 0

        for i in staticrange(staticlen(shape)):
            if i in ax:
                ptr_where_sub_shape[a] = where.shape[i]
                ptr_where_sub_strides[a] = where.strides[i]
                a += 1
            else:
                ptr_where_out_shape[b] = where.shape[i]
                ptr_where_out_strides[b] = where.strides[i]
                b += 1

    buffer = Ptr[arr.dtype]() if inplace else Ptr[arr.dtype](bound_size)

    if arr._should_transpose():
        new_shape = new_shape[::-1]
        out_strides = out_strides[::-1]
        where_out_strides = where_out_strides[::-1]
        ans1 = ndarray(ans.shape, ans.strides[::-1], ans.data)
    else:
        ans1 = ans

    for idx in util.multirange(new_shape):
        n = bound_size
        offset = 0
        for i in staticrange(staticlen(out_strides)):
            offset += out_strides[i] * idx[i]

        sub_arr_ptr = _increment_ptr(arr.data, offset)

        if inplace:
            data_ptr = sub_arr_ptr
        else:
            sub_arr = ndarray(idx_bound, stride_bound, sub_arr_ptr)

            if where is None:
                fn = _FlattenFunctor(buffer)
                ndarray._loop((sub_arr, ), fn, broadcast='none')
            else:
                offset = 0
                for i in staticrange(staticlen(where_out_strides)):
                    offset += where_out_strides[i] * idx[i]

                sub_where_ptr = _increment_ptr(where.data, offset)
                sub_where = ndarray(where_sub_shape, where_sub_strides,
                                    sub_where_ptr)
                fn = _FlattenWhereFunctor(buffer)
                ndarray._loop((sub_arr, sub_where), fn, broadcast='none')
                n = fn.k

            data_ptr = buffer

        result = reducer(data_ptr, stride, n, dtype, **kwargs)
        ans1._ptr(idx)[0] = util.cast(result, ans.dtype)

    if not inplace:
        util.free(buffer)

    if out is not None:
        return out
    elif keepdims:
        return ans.reshape(ans_shape)
    elif ans.ndim == 0:
        return ans.item()
    else:
        return ans
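# _reduce_buffered_multi generalizes _reduce_buffered to reducers that
# produce several outputs per slice (e.g. quantile with a 1-D q): the
# callback writes `multi_num` results into an output pointer instead of
# returning a single value, and the answer array gains a leading axis of
# length multi_num.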
def _reduce_buffered_multi(arr,
                           reducer,
                           multi_num: int,
                           dtype: type,
                           axis=None,
                           out=None,
                           overwrite_input: bool = False,
                           force_contig: bool = True,
                           keepdims: Static[int] = False,
                           where=util._NoValue(),
                           **kwargs):
    data = arr.data
    shape = arr.shape
    strides = arr.strides
    where = _where_to_array(where, arr)

    if axis is None:
        ax = util.tuple_range(arr.ndim)
    elif isinstance(axis, int):
        ax = (util.normalize_axis_index(axis, arr.ndim), )
    else:
        ax = util.normalize_axis_tuple(axis, arr.ndim)

    new_shape = (0, ) * (staticlen(shape) - staticlen(ax))
    idx_bound = (0, ) * staticlen(ax)
    stride_bound = (0, ) * staticlen(ax)
    mask = (False, ) * staticlen(shape)

    ptr_new_shape = Ptr[int](__ptr__(new_shape).as_byte())
    ptr_idx_bound = Ptr[int](__ptr__(idx_bound).as_byte())
    ptr_stride_bound = Ptr[int](__ptr__(stride_bound).as_byte())
    ptr_mask = Ptr[bool](__ptr__(mask).as_byte())

    shape_size = 1
    bound_size = 1
    a = 0
    b = 0

    for i in staticrange(staticlen(shape)):
        s = shape[i]

        if i in ax:
            bound_size *= s
            ptr_idx_bound[a] = s
            ptr_stride_bound[a] = strides[i]
            ptr_mask[i] = False
            a += 1
        else:
            shape_size *= s
            ptr_new_shape[b] = s
            ptr_mask[i] = True
            b += 1

    if keepdims:
        ones = (1, ) * staticlen(idx_bound)
        ans_shape = (multi_num, ) + util.reconstruct_index(
            new_shape, ones, mask)
    else:
        ans_shape = (multi_num, ) + new_shape

    if out is None:
        ans = _empty_like(arr, ans_shape, dtype)
    else:
        _check_out(out, ans_shape)
        ans = out

    if staticlen(ax) == staticlen(shape):
        sz = arr.size

        if arr._is_contig and overwrite_input and where is None:
            reducer(arr.data, util.sizeof(arr.dtype), sz, ans.data, dtype,
                    **kwargs)
        else:
            buffer = Ptr[arr.dtype](sz)
            k = 0

            for idx in util.multirange(shape):
                if where is not None:
                    if not where._ptr(idx)[0]:
                        continue
                buffer[k] = arr._ptr(idx)[0]
                k += 1

            n = sz if where is None else k
            reducer(buffer, util.sizeof(arr.dtype), n, ans.data, dtype,
                    **kwargs)
            util.free(buffer)

        return ans

    inplace = False
    stride = 0

    if where is not None or not overwrite_input:
        inplace = False
        stride = util.sizeof(arr.dtype)
    else:
        if staticlen(ax) == 1:
            inplace = True
            stride = stride_bound[0]
        else:
            if stride_bound == util.strides(idx_bound, False, arr.dtype):
                inplace = True
                stride = stride_bound[-1]
            elif stride_bound == util.strides(idx_bound, True, arr.dtype):
                inplace = True
                stride = stride_bound[0]
            else:
                inplace = False
                stride = util.sizeof(arr.dtype)

    if force_contig and stride != util.sizeof(arr.dtype):
        inplace = False
        stride = util.sizeof(arr.dtype)

    buffer = Ptr[arr.dtype]() if inplace else Ptr[arr.dtype](bound_size)
    out_buffer = Ptr[ans.dtype](multi_num)

    for t1 in util.multirange(new_shape):
        n = bound_size

        if inplace:
            idx = util.reconstruct_index(t1, (0, ) * staticlen(idx_bound),
                                         mask)
            subdata = arr._ptr(idx)
        else:
            k = 0

            for t2 in util.multirange(idx_bound):
                idx = util.reconstruct_index(t1, t2, mask)
                if where is not None:
                    if not where._ptr(idx)[0]:
                        continue
                e = arr._ptr(idx)[0]
                buffer[k] = e
                k += 1

            subdata = buffer
            if where is not None:
                n = k

        reducer(subdata, stride, n, out_buffer, dtype, **kwargs)

        if keepdims:
            zeros = (0, ) * staticlen(idx_bound)
            t3 = util.reconstruct_index(t1, zeros, mask)
        else:
            t3 = t1

        for i in range(multi_num):
            ans._ptr((i, ) + t3)[0] = util.cast(out_buffer[i], ans.dtype)

    if not inplace:
        util.free(buffer)

    util.free(out_buffer)
    return ans
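# Accumulator typing (see _make_reducer): booleans are widened to int for
# sum/prod-style reductions (bool_to_int), and integer inputs are widened to
# float for mean/var-style reductions (conv_to_float), matching NumPy. For
# instance, sum of a boolean array counts the True values:
#
#     sum(array([True, True, True]))   # -> 3, not True
#     mean(array([1, 2]))              # -> 1.5 (float, not int)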
class SumRedux:
    total: T
    T: type

    def create(T: type, **kwargs):
        return SumRedux[T](**kwargs)

    def __init__(self, **kwargs):
        initial = kwargs.get("initial", T())
        self.total = util.cast(initial, T)

    def accept(self, item, index: int):
        self.total += util.cast(item, T)

    def result(self, count: int):
        return self.total

    def empty(**kwargs):
        pass

    def done(self):
        return False

    def gradual_init(self, **kwargs):
        return util.cast(kwargs.get("initial", T()), T)

    def gradual_accept(self, curr, item, index: int, **kwargs):
        return curr + util.cast(item, T)

    def _loop(a: Ptr[S], n: int, stride: int, S: type):
        ans = T()

        if (T is float or T is float32 or T is float16 or T is complex
                or T is complex64):
            ans += _pairwise_sum(a, n, stride, dtype=T)
        else:
            for i in range(n):
                item = _increment_ptr(a, i * stride)[0]
                ans += util.cast(item, T)

        return ans

    def loop(self, a: Ptr[S], n: int, stride: int, partial: Static[int],
             S: type):
        self.total += SumRedux[T]._loop(a, n, stride, S)

class NanSumRedux:
    total: T
    T: type

    def create(T: type, **kwargs):
        return NanSumRedux[T](**kwargs)

    def __init__(self, **kwargs):
        initial = kwargs.get("initial", T())
        self.total = util.cast(initial, T)

    def accept(self, item, index: int):
        if not _isnan(item):
            self.total += util.cast(item, T)

    def result(self, count: int):
        return self.total

    def empty(**kwargs):
        pass

    def done(self):
        return False

    def gradual_init(self, **kwargs):
        return util.cast(kwargs.get("initial", T()), T)

    def gradual_accept(self, curr, item, index: int, **kwargs):
        return curr if _isnan(item) else curr + util.cast(item, T)

    def loop(self, a: Ptr[S], n: int, stride: int, partial: Static[int],
             S: type):
        ans = T()

        for i in range(n):
            item = _increment_ptr(a, i * stride)[0]
            if not _isnan(item):
                ans += util.cast(item, T)

        self.total += ans

class ProdRedux:
    total: T
    T: type

    def create(T: type, **kwargs):
        return ProdRedux[T](**kwargs)

    def __init__(self, **kwargs):
        initial = kwargs.get("initial", T(1))
        self.total = util.cast(initial, T)

    def accept(self, item, index: int):
        self.total *= util.cast(item, T)

    def result(self, count: int):
        return self.total

    def empty(**kwargs):
        pass

    def done(self):
        return False

    def gradual_init(self, **kwargs):
        return util.cast(kwargs.get("initial", T(1)), T)

    def gradual_accept(self, curr, item, index: int, **kwargs):
        return curr * util.cast(item, T)

    def loop(self, a: Ptr[S], n: int, stride: int, partial: Static[int],
             S: type):
        ans = T(1)

        for i in range(n):
            item = _increment_ptr(a, i * stride)[0]
            ans *= util.cast(item, T)

        self.total *= ans

class NanProdRedux:
    total: T
    T: type

    def create(T: type, **kwargs):
        return NanProdRedux[T](**kwargs)

    def __init__(self, **kwargs):
        initial = kwargs.get("initial", T(1))
        self.total = util.cast(initial, T)

    def accept(self, item, index: int):
        if not _isnan(item):
            self.total *= util.cast(item, T)

    def result(self, count: int):
        return self.total

    def empty(**kwargs):
        pass

    def done(self):
        return False

    def gradual_init(self, **kwargs):
        return util.cast(kwargs.get("initial", T(1)), T)

    def gradual_accept(self, curr, item, index: int, **kwargs):
        return curr if _isnan(item) else curr * util.cast(item, T)

    def loop(self, a: Ptr[S], n: int, stride: int, partial: Static[int],
             S: type):
        ans = T(1)

        for i in range(n):
            item = _increment_ptr(a, i * stride)[0]
            if not _isnan(item):
                ans *= util.cast(item, T)

        self.total *= ans
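# NaN-skipping semantics, matching NumPy: nansum treats NaN as 0 and nanprod
# treats NaN as 1, so an all-NaN slice yields the identity rather than NaN:
#
#     nansum([1.0, nan, 2.0])    # -> 3.0
#     nansum([nan, nan])         # -> 0.0
#     nanprod([2.0, nan, 3.0])   # -> 6.0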
class MeanRedux:
    total: T
    T: type

    def create(T: type, **kwargs):
        return MeanRedux[T](**kwargs)

    def __init__(self, **kwargs):
        self.total = T()

    def accept(self, item: T, index: int):
        self.total += item

    def result(self, count: int):
        return self.total / T(count) if count else _nan(T)

    def empty(**kwargs):
        pass

    def done(self):
        return False

    def gradual_init(self, **kwargs):
        return T()

    def gradual_accept(self, curr, item, index: int, **kwargs):
        return curr + util.cast(item, T)

    def gradual_result(self, curr, count: int):
        return curr / T(count) if count else _nan(T)

    def loop(self, a: Ptr[S], n: int, stride: int, partial: Static[int],
             S: type):
        self.total += SumRedux[T]._loop(a, n, stride, S)

class NanMeanRedux:
    total: T
    T: type
    nan_count: int

    def create(T: type, **kwargs):
        return NanMeanRedux[T](**kwargs)

    def __init__(self, **kwargs):
        self.total = T()
        self.nan_count = 0

    def accept(self, item: T, index: int):
        if not _isnan(item):
            self.total += item
        else:
            self.nan_count += 1

    def result(self, count: int):
        count -= self.nan_count
        return self.total / T(count) if count else _nan(T)

    def empty(**kwargs):
        pass

    def done(self):
        return False

    def loop(self, a: Ptr[S], n: int, stride: int, partial: Static[int],
             S: type):
        ans = T()
        nan_count = 0

        for i in range(n):
            item = _increment_ptr(a, i * stride)[0]
            if _isnan(item):
                nan_count += 1
            else:
                ans += util.cast(item, T)

        self.total += ans
        self.nan_count += nan_count

class MinRedux:
    m: Optional[T]
    T: type

    def create(T: type, **kwargs):
        return MinRedux[T](**kwargs)

    def __init__(self, **kwargs):
        initial = kwargs.get("initial", util._NoValue())
        if isinstance(initial, util._NoValue):
            self.m = None
        else:
            self.m = util.cast(initial, T)

    def accept(self, item: T, index: int):
        if self.m is None:
            self.m = item
        else:
            self.m = MinRedux[T]._min(self.m, item)

    def result(self, count: int) -> T:
        return self.m

    def empty(**kwargs):
        if isinstance(kwargs.get("initial", util._NoValue()), util._NoValue):
            raise ValueError(
                "zero-size array to reduction operation minimum which has no identity"
            )

    def done(self):
        return False

    def gradual_init(self, **kwargs):
        initial = kwargs.get("initial", util._NoValue())
        if isinstance(initial, util._NoValue):
            return None
        else:
            return util.cast(initial, T)

    def gradual_accept(self, curr, item, index: int, **kwargs):
        item = util.cast(item, T)
        initial = kwargs.get("initial", util._NoValue())

        if isinstance(initial, util._NoValue):
            if index == 0:
                return item

        return MinRedux[T]._min(curr, item)

    def _min(m: T, x):
        x = util.cast(x, T)
        if T is float or T is float32 or T is float16:
            return util.fmin(m, x)
        else:
            return x if x < m else m

    def loop(self, a: Ptr[S], n: int, stride: int, partial: Static[int],
             S: type):
        if self.m is None:
            m: T = util.cast(a[0], T)
            a = _increment_ptr(a, stride)
            n -= 1
        else:
            m: T = self.m

        for i in range(n):
            m = MinRedux[T]._min(m, a[0])
            a = _increment_ptr(a, stride)

        if partial:
            self.accept(m, 0)
        else:
            self.m = m
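# min/max have no identity element, so reducing an empty array raises
# ValueError unless `initial=` is supplied; with a `where=` mask, `initial`
# is mandatory (enforced in min()/max() below), since a slice may select no
# elements at all. For float types, _min/_max go through util.fmin/util.fmax
# rather than comparison operators, so NaN handling is delegated to those
# intrinsics.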
class MaxRedux:
    m: Optional[T]
    T: type

    def create(T: type, **kwargs):
        return MaxRedux[T](**kwargs)

    def __init__(self, **kwargs):
        initial = kwargs.get("initial", util._NoValue())
        if isinstance(initial, util._NoValue):
            self.m = None
        else:
            self.m = util.cast(initial, T)

    def accept(self, item: T, index: int):
        if self.m is None:
            self.m = item
        else:
            self.m = MaxRedux[T]._max(self.m, item)

    def result(self, count: int) -> T:
        return self.m

    def empty(**kwargs):
        if isinstance(kwargs.get("initial", util._NoValue()), util._NoValue):
            raise ValueError(
                "zero-size array to reduction operation maximum which has no identity"
            )

    def done(self):
        return False

    def gradual_init(self, **kwargs):
        initial = kwargs.get("initial", util._NoValue())
        if isinstance(initial, util._NoValue):
            return None
        else:
            return util.cast(initial, T)

    def gradual_accept(self, curr, item, index: int, **kwargs):
        item = util.cast(item, T)
        initial = kwargs.get("initial", util._NoValue())

        if isinstance(initial, util._NoValue):
            if index == 0:
                return item

        return MaxRedux[T]._max(curr, item)

    def _max(m: T, x):
        x = util.cast(x, T)
        if T is float or T is float32 or T is float16:
            return util.fmax(m, x)
        else:
            return x if x > m else m

    def loop(self, a: Ptr[S], n: int, stride: int, partial: Static[int],
             S: type):
        if self.m is None and n > 0:
            m: T = util.cast(a[0], T)
            a = _increment_ptr(a, stride)
            n -= 1
        else:
            m: T = self.m

        for i in range(n):
            m = MaxRedux[T]._max(m, a[0])
            a = _increment_ptr(a, stride)

        if partial:
            self.accept(m, 0)
        else:
            self.m = m

class PTPRedux:
    hi: Optional[T]
    lo: Optional[T]
    T: type

    def create(T: type, **kwargs):
        return PTPRedux[T](**kwargs)

    def __init__(self, **kwargs):
        self.hi = None
        self.lo = None

    def accept(self, item: T, index: int):
        if self.hi is None:
            self.hi = item
        else:
            self.hi = MaxRedux[T]._max(self.hi, item)

        if self.lo is None:
            self.lo = item
        else:
            self.lo = MinRedux[T]._min(self.lo, item)

    def result(self, count: int) -> T:
        return self.hi - self.lo

    def empty(**kwargs):
        raise ValueError(
            "zero-size array to reduction operation maximum which has no identity"
        )

    def done(self):
        return False

    def loop(self, a: Ptr[S], n: int, stride: int, partial: Static[int],
             S: type):
        # n must be >0 here or we would've thrown an exception earlier
        m = util.cast(a[0], T)
        M = m
        a = _increment_ptr(a, stride)
        n -= 1

        for i in range(n):
            m = MinRedux[T]._min(m, a[0])
            M = MaxRedux[T]._max(M, a[0])
            a = _increment_ptr(a, stride)

        if partial:
            if self.hi is None or (M > self.hi):
                self.hi = M
            if self.lo is None or (m < self.lo):
                self.lo = m
        else:
            self.hi = M
            self.lo = m

class ArgMinRedux:
    m: Optional[T]
    i: int
    T: type

    def create(T: type, **kwargs):
        return ArgMinRedux[T](**kwargs)

    def __init__(self, **kwargs):
        self.m = None
        self.i = 0

    def accept(self, item: T, index: int):
        if self.m is None or (item < self.m):
            self.m = item
            self.i = index

    def result(self, count: int):
        return self.i

    def empty(**kwargs):
        raise ValueError("attempt to get argmin of an empty sequence")

    def done(self):
        return False

class ArgMaxRedux:
    m: Optional[T]
    i: int
    T: type

    def create(T: type, **kwargs):
        return ArgMaxRedux[T](**kwargs)

    def __init__(self, **kwargs):
        self.m = None
        self.i = 0

    def accept(self, item: T, index: int):
        if self.m is None or (item > self.m):
            self.m = item
            self.i = index

    def result(self, count: int):
        return self.i

    def empty(**kwargs):
        raise ValueError("attempt to get argmax of an empty sequence")

    def done(self):
        return False

class AnyRedux:
    a: bool
    T: type

    def create(T: type, **kwargs):
        return AnyRedux[T](**kwargs)

    def __init__(self, **kwargs):
        self.a = False

    def accept(self, item: T, index: int):
        if item:
            self.a = True

    def result(self, count: int):
        return self.a

    def empty(**kwargs):
        pass

    def done(self):
        return self.a

class AllRedux:
    a: bool
    T: type

    def create(T: type, **kwargs):
        return AllRedux[T](**kwargs)

    def __init__(self, **kwargs):
        self.a = True

    def accept(self, item: T, index: int):
        if not item:
            self.a = False

    def result(self, count: int):
        return self.a

    def empty(**kwargs):
        pass

    def done(self):
        return not self.a

class NonZeroRedux:
    nonzero: int
    T: type

    def create(T: type, **kwargs):
        return NonZeroRedux[T](**kwargs)

    def __init__(self, **kwargs):
        self.nonzero = 0

    def accept(self, item: T, index: int):
        if item:
            self.nonzero += 1

    def result(self, count: int):
        return self.nonzero

    def empty(**kwargs):
        pass

    def done(self):
        return False

    def gradual_init(self, **kwargs):
        return 0

    def gradual_accept(self, curr, item, index: int, **kwargs):
        if item:
            curr += 1
        return curr

def sum(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    keepdims: Static[int] = False,
    initial=0,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce(
        a,
        R=SumRedux.create,
        empty=SumRedux.empty,
        axis=axis,
        dtype=dtype,
        out=out,
        keepdims=keepdims,
        where=where,
        initial=initial,
        bool_to_int=True,
    )
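# Usage sketch (hypothetical arrays; mirrors numpy.sum semantics):
#
#     a = array([[1, 2], [3, 4]])
#     sum(a)                            # -> 10
#     sum(a, axis=1)                    # -> array([3, 7])
#     sum(a, axis=0, keepdims=True)     # -> array([[4, 6]])
#     sum(a, initial=100)               # -> 110
#     sum(a, where=array([[True, False], [True, False]]))  # -> 4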
def nansum(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    keepdims: Static[int] = False,
    initial=0,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce(
        a,
        R=NanSumRedux.create,
        empty=NanSumRedux.empty,
        axis=axis,
        dtype=dtype,
        out=out,
        keepdims=keepdims,
        where=where,
        initial=initial,
        bool_to_int=True,
    )

def prod(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    keepdims: Static[int] = False,
    initial=1,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce(
        a,
        R=ProdRedux.create,
        empty=ProdRedux.empty,
        axis=axis,
        dtype=dtype,
        out=out,
        keepdims=keepdims,
        where=where,
        initial=initial,
        bool_to_int=True,
    )

def nanprod(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    keepdims: Static[int] = False,
    initial=1,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce(
        a,
        R=NanProdRedux.create,
        empty=NanProdRedux.empty,
        axis=axis,
        dtype=dtype,
        out=out,
        keepdims=keepdims,
        where=where,
        initial=initial,
        bool_to_int=True,
    )

def mean(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    keepdims: Static[int] = False,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce(
        a,
        R=MeanRedux.create,
        empty=MeanRedux.empty,
        axis=axis,
        dtype=dtype,
        out=out,
        keepdims=keepdims,
        where=where,
        conv_to_float=True,
    )

def nanmean(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    keepdims: Static[int] = False,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce(
        a,
        R=NanMeanRedux.create,
        empty=NanMeanRedux.empty,
        axis=axis,
        dtype=dtype,
        out=out,
        keepdims=keepdims,
        where=where,
        conv_to_float=True,
    )

def _var_reducer(p: Ptr[T], s: int, n: int, dtype: type, T: type, **kwargs):
    if dtype is NoneType:
        zero = _cast_elem(util.zero(T), dtype, conv_to_float=True)
    else:
        zero = _cast_elem(util.zero(dtype), dtype, conv_to_float=True)

    Z = type(zero)
    u = _pairwise_sum(p, n, s, dtype=Z)
    u /= util.cast(n, Z)

    if Z is complex:
        v = 0.0
    elif Z is complex64:
        v = float32(0.0)
    else:
        v = zero

    q = p

    for _ in range(n):
        t = util.cast(q[0], Z) - u
        if Z is complex or Z is complex64:
            r = abs(t)
            v += r * r
        else:
            v += t * t
        q = _increment_ptr(q, s)

    v /= util.cast(n - kwargs['ddof'], type(v))
    return v

def var(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    ddof: int = 0,
    keepdims: Static[int] = False,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce_buffered(a,
                            _var_reducer,
                            dtype=dtype,
                            axis=axis,
                            out=out,
                            overwrite_input=True,
                            force_contig=False,
                            keepdims=keepdims,
                            where=where,
                            ddof=ddof)

def _nanvar_reducer(p: Ptr[T], s: int, n: int, dtype: type, T: type,
                    **kwargs):
    if dtype is NoneType:
        zero = _cast_elem(util.zero(T), dtype, conv_to_float=True)
    else:
        zero = _cast_elem(util.zero(dtype), dtype, conv_to_float=True)

    Z = type(zero)
    u = zero
    q = p
    nans = 0

    for _ in range(n):
        e = util.cast(q[0], Z)
        if _isnan(e):
            nans += 1
        else:
            u += e
        q = _increment_ptr(q, s)

    u /= util.cast(n - nans, Z)

    if Z is complex:
        v = 0.0
    elif Z is complex64:
        v = float32(0.0)
    else:
        v = zero

    q = p

    for _ in range(n):
        e = util.cast(q[0], Z)
        if not _isnan(e):
            t = util.cast(q[0], Z) - u
            if Z is complex or Z is complex64:
                r = abs(t)
                v += r * r
            else:
                v += t * t
        q = _increment_ptr(q, s)

    v /= util.cast(n - nans - kwargs['ddof'], type(v))
    return v

def nanvar(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    ddof: int = 0,
    keepdims: Static[int] = False,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce_buffered(a,
                            _nanvar_reducer,
                            dtype=dtype,
                            axis=axis,
                            out=out,
                            overwrite_input=True,
                            force_contig=False,
                            keepdims=keepdims,
                            where=where,
                            ddof=ddof)

def _std_reducer(p: Ptr[T], s: int, n: int, dtype: type, T: type, **kwargs):
    x = _var_reducer(p=p, s=s, n=n, dtype=dtype, T=T, **kwargs)
    return util.sqrt(x)
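# Variance uses the two-pass formula: first the mean u = (1/n) * sum(x),
# then v = sum(|x - u|**2) / (n - ddof). With ddof=0 this is the population
# variance; ddof=1 gives the unbiased sample variance. For complex inputs
# the squared magnitude |x - u|**2 is used, so var/std of complex data are
# real-valued, as in NumPy. For example:
#
#     var([1.0, 2.0, 3.0, 4.0])           # -> 1.25
#     var([1.0, 2.0, 3.0, 4.0], ddof=1)   # -> ~1.6667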
def std(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    ddof: int = 0,
    keepdims: Static[int] = False,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce_buffered(a,
                            _std_reducer,
                            dtype=dtype,
                            axis=axis,
                            out=out,
                            overwrite_input=True,
                            force_contig=False,
                            keepdims=keepdims,
                            where=where,
                            ddof=ddof)

def _nanstd_reducer(p: Ptr[T], s: int, n: int, dtype: type, T: type,
                    **kwargs):
    x = _nanvar_reducer(p=p, s=s, n=n, dtype=dtype, T=T, **kwargs)
    return util.sqrt(x)

def nanstd(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    ddof: int = 0,
    keepdims: Static[int] = False,
    where=util._NoValue(),
):
    a = asarray(a)
    return _reduce_buffered(a,
                            _nanstd_reducer,
                            dtype=dtype,
                            axis=axis,
                            out=out,
                            overwrite_input=True,
                            force_contig=False,
                            keepdims=keepdims,
                            where=where,
                            ddof=ddof)

def min(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    ddof: int = 0,
    keepdims: Static[int] = False,
    initial=util._NoValue(),
    where=util._NoValue(),
):
    if not isinstance(where, util._NoValue) and isinstance(
            initial, util._NoValue):
        compile_error(
            "reduction operation 'minimum' does not have an identity, so to use a where mask one has to specify 'initial'"
        )

    a = asarray(a)
    return _reduce(
        a,
        R=MinRedux.create,
        empty=MinRedux.empty,
        axis=axis,
        dtype=dtype,
        out=out,
        keepdims=keepdims,
        where=where,
        initial=initial,
    )

def max(
    a,
    axis=None,
    dtype: type = NoneType,
    out=None,
    ddof: int = 0,
    keepdims: Static[int] = False,
    initial=util._NoValue(),
    where=util._NoValue(),
):
    if not isinstance(where, util._NoValue) and isinstance(
            initial, util._NoValue):
        compile_error(
            "reduction operation 'maximum' does not have an identity, so to use a where mask one has to specify 'initial'"
        )

    a = asarray(a)
    return _reduce(
        a,
        R=MaxRedux.create,
        empty=MaxRedux.empty,
        axis=axis,
        dtype=dtype,
        out=out,
        keepdims=keepdims,
        where=where,
        initial=initial,
    )

def ptp(a, axis=None, out=None, keepdims: Static[int] = False):
    a = asarray(a)
    return _reduce(
        a,
        R=PTPRedux.create,
        empty=PTPRedux.empty,
        axis=axis,
        out=out,
        keepdims=keepdims,
    )

def argmin(a, axis=None, out=None, keepdims: Static[int] = False):
    a = asarray(a)
    return _reduce(
        a,
        R=ArgMinRedux.create,
        empty=ArgMinRedux.empty,
        axis=axis,
        out=out,
        keepdims=keepdims,
    )

def argmax(a, axis=None, out=None, keepdims: Static[int] = False):
    a = asarray(a)
    return _reduce(
        a,
        R=ArgMaxRedux.create,
        empty=ArgMaxRedux.empty,
        axis=axis,
        out=out,
        keepdims=keepdims,
    )

def any(a,
        axis=None,
        out=None,
        keepdims: Static[int] = False,
        where=util._NoValue()):
    a = asarray(a)
    return _reduce(
        a,
        R=AnyRedux.create,
        empty=AnyRedux.empty,
        axis=axis,
        out=out,
        keepdims=keepdims,
        where=where,
    )

def all(a,
        axis=None,
        out=None,
        keepdims: Static[int] = False,
        where=util._NoValue()):
    a = asarray(a)
    return _reduce(
        a,
        R=AllRedux.create,
        empty=AllRedux.empty,
        axis=axis,
        out=out,
        keepdims=keepdims,
        where=where,
    )

def count_nonzero(a, axis=None, keepdims: Static[int] = False):
    a = asarray(a)
    return _reduce(a,
                   R=NonZeroRedux.create,
                   empty=NonZeroRedux.empty,
                   axis=axis,
                   keepdims=keepdims)

def _median_reducer_no_nan(v: Ptr[T], s: int, n: int, dtype: type, T: type,
                           **kwargs):
    if n == 0:
        if T is complex or T is complex64:
            return _nan(T)
        else:
            return _nan(float)

    m1, m2 = util.median(v, n)

    if n & 1 == 0:
        if T is complex:
            return (m1 + m2) / 2.0
        elif T is complex64:
            return (m1 + m2) / float32(2.0)
        else:
            return (util.cast(m1, float) + util.cast(m2, float)) / 2.0
    else:
        if T is complex or T is complex64:
            return m1
        else:
            return util.cast(m1, float)

def _median_reducer(v: Ptr[T], s: int, n: int, dtype: type, T: type,
                    **kwargs):
    if _supports_nan(T):
        for i in range(n):
            if _isnan(v[i]):
                if T is complex or T is complex64:
                    return _nan(T)
                else:
                    return _nan(float)

    return _median_reducer_no_nan(v, s, n, dtype, T, **kwargs)
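# median returns the middle element for odd n and the average of the two
# middle elements for even n; any NaN in the slice makes the result NaN
# (use nanmedian to ignore NaNs instead):
#
#     median([3.0, 1.0, 2.0])         # -> 2.0
#     median([4.0, 1.0, 3.0, 2.0])    # -> 2.5
#     median([1.0, nan, 2.0])         # -> nan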
def median(a,
           axis=None,
           out=None,
           overwrite_input: bool = False,
           keepdims: Static[int] = False):
    a = asarray(a)
    return _reduce_buffered(a,
                            _median_reducer,
                            dtype=a.dtype,
                            axis=axis,
                            out=out,
                            overwrite_input=overwrite_input,
                            force_contig=True,
                            keepdims=keepdims,
                            where=None)

def _nanmedian_reducer(v: Ptr[T], s: int, n: int, dtype: type, T: type,
                       **kwargs):
    n = _nan_to_back(v, n)
    return _median_reducer_no_nan(v, s, n, dtype, T, **kwargs)

def nanmedian(a,
              axis=None,
              out=None,
              overwrite_input: bool = False,
              keepdims: Static[int] = False):
    a = asarray(a)
    return _reduce_buffered(a,
                            _nanmedian_reducer,
                            dtype=a.dtype,
                            axis=axis,
                            out=out,
                            overwrite_input=overwrite_input,
                            force_contig=True,
                            keepdims=keepdims,
                            where=None)

def _sorted(ar):
    if isinstance(ar, ndarray):
        x = ar.flatten()
        x.sort()
        return x
    else:
        x = asarray(ar).ravel()
        x.sort()
        return x

def _check_interpolation_as_method(method, interpolation):
    if method != "linear":
        # sanity check, we assume this basically never happens
        raise TypeError(
            "You shall not pass both `method` and `interpolation`!\n"
            "(`interpolation` is Deprecated in favor of `method`)")

    return interpolation

def _quantile_is_valid(q):
    # avoid expensive reductions, relevant for arrays with < O(1000) elements
    if staticlen(q.shape) == 1 and q.size < 10:
        for i in range(q.size):
            if not (0.0 <= q[i] <= 1.0):
                return False
    else:
        for idx in util.multirange(q.shape):
            if not ((0.0 <= q._ptr(idx)[0]) and (q._ptr(idx)[0] <= 1.0)):
                return False

    return True

def _get_gamma_mask(shape, default_value, conditioned_value, where):
    out = full(shape, default_value)
    copyto(out, conditioned_value, where=where)
    return out

def _discret_interpolation_to_boundaries(index, gamma_condition_fun):
    if not isinstance(index, ndarray):
        shape = ()
    else:
        shape = index.shape

    previous = floor(index)
    next = previous + 1
    gamma = index - previous
    res = _get_gamma_mask(shape=shape,
                          default_value=next,
                          conditioned_value=previous,
                          where=gamma_condition_fun(gamma,
                                                    index)).astype(int)
    # Some methods can lead to out-of-bound integers so we clip them
    res[res < 0] = 0
    return res

def _inverted_cdf(n, quantiles):
    gamma_fun = lambda gamma, _: (gamma == 0)
    return _discret_interpolation_to_boundaries((n * quantiles) - 1,
                                                gamma_fun)

def _closest_observation(n, quantiles):
    gamma_fun = lambda gamma, index: (gamma == 0) & (floor(index) % 2 == 0)
    return _discret_interpolation_to_boundaries((n * quantiles) - 1 - 0.5,
                                                gamma_fun)

def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
    return n * quantiles + (alpha + quantiles * (1 - alpha - beta)) - 1
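# Quantile methods are parameterized by (alpha, beta) plug-in constants via
#     virtual_index = n*q + (alpha + q*(1 - alpha - beta)) - 1
# (the Hyndman & Fan parameterization); e.g. 'linear' reduces to (n - 1)*q,
# 'hazen' uses alpha = beta = 0.5, and 'weibull' uses alpha = beta = 0. The
# fractional part of the virtual index becomes the interpolation weight
# gamma. Worked example for 'linear' with n = 5, q = 0.25:
# index = 4 * 0.25 = 1.0, so the quantile is exactly the second-smallest
# element.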
def _get_indexes(arr, virtual_indexes, valid_values_count,
                 supports_nan: bool):
    previous_indexes = asarray(floor(virtual_indexes))
    next_indexes = asarray(previous_indexes + 1)
    indexes_above_bounds = virtual_indexes >= valid_values_count - 1

    # When an index is above the max index, take the max value of the array
    if indexes_above_bounds.any():
        previous_indexes[indexes_above_bounds] = -1
        next_indexes[indexes_above_bounds] = -1

    # When an index is below the min index, take the min value of the array
    indexes_below_bounds = virtual_indexes < 0
    if indexes_below_bounds.any():
        previous_indexes[indexes_below_bounds] = 0
        next_indexes[indexes_below_bounds] = 0

    if supports_nan:
        # After the sort, slices having NaNs will have for last element a NaN
        virtual_indexes_nans = _isnan(virtual_indexes)

        if isinstance(virtual_indexes_nans, bool):
            if virtual_indexes_nans:
                previous_indexes[()] = -1
                next_indexes[()] = -1
        elif isinstance(virtual_indexes_nans, ndarray):
            if virtual_indexes_nans.any():
                previous_indexes[virtual_indexes_nans] = -1
                next_indexes[virtual_indexes_nans] = -1

    previous_indexes = previous_indexes.astype(int)
    next_indexes = next_indexes.astype(int)
    return previous_indexes, next_indexes

def _lerp(a, b, t, out=None):
    diff_b_a = subtract(b, a)
    lerp_interpolation = asarray(add(a, diff_b_a * t, out=out))
    subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t >= 0.5)

    if staticlen(lerp_interpolation.shape) == 0 and out is None:
        lerp_interpolation2 = lerp_interpolation[()]  # unpack 0d arrays
    else:
        lerp_interpolation2 = lerp_interpolation

    return lerp_interpolation2

def _quantile(
    arr,
    quantiles,
    axis: int = -1,
    method: str = "linear",
    out=None,
):
    supports_nan = _supports_nan(arr.dtype)
    arr = asarray(arr, float)
    values_count = arr.shape[axis]

    if axis != 0:
        arr = moveaxis(arr, axis, destination=0)

    def compute_quantile(arr, quantiles, axis: int, method: str, out,
                         virtual_indexes, supports_nan: bool):
        virtual_indexes = asarray(virtual_indexes)

        if (virtual_indexes.dtype is int
                or isinstance(virtual_indexes.dtype, Int)):
            # No interpolation needed, take the points along axis
            if supports_nan:
                # may contain nan, which would sort to the end
                arr.partition(concatenate((virtual_indexes.ravel(), [-1])),
                              axis=0)
                slices_having_nans = _isnan(arr[-1, ...])
            else:
                # cannot contain nan
                arr.partition(virtual_indexes.ravel(), axis=0)

            result = take(arr, virtual_indexes, axis=0, out=out)
        else:
            previous_indexes, next_indexes = _get_indexes(
                arr, virtual_indexes, values_count, supports_nan)

            # --- Sorting
            arr.partition(unique(
                concatenate((
                    [0, -1],
                    previous_indexes.ravel(),
                    next_indexes.ravel(),
                ))),
                          axis=0)

            if supports_nan:
                slices_having_nans = _isnan(arr[-1, ...])

            # --- Get values from indexes
            previous = arr[previous_indexes]
            next = arr[next_indexes]

            # --- Linear interpolation
            def _get_gamma(virtual_indexes, previous_indexes, method: str):
                gamma = asarray(virtual_indexes - previous_indexes)

                if (method == 'inverted_cdf'
                        or method == 'closest_observation'
                        or method == 'interpolated_inverted_cdf'
                        or method == 'hazen' or method == 'weibull'
                        or method == 'linear'
                        or method == 'median_unbiased'
                        or method == 'normal_unbiased' or method == 'lower'
                        or method == 'higher' or method == 'nearest'):
                    return gamma
                elif method == 'averaged_inverted_cdf':
                    return _get_gamma_mask(shape=gamma.shape,
                                           default_value=1.,
                                           conditioned_value=0.5,
                                           where=gamma == 0)
                elif method == 'midpoint':
                    return _get_gamma_mask(shape=gamma.shape,
                                           default_value=0.5,
                                           conditioned_value=0.,
                                           where=virtual_indexes % 1 == 0)

            gamma = _get_gamma(virtual_indexes, previous_indexes, method)
            gamma = asarray(gamma)
            result_shape = virtual_indexes.shape + (1, ) * (arr.ndim - 1)
            gamma = gamma.reshape(result_shape)
            result = _lerp(previous, next, gamma, out=out)

        if supports_nan:
            if any(slices_having_nans):
                if isinstance(result, ndarray):
                    if result.ndim == 0 and out is None:
                        # can't write to a scalar, but indexing will be correct
                        result = arr[-1]
                    else:
                        copyto(result, arr[-1, ...],
                               where=slices_having_nans)
                else:
                    if out is None:
                        result = util.nan64()
                    else:
                        out[()] = util.nan64()

        return result

    # --- Computation of indexes
    # Index where to find the value in the sorted array.
    # Virtual because it is a floating point value, not a valid index.
    # The nearest neighbours are used for interpolation
    if method == 'inverted_cdf':
        virtual_indexes = _inverted_cdf(values_count, quantiles)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'averaged_inverted_cdf':
        virtual_indexes = (values_count * quantiles) - 1
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'closest_observation':
        virtual_indexes = _closest_observation(values_count, quantiles)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'interpolated_inverted_cdf':
        virtual_indexes = _compute_virtual_index(values_count, quantiles, 0,
                                                 1)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'hazen':
        virtual_indexes = _compute_virtual_index(values_count, quantiles,
                                                 0.5, 0.5)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'weibull':
        virtual_indexes = _compute_virtual_index(values_count, quantiles, 0,
                                                 0)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'linear':
        virtual_indexes = (values_count - 1) * quantiles
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'median_unbiased':
        virtual_indexes = _compute_virtual_index(values_count, quantiles,
                                                 1 / 3.0, 1 / 3.0)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'normal_unbiased':
        virtual_indexes = _compute_virtual_index(values_count, quantiles,
                                                 3 / 8.0, 3 / 8.0)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'lower':

        def get_virtual_indexes(quantiles, values_count):
            if isinstance(quantiles, ndarray):
                return floor((values_count - 1) * quantiles).astype(int)
            else:
                return int(floor((values_count - 1) * quantiles))

        virtual_indexes2 = get_virtual_indexes(quantiles, values_count)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes2,
                                supports_nan=supports_nan)
    elif method == 'higher':

        def get_virtual_indexes(quantiles, values_count):
            if isinstance(quantiles, ndarray):
                return ceil((values_count - 1) * quantiles).astype(int)
            else:
                return int(ceil((values_count - 1) * quantiles))

        virtual_indexes2 = get_virtual_indexes(quantiles, values_count)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes2,
                                supports_nan=supports_nan)
    elif method == 'midpoint':
        virtual_indexes = 0.5 * (floor(
            (values_count - 1) * quantiles) + ceil(
                (values_count - 1) * quantiles))
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes,
                                supports_nan=supports_nan)
    elif method == 'nearest':

        def get_virtual_indexes(quantiles, values_count):
            if isinstance(quantiles, ndarray):
                return round((values_count - 1) * quantiles).astype(int)
            else:
                return int(round((values_count - 1) * quantiles))
        virtual_indexes2 = get_virtual_indexes(quantiles, values_count)
        return compute_quantile(arr,
                                quantiles=quantiles,
                                axis=axis,
                                method=method,
                                out=out,
                                virtual_indexes=virtual_indexes2,
                                supports_nan=supports_nan)
    else:
        raise ValueError(f"{method} is not a valid method.")

def _quantile_reducer(v: Ptr[T], s: int, n: int, dtype: type, T: type,
                      **kwargs):
    return _quantile(ndarray((n, ), (s, ), v),
                     quantiles=kwargs['q'],
                     axis=0,
                     method=kwargs['method'],
                     out=None)

def _quantile_reducer_multi(v: Ptr[T], s: int, n: int, out: Ptr[float],
                            dtype: type, T: type, **kwargs):
    q = kwargs['q']
    _quantile(ndarray((n, ), (s, ), v),
              quantiles=q,
              axis=0,
              method=kwargs['method'],
              out=ndarray((q.size, ), (util.sizeof(dtype), ), out))

def _quantile_unchecked(a,
                        q,
                        axis=None,
                        out=None,
                        overwrite_input: bool = False,
                        method: str = "linear",
                        keepdims: Static[int] = False):
    # Assumes that q is in [0, 1], and is an ndarray
    if q.ndim == 0:
        return _reduce_buffered(a,
                                _quantile_reducer,
                                dtype=a.dtype,
                                axis=axis,
                                out=out,
                                overwrite_input=overwrite_input,
                                keepdims=keepdims,
                                q=q.item(),
                                method=method)
    elif q.ndim == 1:
        return _reduce_buffered_multi(a,
                                      _quantile_reducer_multi,
                                      multi_num=q.size,
                                      dtype=float,
                                      axis=axis,
                                      out=out,
                                      overwrite_input=overwrite_input,
                                      keepdims=keepdims,
                                      q=q,
                                      method=method)
    else:
        compile_error("q must be a scalar or 1d")

def _asarray_no_complex(a):
    a = asarray(a)

    if a.dtype is complex or a.dtype is complex64:
        compile_error("a must be an array of real numbers")

    return a

def quantile(a,
             q,
             axis=None,
             out=None,
             overwrite_input: bool = False,
             method: str = "linear",
             keepdims: Static[int] = False,
             interpolation=None):
    if interpolation is not None:
        method = _check_interpolation_as_method(method, interpolation)

    a = _asarray_no_complex(a)
    q = asarray(q)

    if not _quantile_is_valid(q):
        raise ValueError("Quantiles must be in the range [0, 1]")

    return _quantile_unchecked(a, q, axis, out, overwrite_input, method,
                               keepdims)

def _nanquantile_reducer(v: Ptr[T], s: int, n: int, dtype: type, T: type,
                         **kwargs):
    n = _nan_to_back(v, n)
    return _quantile(ndarray((n, ), (s, ), v),
                     quantiles=kwargs['q'],
                     axis=0,
                     method=kwargs['method'],
                     out=None)

def _nanquantile_reducer_multi(v: Ptr[T], s: int, n: int, out: Ptr[dtype],
                               dtype: type, T: type, **kwargs):
    n = _nan_to_back(v, n)
    q = kwargs['q']
    _quantile(ndarray((n, ), (s, ), v),
              quantiles=q,
              axis=0,
              method=kwargs['method'],
              out=ndarray((q.size, ), (util.sizeof(dtype), ), out))

def _nanquantile_unchecked(a,
                           q,
                           axis=None,
                           out=None,
                           overwrite_input: bool = False,
                           method: str = "linear",
                           keepdims: Static[int] = False):
    # Assumes that q is in [0, 1], and is an ndarray
    if q.ndim == 0:
        return _reduce_buffered(a,
                                _nanquantile_reducer,
                                dtype=a.dtype,
                                axis=axis,
                                out=out,
                                overwrite_input=overwrite_input,
                                keepdims=keepdims,
                                q=q.item(),
                                method=method)
    elif q.ndim == 1:
        return _reduce_buffered_multi(a,
                                      _nanquantile_reducer_multi,
                                      multi_num=q.size,
                                      dtype=float,
                                      axis=axis,
                                      out=out,
                                      overwrite_input=overwrite_input,
                                      keepdims=keepdims,
                                      q=q,
                                      method=method)
    else:
        compile_error("q must be a scalar or 1d")
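# Usage sketch (hypothetical data; mirrors numpy.quantile):
#
#     x = array([1.0, 2.0, 3.0, 4.0])
#     quantile(x, 0.5)                     # -> 2.5 (same as median)
#     quantile(x, array([0.0, 1.0]))       # -> array([1.0, 4.0])
#     quantile(x, 0.25, method='lower')    # -> 1.0 (no interpolation)
#     percentile(x, 50)                    # same as quantile(x, 0.5)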
def nanquantile(a,
                q,
                axis=None,
                out=None,
                overwrite_input: bool = False,
                method: str = "linear",
                keepdims: Static[int] = False,
                interpolation=None):
    if interpolation is not None:
        method = _check_interpolation_as_method(method, interpolation)

    a = _asarray_no_complex(a)

    if not _supports_nan(a.dtype):
        # `interpolation` has already been folded into `method` above, so
        # don't pass it again.
        return quantile(a,
                        q,
                        axis=axis,
                        out=out,
                        overwrite_input=overwrite_input,
                        method=method,
                        keepdims=keepdims)

    q = asarray(q)

    if not _quantile_is_valid(q):
        raise ValueError("Quantiles must be in the range [0, 1]")

    return _nanquantile_unchecked(a, q, axis, out, overwrite_input, method,
                                  keepdims)

def percentile(a,
               q,
               axis=None,
               out=None,
               overwrite_input: bool = False,
               method: str = "linear",
               keepdims: Static[int] = False,
               interpolation=None):
    if interpolation is not None:
        method = _check_interpolation_as_method(method, interpolation)

    a = _asarray_no_complex(a)
    q = true_divide(q, 100)
    q = asarray(q)

    if not _quantile_is_valid(q):
        raise ValueError("Percentiles must be in the range [0, 100]")

    return _quantile_unchecked(a, q, axis, out, overwrite_input, method,
                               keepdims)

def nanpercentile(a,
                  q,
                  axis=None,
                  out=None,
                  overwrite_input: bool = False,
                  method: str = "linear",
                  keepdims: Static[int] = False,
                  interpolation=None):
    if interpolation is not None:
        method = _check_interpolation_as_method(method, interpolation)

    a = _asarray_no_complex(a)

    if not _supports_nan(a.dtype):
        # As in nanquantile: `interpolation` is already folded into `method`.
        return percentile(a,
                          q,
                          axis=axis,
                          out=out,
                          overwrite_input=overwrite_input,
                          method=method,
                          keepdims=keepdims)

    q = true_divide(q, 100)
    q = asarray(q)

    if not _quantile_is_valid(q):
        raise ValueError("Percentiles must be in the range [0, 100]")

    return _nanquantile_unchecked(a, q, axis, out, overwrite_input, method,
                                  keepdims)

@extend
class ndarray:

    def sum(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        keepdims: Static[int] = False,
        initial=0,
        where=util._NoValue(),
    ):
        return sum(
            self,
            axis=axis,
            dtype=dtype,
            out=out,
            keepdims=keepdims,
            initial=initial,
            where=where,
        )

    def prod(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        keepdims: Static[int] = False,
        initial=1,
        where=util._NoValue(),
    ):
        return prod(
            self,
            axis=axis,
            dtype=dtype,
            out=out,
            keepdims=keepdims,
            initial=initial,
            where=where,
        )

    def mean(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        keepdims: Static[int] = False,
        where=util._NoValue(),
    ):
        return mean(self,
                    axis=axis,
                    dtype=dtype,
                    out=out,
                    keepdims=keepdims,
                    where=where)

    def nanmean(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        keepdims: Static[int] = False,
        where=util._NoValue(),
    ):
        return nanmean(self,
                       axis=axis,
                       dtype=dtype,
                       out=out,
                       keepdims=keepdims,
                       where=where)

    def var(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        ddof: int = 0,
        keepdims: Static[int] = False,
        where=util._NoValue(),
    ):
        return var(
            self,
            axis=axis,
            dtype=dtype,
            out=out,
            ddof=ddof,
            keepdims=keepdims,
            where=where,
        )

    def nanvar(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        ddof: int = 0,
        keepdims: Static[int] = False,
        where=util._NoValue(),
    ):
        return nanvar(
            self,
            axis=axis,
            dtype=dtype,
            out=out,
            ddof=ddof,
            keepdims=keepdims,
            where=where,
        )

    def std(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        ddof: int = 0,
        keepdims: Static[int] = False,
        where=util._NoValue(),
    ):
        return std(
            self,
            axis=axis,
            dtype=dtype,
            out=out,
            ddof=ddof,
            keepdims=keepdims,
            where=where,
        )

    def nanstd(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        ddof: int = 0,
        keepdims: Static[int] = False,
        where=util._NoValue(),
    ):
        return nanstd(
            self,
            axis=axis,
            dtype=dtype,
            out=out,
            ddof=ddof,
            keepdims=keepdims,
            where=where,
        )

    def min(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        ddof: int = 0,
        keepdims: Static[int] = False,
        initial=util._NoValue(),
        where=util._NoValue(),
    ):
        return min(
            self,
            axis=axis,
            dtype=dtype,
            out=out,
            ddof=ddof,
            keepdims=keepdims,
            initial=initial,
            where=where,
        )

    def ptp(self, axis=None, out=None, keepdims: Static[int] = False):
        return ptp(self, axis=axis, out=out, keepdims=keepdims)
    def max(
        self,
        axis=None,
        dtype: type = NoneType,
        out=None,
        ddof: int = 0,
        keepdims: Static[int] = False,
        initial=util._NoValue(),
        where=util._NoValue(),
    ):
        return max(
            self,
            axis=axis,
            dtype=dtype,
            out=out,
            ddof=ddof,
            keepdims=keepdims,
            initial=initial,
            where=where,
        )

    def argmin(self, axis=None, out=None, keepdims: Static[int] = False):
        return argmin(self, axis=axis, out=out, keepdims=keepdims)

    def argmax(self, axis=None, out=None, keepdims: Static[int] = False):
        return argmax(self, axis=axis, out=out, keepdims=keepdims)

    def any(self,
            axis=None,
            out=None,
            keepdims: Static[int] = False,
            where=util._NoValue()):
        return any(self, axis=axis, out=out, keepdims=keepdims, where=where)

    def all(self,
            axis=None,
            out=None,
            keepdims: Static[int] = False,
            where=util._NoValue()):
        return all(self, axis=axis, out=out, keepdims=keepdims, where=where)
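# The method forms above delegate to the module-level reductions, so e.g.
# a.sum(axis=0) and sum(a, axis=0) are interchangeable (hypothetical `a`):
#
#     a = array([[1.0, 2.0], [3.0, 4.0]])
#     a.mean()         # -> 2.5
#     a.max(axis=1)    # -> array([2.0, 4.0])
#     a.argmin()       # -> 0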