1
0
mirror of https://github.com/exaloop/codon.git synced 2025-06-03 15:03:52 +08:00
codon/stdlib/internal/pynumerics.codon
A. R. Shajii ebd344f894
GPU and other updates (#52)
* Add nvptx pass

* Fix spaces

* Don't change name

* Add runtime support

* Add init call

* Add more runtime functions

* Add launch function

* Add intrinsics

* Fix codegen

* Run GPU pass between general opt passes

* Set data layout

* Create context

* Link libdevice

* Add function remapping

* Fix linkage

* Fix libdevice link

* Fix linking

* Fix personality

* Fix linking

* Fix linking

* Fix linking

* Add internalize pass

* Add more math conversions

* Add more re-mappings

* Fix conversions

* Fix __str__

* Add decorator attribute for any decorator

* Update kernel decorator

* Fix kernel decorator

* Fix kernel decorator

* Fix kernel decorator

* Fix kernel decorator

* Remove old decorator

* Fix pointer calc

* Fix fill-in codegen

* Fix linkage

* Add comment

* Update list conversion

* Add more conversions

* Add dict and set conversions

* Add float32 type to IR/LLVM

* Add float32

* Add float32 stdlib

* Keep required global values in PTX module

* Fix PTX module pruning

* Fix malloc

* Set will-return

* Fix name cleanup

* Fix access

* Fix name cleanup

* Fix function renaming

* Update dimension API

* Fix args

* Clean up API

* Move GPU transformations to end of opt pipeline

* Fix alloc replacements

* Fix naming

* Target PTX 4.2

* Fix global renaming

* Fix early return in static blocks; Add __realized__ function

* Format

* Add __llvm_name__ for functions

* Add vector type to IR

* SIMD support [wip]

* Update kernel naming

* Fix early returns; Fix SIMD calls

* Fix kernel naming

* Fix IR matcher

* Remove module print

* Update realloc

* Add overloads for 32-bit float math ops

* Add gpu.Pointer type for working with raw pointers

* Add float32 conversion

* Add to_gpu and from_gpu

* clang-format

* Add f32 reduction support to OpenMP

* Fix automatic GPU class conversions

* Fix conversion functions

* Fix conversions

* Rename self

* Fix tuple conversion

* Fix conversions

* Fix conversions

* Update PTX filename

* Fix filename

* Add raw function

* Add GPU docs

* Allow nested object conversions

* Add tests (WIP)

* Update SIMD

* Add staticrange and statictuple loop support

* SIMD updates

* Add new Vec constructors

* Fix UInt conversion

* Fix size-0 allocs

* Add more tests

* Add matmul test

* Rename gpu test file

* Add more tests

* Add alloc cache

* Fix object_to_gpu

* Fix frees

* Fix str conversion

* Fix set conversion

* Fix conversions

* Fix class conversion

* Fix str conversion

* Fix byte conversion

* Fix list conversion

* Fix pointer conversions

* Fix conversions

* Fix conversions

* Update tests

* Fix conversions

* Fix tuple conversion

* Fix tuple conversion

* Fix auto conversions

* Fix conversion

* Fix magics

* Update tests

* Support GPU in JIT mode

* Fix GPU+JIT

* Fix kernel filename in JIT mode

* Add __static_print__; Add earlyDefines; Various domination bugfixes; SimplifyContext RAII base handling

* Fix global static handling

* Fix float32 tests

* Fix GPU module

* Support OpenMP "collapse" option

* Add more collapse tests

* Capture generics and statics

* TraitVar handling

* Python exceptions / isinstance [wip; no_ci]

* clang-format

* Add list comparison operators

* Support empty raise in IR

* Add dict 'or' operator

* Fix repr

* Add copy module

* Fix spacing

* Use sm_30

* Python exceptions

* TypeTrait support; Fix defaultDict

* Fix earlyDefines

* Add defaultdict

* clang-format

* Fix invalid canonicalizations

* Fix empty raise

* Fix copyright

* Add Python numerics option

* Support py-numerics in math module

* Update docs

* Add static Python division / modulus

* Add static py numerics tests

* Fix staticrange/tuple; Add KwTuple.__getitem__

* clang-format

* Add gpu parameter to par

* Fix globals

* Don't init loop vars on loop collapse

* Add par-gpu tests

* Update gpu docs

* Fix isinstance check

* Remove invalid test

* Add -libdevice to set custom path [skip ci]

* Add release notes; bump version [skip ci]

* Add libdevice docs [skip ci]

Co-authored-by: Ibrahim Numanagić <ibrahimpasa@gmail.com>
2022-09-15 15:40:00 -04:00

169 lines
4.7 KiB
Python

# (c) 2022 Exaloop Inc. All rights reserved.
# Floor of an int divided by a float, written directly in LLVM IR: `self` is
# converted to double (sitofp), divided by `other` (fdiv), and the quotient is
# rounded down with the llvm.floor.f64 intrinsic. No zero-divisor check is
# performed here.
@pure
@llvm
def _floordiv_int_float(self: int, other: float) -> float:
    declare double @llvm.floor.f64(double)
    %0 = sitofp i64 %self to double
    %1 = fdiv double %0, %other
    %2 = call double @llvm.floor.f64(double %1)
    ret double %2
# Raw 64-bit signed integer division (LLVM `sdiv`). Per the LLVM reference,
# `sdiv` rounds toward zero, so despite the name this is not yet a Python-style
# floor division for operands of opposite sign; `_divmod_int_int` applies the
# correction. No zero-divisor check is performed here.
@pure
@llvm
def _floordiv_int_int(self: int, other: int) -> int:
    %0 = sdiv i64 %self, %other
    ret i64 %0
# True division of an int by a float: `self` is converted to double (sitofp)
# and divided by `other` with `fdiv`. No zero-divisor check is performed here;
# the `int.__truediv__` overload that calls this helper rejects 0.0 first.
@pure
@llvm
def _truediv_int_float(self: int, other: float) -> float:
    %0 = sitofp i64 %self to double
    %1 = fdiv double %0, %other
    ret double %1
# True division of two ints: both operands are converted to double (sitofp)
# and divided with `fdiv`, so the result is always a float. No zero-divisor
# check is performed here; the `int.__truediv__` overload that calls this
# helper rejects 0 first.
@pure
@llvm
def _truediv_int_int(self: int, other: int) -> float:
    %0 = sitofp i64 %self to double
    %1 = sitofp i64 %other to double
    %2 = fdiv double %0, %1
    ret double %2
# Raw floating-point remainder of an int by a float: `self` is converted to
# double (sitofp) and LLVM `frem` is applied. Per the LLVM reference the `frem`
# result takes the sign of the dividend, so this is not Python's `%` (whose
# result follows the divisor). No zero-divisor check is performed here.
@pure
@llvm
def _mod_int_float(self: int, other: float) -> float:
    %0 = sitofp i64 %self to double
    %1 = frem double %0, %other
    ret double %1
# Raw 64-bit signed integer remainder (LLVM `srem`). Per the LLVM reference the
# `srem` result takes the sign of the dividend, so this is not Python's `%`
# (whose result follows the divisor). No zero-divisor check is performed here.
@pure
@llvm
def _mod_int_int(self: int, other: int) -> int:
    %0 = srem i64 %self, %other
    ret i64 %0
# Raw double-precision division (LLVM `fdiv`) with no zero-divisor check.
# Used by `_divmod_float_float` and by the `float.__truediv__` overloads, which
# perform the zero check themselves before calling it.
@pure
@llvm
def _truediv_float_float(self: float, other: float) -> float:
    %0 = fdiv double %self, %other
    ret double %0
# Raw double-precision remainder (LLVM `frem`). Per the LLVM reference the
# result takes the sign of the dividend; `_divmod_float_float` adjusts it so
# the final remainder follows the sign of the divisor. No zero-divisor check is
# performed here.
@pure
@llvm
def _mod_float_float(self: float, other: float) -> float:
    %0 = frem double %self, %other
    ret double %0
def _divmod_int_int(self: int, other: int):
    """
    Return the pair `(quotient, remainder)` of `self` divided by `other`,
    with the remainder taking the sign of `other`.

    The caller is responsible for rejecting `other == 0`; no check is made
    here.
    """
    quot = _floordiv_int_int(self, other)
    rem = self - quot * other
    # Nothing to adjust when the division was exact, or when the remainder
    # already has the same sign as the divisor (XOR sign bit clear).
    if rem == 0 or (other ^ rem) >= 0:
        return (quot, rem)
    # Otherwise step the quotient down by one and move the remainder onto
    # the divisor's side of zero.
    return (quot - 1, rem + other)
def _divmod_float_float(self: float, other: float):
    """
    Return the pair `(floor_quotient, remainder)` of `self` divided by
    `other`, with the remainder taking the sign of `other`.

    The caller is responsible for rejecting `other == 0.0`; no check is made
    here.
    """
    rem = _mod_float_float(self, other)
    quot = _truediv_float_float(self - rem, other)

    if not rem:
        # Exact division: the remainder is a zero carrying the divisor's sign.
        rem = (0.0).copysign(other)
    elif (other < 0) != (rem < 0):
        # Remainder and divisor disagree in sign: shift the remainder by one
        # divisor and compensate in the quotient.
        rem += other
        quot -= 1.0

    floored = 0.0
    if not quot:
        # Zero quotient: take the sign of the true quotient.
        floored = (0.0).copysign(self / other)
    else:
        # Snap the quotient to an integral value; if flooring dropped more
        # than one half, the next integer up is the closer one.
        floored = quot.__floor__()
        if quot - floored > 0.5:
            floored += 1.0
    return (floored, rem)
# Division, modulo and divmod overloads for `int`. Each overload raises
# ZeroDivisionError on a zero divisor and otherwise delegates to the
# module-level helpers. When the divisor is a float, `self` is converted to
# float explicitly before the shared float helper is called.
@extend
class int:
    def __floordiv__(self, other: float):
        """Return `self // other` as a float.

        Raises:
            ZeroDivisionError: if `other` is 0.0.
        """
        if other == 0.0:
            raise ZeroDivisionError("float floor division by zero")
        return _divmod_float_float(float(self), other)[0]

    def __floordiv__(self, other: int):
        """Return `self // other` as an int.

        Raises:
            ZeroDivisionError: if `other` is 0.
        """
        if other == 0:
            raise ZeroDivisionError("integer division or modulo by zero")
        return _divmod_int_int(self, other)[0]

    def __truediv__(self, other: float):
        """Return `self / other` as a float.

        Raises:
            ZeroDivisionError: if `other` is 0.0.
        """
        if other == 0.0:
            raise ZeroDivisionError("float division by zero")
        return _truediv_int_float(self, other)

    def __truediv__(self, other: int):
        """Return `self / other` as a float.

        Raises:
            ZeroDivisionError: if `other` is 0.
        """
        if other == 0:
            raise ZeroDivisionError("division by zero")
        return _truediv_int_int(self, other)

    def __mod__(self, other: float):
        """Return `self % other` as a float.

        Raises:
            ZeroDivisionError: if `other` is 0.0.
        """
        if other == 0.0:
            raise ZeroDivisionError("float modulo")
        # Convert explicitly, as the float overloads of __floordiv__ and
        # __divmod__ do, instead of handing an int to the float helper.
        return _divmod_float_float(float(self), other)[1]

    def __mod__(self, other: int):
        """Return `self % other` as an int.

        Raises:
            ZeroDivisionError: if `other` is 0.
        """
        if other == 0:
            raise ZeroDivisionError("integer division or modulo by zero")
        return _divmod_int_int(self, other)[1]

    def __divmod__(self, other: float):
        """Return `(self // other, self % other)` as a pair of floats.

        Raises:
            ZeroDivisionError: if `other` is 0.0.
        """
        if other == 0.0:
            raise ZeroDivisionError("float divmod()")
        return _divmod_float_float(float(self), other)

    def __divmod__(self, other: int):
        """Return `(self // other, self % other)` as a pair of ints.

        Raises:
            ZeroDivisionError: if `other` is 0.
        """
        if other == 0:
            raise ZeroDivisionError("integer division or modulo by zero")
        return _divmod_int_int(self, other)
# Division, modulo and divmod overloads for `float`. Each overload raises
# ZeroDivisionError on a zero divisor; an int divisor is converted to float
# and then handled by the same helpers as a float divisor.
@extend
class float:
    def __floordiv__(self, other: float):
        """Return `self // other`; raise ZeroDivisionError if `other` is 0.0."""
        if other == 0.0:
            raise ZeroDivisionError("float floor division by zero")
        quotient, _ = _divmod_float_float(self, other)
        return quotient

    def __floordiv__(self, other: int):
        """Return `self // other`; raise ZeroDivisionError if `other` is 0."""
        if other == 0:
            raise ZeroDivisionError("float floor division by zero")
        divisor = float(other)
        quotient, _ = _divmod_float_float(self, divisor)
        return quotient

    def __truediv__(self, other: float):
        """Return `self / other`; raise ZeroDivisionError if `other` is 0.0."""
        if other == 0.0:
            raise ZeroDivisionError("float division by zero")
        return _truediv_float_float(self, other)

    def __truediv__(self, other: int):
        """Return `self / other`; raise ZeroDivisionError if `other` is 0."""
        if other == 0:
            raise ZeroDivisionError("float division by zero")
        divisor = float(other)
        return _truediv_float_float(self, divisor)

    def __mod__(self, other: float):
        """Return `self % other`; raise ZeroDivisionError if `other` is 0.0."""
        if other == 0.0:
            raise ZeroDivisionError("float modulo")
        _, remainder = _divmod_float_float(self, other)
        return remainder

    def __mod__(self, other: int):
        """Return `self % other`; raise ZeroDivisionError if `other` is 0."""
        if other == 0:
            raise ZeroDivisionError("float modulo")
        divisor = float(other)
        _, remainder = _divmod_float_float(self, divisor)
        return remainder

    def __divmod__(self, other: float):
        """Return `(self // other, self % other)`; raise ZeroDivisionError if `other` is 0.0."""
        if other == 0.0:
            raise ZeroDivisionError("float divmod()")
        return _divmod_float_float(self, other)

    def __divmod__(self, other: int):
        """Return `(self // other, self % other)`; raise ZeroDivisionError if `other` is 0."""
        if other == 0:
            raise ZeroDivisionError("float divmod()")
        divisor = float(other)
        return _divmod_float_float(self, divisor)