mirror of
https://github.com/exaloop/codon.git
synced 2025-06-03 15:03:52 +08:00
* Add nvptx pass * Fix spaces * Don't change name * Add runtime support * Add init call * Add more runtime functions * Add launch function * Add intrinsics * Fix codegen * Run GPU pass between general opt passes * Set data layout * Create context * Link libdevice * Add function remapping * Fix linkage * Fix libdevice link * Fix linking * Fix personality * Fix linking * Fix linking * Fix linking * Add internalize pass * Add more math conversions * Add more re-mappings * Fix conversions * Fix __str__ * Add decorator attribute for any decorator * Update kernel decorator * Fix kernel decorator * Fix kernel decorator * Fix kernel decorator * Fix kernel decorator * Remove old decorator * Fix pointer calc * Fix fill-in codegen * Fix linkage * Add comment * Update list conversion * Add more conversions * Add dict and set conversions * Add float32 type to IR/LLVM * Add float32 * Add float32 stdlib * Keep required global values in PTX module * Fix PTX module pruning * Fix malloc * Set will-return * Fix name cleanup * Fix access * Fix name cleanup * Fix function renaming * Update dimension API * Fix args * Clean up API * Move GPU transformations to end of opt pipeline * Fix alloc replacements * Fix naming * Target PTX 4.2 * Fix global renaming * Fix early return in static blocks; Add __realized__ function * Format * Add __llvm_name__ for functions * Add vector type to IR * SIMD support [wip] * Update kernel naming * Fix early returns; Fix SIMD calls * Fix kernel naming * Fix IR matcher * Remove module print * Update realloc * Add overloads for 32-bit float math ops * Add gpu.Pointer type for working with raw pointers * Add float32 conversion * Add to_gpu and from_gpu * clang-format * Add f32 reduction support to OpenMP * Fix automatic GPU class conversions * Fix conversion functions * Fix conversions * Rename self * Fix tuple conversion * Fix conversions * Fix conversions * Update PTX filename * Fix filename * Add raw function * Add GPU docs * Allow nested object conversions 
* Add tests (WIP) * Update SIMD * Add staticrange and statictuple loop support * SIMD updates * Add new Vec constructors * Fix UInt conversion * Fix size-0 allocs * Add more tests * Add matmul test * Rename gpu test file * Add more tests * Add alloc cache * Fix object_to_gpu * Fix frees * Fix str conversion * Fix set conversion * Fix conversions * Fix class conversion * Fix str conversion * Fix byte conversion * Fix list conversion * Fix pointer conversions * Fix conversions * Fix conversions * Update tests * Fix conversions * Fix tuple conversion * Fix tuple conversion * Fix auto conversions * Fix conversion * Fix magics * Update tests * Support GPU in JIT mode * Fix GPU+JIT * Fix kernel filename in JIT mode * Add __static_print__; Add earlyDefines; Various domination bugfixes; SimplifyContext RAII base handling * Fix global static handling * Fix float32 tests * FIx gpu module * Support OpenMP "collapse" option * Add more collapse tests * Capture generics and statics * TraitVar handling * Python exceptions / isinstance [wip; no_ci] * clang-format * Add list comparison operators * Support empty raise in IR * Add dict 'or' operator * Fix repr * Add copy module * Fix spacing * Use sm_30 * Python exceptions * TypeTrait support; Fix defaultDict * Fix earlyDefines * Add defaultdict * clang-format * Fix invalid canonicalizations * Fix empty raise * Fix copyright * Add Python numerics option * Support py-numerics in math module * Update docs * Add static Python division / modulus * Add static py numerics tests * Fix staticrange/tuple; Add KwTuple.__getitem__ * clang-format * Add gpu parameter to par * Fix globals * Don't init loop vars on loop collapse * Add par-gpu tests * Update gpu docs * Fix isinstance check * Remove invalid test * Add -libdevice to set custom path [skip ci] * Add release notes; bump version [skip ci] * Add libdevice docs [skip ci] Co-authored-by: Ibrahim Numanagić <ibrahimpasa@gmail.com>
382 lines
7.9 KiB
Python
382 lines
7.9 KiB
Python
# (c) 2022 Exaloop Inc. All rights reserved.
|
|
|
|
from internal.gc import alloc_atomic, free
|
|
from internal.types.optional import unwrap
|
|
|
|
|
|
@tuple
class object:
    # Field-less tuple type whose only method supplies a fixed representation.
    def __repr__(self) -> str:
        """
        Return the constant placeholder text for a bare object
        """
        text = "<object>"
        return text
|
|
|
|
|
|
def id(x) -> int:
    """
    Return the raw address of x as an int when x is a ByRef
    instance, and 0 for every other kind of value
    """
    if isinstance(x, ByRef):
        address = x.__raw__()
        return int(address)
    else:
        return 0
|
|
|
|
|
|
# Result of _C.seq_stdout(); print() uses it as the default `file` argument.
_stdout = _C.seq_stdout()
|
|
|
|
|
|
def print(*args, sep: str = " ", end: str = "\n", file=_stdout, flush: bool = False):
    """
    Write str() of every argument to file, with sep between
    consecutive arguments and end after the last one.

    file is either a cobj, used directly, or an object whose
    .fp attribute is used. The stream is flushed when flush is true.
    """
    stream = cobj()
    if isinstance(file, cobj):
        stream = file
    else:
        stream = file.fp

    need_sep = False
    for item in args:
        # the separator goes before every argument except the first
        if need_sep and sep:
            _C.seq_print_full(sep, stream)
        _C.seq_print_full(str(item), stream)
        need_sep = True

    _C.seq_print_full(end, stream)
    if flush:
        _C.fflush(stream)
|
|
|
|
|
|
def min(*args):
    """
    Return the smallest item.

    A single argument that has __iter__ is scanned element by
    element; otherwise the arguments themselves are compared.
    Raises ValueError("empty sequence") when called with no
    arguments or with an empty iterable.
    """
    if staticlen(args) == 0:
        raise ValueError("empty sequence")
    elif staticlen(args) == 1 and hasattr(args[0], "__iter__"):
        it = args[0].__iter__()
        if it.done():
            it.destroy()
            raise ValueError("empty sequence")
        best = it.next()
        while not it.done():
            candidate = it.next()
            if candidate < best:
                best = candidate
        it.destroy()
        return best
    elif staticlen(args) == 2:
        lhs, rhs = args
        # on a tie the first argument is returned
        return lhs if lhs <= rhs else rhs
    else:
        smallest = args[0]
        for candidate in args:
            if candidate < smallest:
                smallest = candidate
        return smallest
|
|
|
|
|
|
def max(*args):
    """
    Return the largest item.

    A single argument that has __iter__ is scanned element by
    element; otherwise the arguments themselves are compared.
    Raises ValueError("empty sequence") when called with no
    arguments or with an empty iterable.
    """
    if staticlen(args) == 0:
        raise ValueError("empty sequence")
    elif staticlen(args) == 1 and hasattr(args[0], "__iter__"):
        it = args[0].__iter__()
        if it.done():
            it.destroy()
            raise ValueError("empty sequence")
        best = it.next()
        while not it.done():
            candidate = it.next()
            if candidate > best:
                best = candidate
        it.destroy()
        return best
    elif staticlen(args) == 2:
        lhs, rhs = args
        # on a tie the first argument is returned
        return lhs if lhs >= rhs else rhs
    else:
        largest = args[0]
        for candidate in args:
            if candidate > largest:
                largest = candidate
        return largest
|
|
|
|
|
|
def len(x) -> int:
    """
    Return the number of items in x, as reported by x.__len__()
    """
    count = x.__len__()
    return count
|
|
|
|
|
|
def iter(x):
    """
    Return the iterator produced by x.__iter__()
    """
    iterator = x.__iter__()
    return iterator
|
|
|
|
|
|
def abs(x):
    """
    Return the absolute value of x, as computed by x.__abs__()
    """
    magnitude = x.__abs__()
    return magnitude
|
|
|
|
|
|
def hash(x) -> int:
    """
    Return the hash of x, as computed by x.__hash__()
    """
    digest = x.__hash__()
    return digest
|
|
|
|
|
|
def ord(s: str) -> int:
    """
    Return the integer value of the single byte that makes up s.
    Raises TypeError when s is not exactly one character long.
    """
    if len(s) == 1:
        return int(s.ptr[0])
    raise TypeError(
        f"ord() expected a character, but string of length {len(s)} found"
    )
|
|
|
|
|
|
def divmod(a, b):
    """
    Return a.__divmod__(b) when a provides that method, and the
    pair (a // b, a % b) otherwise
    """
    if hasattr(a, "__divmod__"):
        return a.__divmod__(b)
    else:
        quotient = a // b
        remainder = a % b
        return (quotient, remainder)
|
|
|
|
|
|
def chr(i: int) -> str:
    """
    Return a string of length one whose single byte is byte(i)
    """
    buf = cobj(1)
    buf[0] = byte(i)
    return str(buf, 1)
|
|
|
|
|
|
def next(g: Generator[T], default: Optional[T] = None, T: type) -> T:
    """
    Return the next item produced by g. Once g is done, return
    the value held by default if one was supplied, and raise
    StopIteration otherwise.
    """
    if not g.done():
        return g.next()
    if default is None:
        raise StopIteration()
    return default.__val__()
|
|
|
|
|
|
def any(x: Generator[T], T: type) -> bool:
    """
    Return True as soon as an item of x is true;
    return False if x runs out without one
    """
    found = False
    for item in x:
        if item:
            found = True
            break
    return found
|
|
|
|
|
|
def all(x: Generator[T], T: type) -> bool:
    """
    Return False as soon as an item of x is false;
    return True if x runs out without one
    """
    holds = True
    for item in x:
        if not item:
            holds = False
            break
    return holds
|
|
|
|
|
|
def zip(*args):
    """
    Yield tuples holding one item from each iterable argument,
    stopping as soon as any of them is done. With no arguments
    nothing is yielded.
    """
    if staticlen(args) == 0:
        yield from List[int]()
    else:
        sources = tuple(iter(a) for a in args)
        while True:
            exhausted = False
            # every source is polled each round, even after one reports done
            for src in sources:
                if src.done():
                    exhausted = True
            if exhausted:
                break
            yield tuple(src.next() for src in sources)
        for src in sources:
            src.destroy()
|
|
|
|
|
|
def filter(f: Callable[[T], bool], x: Generator[T], T: type) -> Generator[T]:
    """
    Yield, in order, each item of x for which f(item) is true
    """
    for item in x:
        if not f(item):
            continue
        yield item
|
|
|
|
|
|
def map(f, *args):
    """
    Yield f applied to each item of a single iterable, or to the
    items of several iterables taken in lockstep through zip.
    Calling map without any iterable is a compile-time error.
    """
    if staticlen(args) == 0:
        compile_error("map() expects at least one iterator")
    elif staticlen(args) == 1:
        for item in args[0]:
            yield f(item)
    else:
        for group in zip(*args):
            yield f(*group)
|
|
|
|
|
|
def enumerate(x, start: int = 0):
    """
    Yield (index, item) pairs for the items of x, with the index
    beginning at start (0 by default) and growing by one per item
    """
    index = start
    for item in x:
        pair = (index, item)
        yield pair
        index += 1
|
|
|
|
|
|
def echo(x):
    """
    Print x and then return it unchanged, so a value can be
    inspected in the middle of an expression
    """
    # call form of print, matching how the rest of this module invokes it
    print(x)
    return x
|
|
|
|
|
|
def reversed(x):
    """
    Return x.__reversed__() when x provides it; otherwise yield
    x[i] for i running from x.__len__() - 1 down to 0
    """
    if hasattr(x, "__reversed__"):
        return x.__reversed__()
    else:
        index = x.__len__() - 1
        while index >= 0:
            yield x[index]
            index -= 1
|
|
|
|
|
|
def round(x, n=0):
    """
    Return x rounded to n digits after the decimal point:
    x is scaled by 10**n, rounded, and scaled back.
    """
    scale = float.__pow__(10.0, n)
    scaled = x * scale
    return float.__round__(scaled) / scale
|
|
|
|
|
|
def sum(xi):
    """
    Return the items of xi added together, starting from the
    first item and accumulating the rest with +=.

    When xi is empty the iterator is destroyed and no value is
    explicitly returned.
    """
    it = iter(xi)
    if not it.done():
        total = it.next()
        while not it.done():
            total += it.next()
        it.destroy()
        return total
    else:
        it.destroy()
|
|
|
|
|
|
def repr(x):
    """
    Return the representation of x, as produced by x.__repr__()
    """
    text = x.__repr__()
    return text
|
|
|
|
|
|
def _int_format(a: int, base: int, prefix: str = ""):
    """
    Format a as text in the given base, which must be 2, 8, 10
    or 16 (checked with an assert).

    The result is laid out as an optional "-" sign, then prefix,
    then the digits in lowercase. Zero is rendered as prefix
    followed by a single "0".
    """
    assert base == 2 or base == 8 or base == 10 or base == 16
    chars = "0123456789abcdef-"

    # First pass: count how many digits a needs in this base.
    b = a
    digits = 0
    while b != 0:
        digits += 1
        b //= base

    # One extra byte covers either the "-" sign (a < 0) or the
    # lone "0" digit (a == 0, where digits is 0).
    sz = digits + (1 if a <= 0 else 0) + len(prefix)
    p = Ptr[byte](sz)
    q = p

    if a < 0:
        # the last entry of chars is the minus sign
        q[0] = chars[-1].ptr[0]
        q += 1

    if prefix:
        str.memcpy(q, prefix.ptr, len(prefix))
        q += len(prefix)

    if digits != 0:
        # Second pass: fill the digits from the last position backwards.
        b = a
        q += digits - 1
        while b != 0:
            # abs() keeps the table index non-negative when the remainder is negative
            q[0] = chars.ptr[abs(b % base)]
            q += -1
            b //= base
    else:
        q[0] = chars.ptr[0]

    return str(p, sz)
|
|
|
|
|
|
def bin(n):
    """
    Return n.__index__() formatted in base 2 with a "0b" prefix
    """
    value = n.__index__()
    return _int_format(value, 2, "0b")
|
|
|
|
|
|
def oct(n):
    """
    Return n.__index__() formatted in base 8 with a "0o" prefix
    """
    value = n.__index__()
    return _int_format(value, 8, "0o")
|
|
|
|
|
|
def hex(n):
    """
    Return n.__index__() formatted in base 16 with a "0x" prefix
    """
    value = n.__index__()
    return _int_format(value, 16, "0x")
|
|
|
|
|
|
@extend
class int:
    def _from_str(s: str, base: int):
        """
        Parse s as an integer in the given base using C strtoll.

        Raises ValueError when base is negative, 1 or above 36,
        when s is empty, or when strtoll stops before the end
        of s.
        """
        from C import strtoll(cobj, Ptr[cobj], i32) -> int

        if base < 0 or base > 36 or base == 1:
            raise ValueError("int() base must be >= 2 and <= 36, or 0")

        # s is copied into a buffer with a trailing zero byte before
        # being handed to strtoll; inputs shorter than the 32-byte
        # array use it, longer ones get a separate allocation.
        buf = __array__[byte](32)
        n = len(s)
        need_dyn_alloc = n >= len(buf)

        p = alloc_atomic(n + 1) if need_dyn_alloc else buf.ptr
        str.memcpy(p, s.ptr, n)
        p[n] = byte(0)

        end = cobj()
        result = strtoll(p, __ptr__(end), i32(base))

        # For an empty input nothing is consumed and end == p == p + n,
        # so the end-pointer test alone would accept it; reject it
        # explicitly. The comparison is done before p may be freed.
        invalid = n == 0 or end != p + n

        if need_dyn_alloc:
            free(p)

        if invalid:
            raise ValueError(
                f"invalid literal for int() with base {base}: {s}"
            )

        return result
|
|
|
|
|
|
def _jit_display(x, s: Static[str], bundle: Set[str] = Set[str]()):
    """
    Print a display form of x without a trailing newline.

    Nothing is printed for None. When x has _repr_mimebundle_
    and s is "jupyter", the first mime type of the bundle is
    printed together with its payload in a NUL-delimited frame.
    Otherwise x.__repr__() is printed if available, then
    x.__str__().
    """
    if isinstance(x, None):
        return
    if hasattr(x, "_repr_mimebundle_") and s == "jupyter":
        d = x._repr_mimebundle_(bundle)
        # TODO: choose the most suitable mime type; for now take the first key
        mime = next(d.keys())
        framed = f"\x00\x00__codon/mime__\x00{mime}\x00{d[mime]}"
        print(framed, end='')
    elif hasattr(x, "__repr__"):
        shown = x.__repr__()
        print(shown, end='')
    elif hasattr(x, "__str__"):
        shown = x.__str__()
        print(shown, end='')
|