1
0
mirror of https://github.com/exaloop/codon.git synced 2025-06-03 15:03:52 +08:00
codon/stdlib/collections.codon
A. R. Shajii ebd344f894
GPU and other updates (#52)
* Add nvptx pass

* Fix spaces

* Don't change name

* Add runtime support

* Add init call

* Add more runtime functions

* Add launch function

* Add intrinsics

* Fix codegen

* Run GPU pass between general opt passes

* Set data layout

* Create context

* Link libdevice

* Add function remapping

* Fix linkage

* Fix libdevice link

* Fix linking

* Fix personality

* Fix linking

* Fix linking

* Fix linking

* Add internalize pass

* Add more math conversions

* Add more re-mappings

* Fix conversions

* Fix __str__

* Add decorator attribute for any decorator

* Update kernel decorator

* Fix kernel decorator

* Fix kernel decorator

* Fix kernel decorator

* Fix kernel decorator

* Remove old decorator

* Fix pointer calc

* Fix fill-in codegen

* Fix linkage

* Add comment

* Update list conversion

* Add more conversions

* Add dict and set conversions

* Add float32 type to IR/LLVM

* Add float32

* Add float32 stdlib

* Keep required global values in PTX module

* Fix PTX module pruning

* Fix malloc

* Set will-return

* Fix name cleanup

* Fix access

* Fix name cleanup

* Fix function renaming

* Update dimension API

* Fix args

* Clean up API

* Move GPU transformations to end of opt pipeline

* Fix alloc replacements

* Fix naming

* Target PTX 4.2

* Fix global renaming

* Fix early return in static blocks; Add __realized__ function

* Format

* Add __llvm_name__ for functions

* Add vector type to IR

* SIMD support [wip]

* Update kernel naming

* Fix early returns; Fix SIMD calls

* Fix kernel naming

* Fix IR matcher

* Remove module print

* Update realloc

* Add overloads for 32-bit float math ops

* Add gpu.Pointer type for working with raw pointers

* Add float32 conversion

* Add to_gpu and from_gpu

* clang-format

* Add f32 reduction support to OpenMP

* Fix automatic GPU class conversions

* Fix conversion functions

* Fix conversions

* Rename self

* Fix tuple conversion

* Fix conversions

* Fix conversions

* Update PTX filename

* Fix filename

* Add raw function

* Add GPU docs

* Allow nested object conversions

* Add tests (WIP)

* Update SIMD

* Add staticrange and statictuple loop support

* SIMD updates

* Add new Vec constructors

* Fix UInt conversion

* Fix size-0 allocs

* Add more tests

* Add matmul test

* Rename gpu test file

* Add more tests

* Add alloc cache

* Fix object_to_gpu

* Fix frees

* Fix str conversion

* Fix set conversion

* Fix conversions

* Fix class conversion

* Fix str conversion

* Fix byte conversion

* Fix list conversion

* Fix pointer conversions

* Fix conversions

* Fix conversions

* Update tests

* Fix conversions

* Fix tuple conversion

* Fix tuple conversion

* Fix auto conversions

* Fix conversion

* Fix magics

* Update tests

* Support GPU in JIT mode

* Fix GPU+JIT

* Fix kernel filename in JIT mode

* Add __static_print__; Add earlyDefines; Various domination bugfixes; SimplifyContext RAII base handling

* Fix global static handling

* Fix float32 tests

* FIx gpu module

* Support OpenMP "collapse" option

* Add more collapse tests

* Capture generics and statics

* TraitVar handling

* Python exceptions / isinstance [wip; no_ci]

* clang-format

* Add list comparison operators

* Support empty raise in IR

* Add dict 'or' operator

* Fix repr

* Add copy module

* Fix spacing

* Use sm_30

* Python exceptions

* TypeTrait support; Fix defaultDict

* Fix earlyDefines

* Add defaultdict

* clang-format

* Fix invalid canonicalizations

* Fix empty raise

* Fix copyright

* Add Python numerics option

* Support py-numerics in math module

* Update docs

* Add static Python division / modulus

* Add static py numerics tests

* Fix staticrange/tuple; Add KwTuple.__getitem__

* clang-format

* Add gpu parameter to par

* Fix globals

* Don't init loop vars on loop collapse

* Add par-gpu tests

* Update gpu docs

* Fix isinstance check

* Remove invalid test

* Add -libdevice to set custom path [skip ci]

* Add release notes; bump version [skip ci]

* Add libdevice docs [skip ci]

Co-authored-by: Ibrahim Numanagić <ibrahimpasa@gmail.com>
2022-09-15 15:40:00 -04:00

439 lines
12 KiB
Python

# (c) 2022 Exaloop Inc. All rights reserved.
from internal.types.optional import unwrap
class deque:
_arr: Array[T]
_head: int
_tail: int
_maxlen: int
T: type
def __init__(self, arr: Array[T], head: int, tail: int, maxlen: int):
self._arr = arr
self._head = head
self._tail = tail
self._maxlen = maxlen
def __init__(self):
self._arr = Array[T](16)
self._head = 0
self._tail = 0
self._maxlen = -1
def __init__(self, maxlen: int):
cap = 1
while cap < maxlen:
cap *= 2
self._arr = Array[T](cap)
self._head = 0
self._tail = 0
self._maxlen = maxlen
def __init__(self, it: Generator[T]):
self._arr = Array[T](16)
self._head = 0
self._tail = 0
self._maxlen = -1
for i in it:
self.append(i)
@property
def maxlen(self) -> int:
return self._maxlen
def _double_cap(self):
p = self._head
n = len(self._arr)
r = n - p
new_cap = n * 2
new_arr = Array[T](new_cap)
for i in range(r):
new_arr[i] = self._arr[p + i]
for i in range(p):
new_arr[i + r] = self._arr[i]
self._arr = new_arr
self._head = 0
self._tail = n
def _check_not_empty(self):
if not self:
raise IndexError("pop from an empty deque")
def __bool__(self) -> bool:
return self._head != self._tail
def __len__(self) -> int:
return (self._tail - self._head) & (len(self._arr) - 1)
def appendleft(self, x: T):
self._head = (self._head - 1) & (len(self._arr) - 1)
self._arr[self._head] = x
if self._maxlen >= 0 and len(self) > self._maxlen:
self.pop()
if self._head == self._tail:
self._double_cap()
def append(self, x: T):
self._arr[self._tail] = x
self._tail = (self._tail + 1) & (len(self._arr) - 1)
if self._maxlen >= 0 and len(self) > self._maxlen:
self.popleft()
if self._head == self._tail:
self._double_cap()
def popleft(self) -> T:
self._check_not_empty()
res = self._arr[self._head]
self._head = (self._head + 1) & (len(self._arr) - 1)
return res
def pop(self) -> T:
self._check_not_empty()
self._tail = (self._tail - 1) & (len(self._arr) - 1)
return self._arr[self._tail]
def clear(self):
self._head = 0
self._tail = 0
def __iter__(self) -> Generator[T]:
i = self._head
while i != self._tail:
yield self._arr[i]
i = (i + 1) & (len(self._arr) - 1)
def __contains__(self, x: T) -> bool:
for i in self:
if i == x:
return True
return False
def __deepcopy__(self) -> deque[T]:
return deque(i.__deepcopy__() for i in self)
def __copy__(self) -> deque[T]:
return deque[T](self._arr.__copy__(), self._head, self._tail, self._maxlen)
def copy(self) -> deque[T]:
return self.__copy__()
def __repr__(self) -> str:
return f"deque({repr(List[T](iter(self)))})"
def _idx_check(self, idx: int, msg: str):
if self._head == self._tail or idx >= len(self) or idx < 0:
raise IndexError(msg)
@property
def left(self) -> T:
self._idx_check(0, "list index out of range")
return self._arr[self._head]
def __getitem__(self, idx: int) -> T:
if idx < 0:
idx += len(self)
self._idx_check(idx, "list index out of range")
if self._head <= self._tail:
return self._arr[self._head + idx]
elif self._head + idx < len(self._arr):
return self._arr[self._head + idx]
else:
idx -= len(self._arr) - self._head
assert 0 <= idx < self._tail
return self._arr[idx]
@tuple
class _CounterItem:
element: T
count: int
T: type
def __eq__(self, other: _CounterItem[T]) -> bool:
return self.count == other.count
def __ne__(self, other: _CounterItem[T]) -> bool:
return self.count != other.count
def __lt__(self, other: _CounterItem[T]) -> bool:
return self.count < other.count
def __gt__(self, other: _CounterItem[T]) -> bool:
return self.count > other.count
def __le__(self, other: _CounterItem[T]) -> bool:
return self.count <= other.count
def __ge__(self, other: _CounterItem[T]) -> bool:
return self.count >= other.count
class Counter(Dict[T, int]):
T: type
def __init__(self, elements: Generator[T]):
self._init()
self.update(elements)
def __init__(self, other: Counter[T]):
self._init_from(other)
def __init__(self, other: Dict[T, int]):
self._init_from(other)
def elements(self) -> Generator[T]:
for k, v in self.items():
for i in range(v):
yield k
def most_common(self, n: Optional[int] = None) -> List[Tuple[T, int]]:
if len(self) == 0:
return List[_CounterItem](capacity=0)
if n is None:
v = List[_CounterItem](capacity=len(self))
for t in self.items():
v.append(t)
v.sort(reverse=True)
return v
else:
from heapq import heapify, heapreplace
n: int = n
if n == 1:
top: Optional[_CounterItem] = None
for t in self.items():
if top is None or t[1] > top.count:
top = t
return [unwrap(top)]
if n <= 0:
return List[_CounterItem](capacity=0)
result = List[_CounterItem](capacity=n)
for t in self.items():
if len(result) < n:
result.append(t)
if len(result) == n:
heapify(result)
else:
if result[0] < t:
heapreplace(result, t)
result.sort(reverse=True)
return result
def subtract(self, elements: Generator[T]):
for a in elements:
self.increment(a, -1)
def subtract(self, other: Counter[T]):
for k, v in other.items():
self.increment(k, -v)
def subtract(self, other: Dict[T, int]):
for k, v in other.items():
self.increment(k, -v)
def update(self, elements: Generator[T]):
for a in elements:
self.increment(a)
def update(self, other: Counter[T]):
for k, v in other.items():
self.increment(k, by=v)
def update(self, other: Dict[T, int]):
for k, v in other.items():
self.increment(k, by=v)
def update(self):
pass
def total(self) -> int:
m = 0
for v in self.values():
m += v
return m
def __getitem__(self, key: T) -> int:
return self.get(key, 0)
def __delitem__(self, key: T):
x = self._kh_get(key)
if x != self._kh_end():
self._kh_del(x)
def __eq__(self, other: Counter[T]) -> bool:
if self.__len__() != other.__len__():
return False
for k, v in self.items():
if k not in other or other[k] != v:
return False
return True
def __ne__(self, other: Counter[T]) -> bool:
return not (self == other)
def __copy__(self) -> Counter[T]:
return Counter[T](self)
def __iadd__(self, other: Counter[T]) -> Counter[T]:
for k, v in other.items():
self.increment(k, by=v)
self._del_non_positives()
return self
def __isub__(self, other: Counter[T]) -> Counter[T]:
for k, v in other.items():
self.increment(k, by=-v)
self._del_non_positives()
return self
def __iand__(self, other: Counter[T]) -> Counter[T]:
for k, v in other.items():
self[k] = min(self.get(k, 0), v)
self._del_non_positives()
return self
def __ior__(self, other: Counter[T]) -> Counter[T]:
self._del_non_positives()
for k, v in other.items():
self[k] = max(self.get(k, 0), v)
self._del_non_positives()
return self
def __pos__(self) -> Counter[T]:
result = Counter[T]()
result.resize(self._n_buckets)
for k, v in self.items():
if v > 0:
result[k] = v
return result
def __neg__(self) -> Counter[T]:
result = Counter[T]()
result.resize(self._n_buckets)
for k, v in self.items():
if v < 0:
result[k] = -v
return result
def __add__(self, other: Counter[T]) -> Counter[T]:
result = self.__copy__()
result += other
return result
def __sub__(self, other: Counter[T]) -> Counter[T]:
result = self.__copy__()
result -= other
return result
def __and__(self, other: Counter[T]) -> Counter[T]:
result = self.__copy__()
result &= other
return result
def __or__(self, other: Counter[T]) -> Counter[T]:
result = self.__copy__()
result |= other
return result
def __repr__(self):
return f"Counter({super().__repr__()})"
def __dict_do_op_throws__(self, key: T, other: Z, op: F, F: type, Z: type):
self.__dict_do_op__(key, other, 0, op)
def _del_non_positives(self):
for k, v in self.items():
if v <= 0:
del self[k]
@extend
class Dict:
def __init__(self: Dict[K, int], other: Counter[K]):
self._init_from(other)
class defaultdict(Dict[K,V]):
default_factory: S
K: type
V: type
S: TypeVar[Callable[[], V]]
def __init__(self: defaultdict[K, VV, Function[[], V]], VV: TypeVar[V]):
super().__init__()
self.default_factory = lambda: VV()
def __init__(self, f: S):
super().__init__()
self.default_factory = f
def __init__(self: defaultdict[K, VV, Function[[], V]], VV: TypeVar[V], other: Dict[K, V]):
super().__init__(other)
self.default_factory = lambda: VV()
def __init__(self, f: S, other: Dict[K, V]):
super().__init__(other)
self.default_factory = f
def __missing__(self, key: K):
default_value = self.default_factory()
self.__setitem__(key, default_value)
return default_value
def __getitem__(self, key: K) -> V:
if key not in self:
return self.__missing__(key)
return super().__getitem__(key)
def __dict_do_op_throws__(self, key: K, other: Z, op: F, F: type, Z: type):
x = self._kh_get(key)
if x == self._kh_end():
self.__missing__(key)
x = self._kh_get(key)
self._vals[x] = op(self._vals[x], other)
def copy(self):
d = defaultdict[K,V,S](self.default_factory)
d._init_from(self)
return d
def __copy__(self):
return self.copy()
def __deepcopy__(self):
d = defaultdict[K,V,S](self.default_factory)
for k,v in self.items():
d[k.__deepcopy__()] = v.__deepcopy__()
return d
def __eq__(self, other: defaultdict[K,V,S]) -> bool:
if self.__len__() != other.__len__():
return False
for k, v in self.items():
if k not in other or other[k] != v:
return False
return True
def __ne__(self, other: defaultdict[K,V,S]) -> bool:
return not (self == other)
def __repr__(self):
return f"defaultdict(<default factory of '{V.__name__}'>, {super().__repr__()})"
@extend
class Dict:
def __init__(self: Dict[K, V], other: defaultdict[K, V, S], S: type):
self._init_from(other)
def namedtuple(name: Static[str], args): # internal
pass