1
0
mirror of https://github.com/exaloop/codon.git synced 2025-06-03 15:03:52 +08:00
codon/stdlib/pickle.codon
A. R. Shajii ebd344f894
GPU and other updates (#52)
* Add nvptx pass

* Fix spaces

* Don't change name

* Add runtime support

* Add init call

* Add more runtime functions

* Add launch function

* Add intrinsics

* Fix codegen

* Run GPU pass between general opt passes

* Set data layout

* Create context

* Link libdevice

* Add function remapping

* Fix linkage

* Fix libdevice link

* Fix linking

* Fix personality

* Fix linking

* Fix linking

* Fix linking

* Add internalize pass

* Add more math conversions

* Add more re-mappings

* Fix conversions

* Fix __str__

* Add decorator attribute for any decorator

* Update kernel decorator

* Fix kernel decorator

* Fix kernel decorator

* Fix kernel decorator

* Fix kernel decorator

* Remove old decorator

* Fix pointer calc

* Fix fill-in codegen

* Fix linkage

* Add comment

* Update list conversion

* Add more conversions

* Add dict and set conversions

* Add float32 type to IR/LLVM

* Add float32

* Add float32 stdlib

* Keep required global values in PTX module

* Fix PTX module pruning

* Fix malloc

* Set will-return

* Fix name cleanup

* Fix access

* Fix name cleanup

* Fix function renaming

* Update dimension API

* Fix args

* Clean up API

* Move GPU transformations to end of opt pipeline

* Fix alloc replacements

* Fix naming

* Target PTX 4.2

* Fix global renaming

* Fix early return in static blocks; Add __realized__ function

* Format

* Add __llvm_name__ for functions

* Add vector type to IR

* SIMD support [wip]

* Update kernel naming

* Fix early returns; Fix SIMD calls

* Fix kernel naming

* Fix IR matcher

* Remove module print

* Update realloc

* Add overloads for 32-bit float math ops

* Add gpu.Pointer type for working with raw pointers

* Add float32 conversion

* Add to_gpu and from_gpu

* clang-format

* Add f32 reduction support to OpenMP

* Fix automatic GPU class conversions

* Fix conversion functions

* Fix conversions

* Rename self

* Fix tuple conversion

* Fix conversions

* Fix conversions

* Update PTX filename

* Fix filename

* Add raw function

* Add GPU docs

* Allow nested object conversions

* Add tests (WIP)

* Update SIMD

* Add staticrange and statictuple loop support

* SIMD updates

* Add new Vec constructors

* Fix UInt conversion

* Fix size-0 allocs

* Add more tests

* Add matmul test

* Rename gpu test file

* Add more tests

* Add alloc cache

* Fix object_to_gpu

* Fix frees

* Fix str conversion

* Fix set conversion

* Fix conversions

* Fix class conversion

* Fix str conversion

* Fix byte conversion

* Fix list conversion

* Fix pointer conversions

* Fix conversions

* Fix conversions

* Update tests

* Fix conversions

* Fix tuple conversion

* Fix tuple conversion

* Fix auto conversions

* Fix conversion

* Fix magics

* Update tests

* Support GPU in JIT mode

* Fix GPU+JIT

* Fix kernel filename in JIT mode

* Add __static_print__; Add earlyDefines; Various domination bugfixes; SimplifyContext RAII base handling

* Fix global static handling

* Fix float32 tests

* FIx gpu module

* Support OpenMP "collapse" option

* Add more collapse tests

* Capture generics and statics

* TraitVar handling

* Python exceptions / isinstance [wip; no_ci]

* clang-format

* Add list comparison operators

* Support empty raise in IR

* Add dict 'or' operator

* Fix repr

* Add copy module

* Fix spacing

* Use sm_30

* Python exceptions

* TypeTrait support; Fix defaultDict

* Fix earlyDefines

* Add defaultdict

* clang-format

* Fix invalid canonicalizations

* Fix empty raise

* Fix copyright

* Add Python numerics option

* Support py-numerics in math module

* Update docs

* Add static Python division / modulus

* Add static py numerics tests

* Fix staticrange/tuple; Add KwTuple.__getitem__

* clang-format

* Add gpu parameter to par

* Fix globals

* Don't init loop vars on loop collapse

* Add par-gpu tests

* Update gpu docs

* Fix isinstance check

* Remove invalid test

* Add -libdevice to set custom path [skip ci]

* Add release notes; bump version [skip ci]

* Add libdevice docs [skip ci]

Co-authored-by: Ibrahim Numanagić <ibrahimpasa@gmail.com>
2022-09-15 15:40:00 -04:00

254 lines
6.6 KiB
Python

# (c) 2022 Exaloop Inc. All rights reserved.
from internal.file import _gz_errcheck
from internal.gc import sizeof, atomic
def pickle(x: T, jar: Jar, T: type):
x.__pickle__(jar)
def unpickle(jar: Jar, T: type) -> T:
return T.__unpickle__(jar)
def dump(x: T, f, T: type):
x.__pickle__(f.fp)
def load(f, T: type) -> T:
return T.__unpickle__(f.fp)
def _write_raw(jar: Jar, p: cobj, n: int):
LIMIT = 0x7FFFFFFF
while n > 0:
b = n if n < LIMIT else LIMIT
status = int(_C.gzwrite(jar, p, u32(b)))
if status != b:
_gz_errcheck(jar)
raise IOError(f"pickle error: gzwrite returned {status}")
p += b
n -= b
def _read_raw(jar: Jar, p: cobj, n: int):
LIMIT = 0x7FFFFFFF
while n > 0:
b = n if n < LIMIT else LIMIT
status = int(_C.gzread(jar, p, u32(b)))
if status != b:
_gz_errcheck(jar)
raise IOError(f"pickle error: gzread returned {status}")
p += b
n -= b
def _write(jar: Jar, x: T, T: type):
y = __ptr__(x)
_write_raw(jar, y.as_byte(), sizeof(T))
def _read(jar: Jar, T: type) -> T:
x = T()
y = __ptr__(x)
_read_raw(jar, y.as_byte(), sizeof(T))
return x
# Extend core types to allow pickling
@extend
class int:
def __pickle__(self, jar: Jar):
_write(jar, self)
def __unpickle__(jar: Jar) -> int:
return _read(jar, int)
@extend
class float:
def __pickle__(self, jar: Jar):
_write(jar, self)
def __unpickle__(jar: Jar) -> float:
return _read(jar, float)
@extend
class float32:
def __pickle__(self, jar: Jar):
_write(jar, self)
def __unpickle__(jar: Jar) -> float32:
return _read(jar, float32)
@extend
class bool:
def __pickle__(self, jar: Jar):
_write(jar, self)
def __unpickle__(jar: Jar) -> bool:
return _read(jar, bool)
@extend
class byte:
def __pickle__(self, jar: Jar):
_write(jar, self)
def __unpickle__(jar: Jar) -> byte:
return _read(jar, byte)
@extend
class str:
def __pickle__(self, jar: Jar):
_write(jar, self.len)
_write_raw(jar, self.ptr, self.len)
def __unpickle__(jar: Jar) -> str:
n = _read(jar, int)
p = Ptr[byte](n)
_read_raw(jar, p, n)
return str(p, n)
@extend
class List:
def __pickle__(self, jar: Jar):
n = len(self)
pickle(n, jar)
if atomic(T):
_write_raw(jar, (self.arr.ptr).as_byte(), n * sizeof(T))
else:
for i in range(n):
pickle(self.arr[i], jar)
def __unpickle__(jar: Jar) -> List[T]:
n = unpickle(jar, int)
arr = Array[T](n)
if atomic(T):
_read_raw(jar, (arr.ptr).as_byte(), n * sizeof(T))
else:
for i in range(n):
arr[i] = unpickle(jar, T)
return List[T](arr, n)
@extend
class Dict:
def __pickle__(self, jar: Jar):
import internal.khash as khash
if atomic(K) and atomic(V):
pickle(self._n_buckets, jar)
pickle(self._size, jar)
pickle(self._n_occupied, jar)
pickle(self._upper_bound, jar)
fsize = khash.__ac_fsize(self._n_buckets) if self._n_buckets > 0 else 0
_write_raw(jar, self._flags.as_byte(), fsize * sizeof(u32))
_write_raw(jar, self._keys.as_byte(), self._n_buckets * sizeof(K))
_write_raw(jar, self._vals.as_byte(), self._n_buckets * sizeof(V))
else:
pickle(self._n_buckets, jar)
size = len(self)
pickle(size, jar)
for k, v in self.items():
pickle(k, jar)
pickle(v, jar)
def __unpickle__(jar: Jar) -> Dict[K, V]:
import internal.khash as khash
d = {}
if atomic(K) and atomic(V):
n_buckets = unpickle(jar, int)
size = unpickle(jar, int)
n_occupied = unpickle(jar, int)
upper_bound = unpickle(jar, int)
fsize = khash.__ac_fsize(n_buckets) if n_buckets > 0 else 0
flags = Ptr[u32](fsize)
keys = Ptr[K](n_buckets)
vals = Ptr[V](n_buckets)
_read_raw(jar, flags.as_byte(), fsize * sizeof(u32))
_read_raw(jar, keys.as_byte(), n_buckets * sizeof(K))
_read_raw(jar, vals.as_byte(), n_buckets * sizeof(V))
d._n_buckets = n_buckets
d._size = size
d._n_occupied = n_occupied
d._upper_bound = upper_bound
d._flags = flags
d._keys = keys
d._vals = vals
else:
n_buckets = unpickle(jar, int)
size = unpickle(jar, int)
d.resize(n_buckets)
i = 0
while i < size:
k = unpickle(jar, K)
v = unpickle(jar, V)
d[k] = v
i += 1
return d
@extend
class Set:
def __pickle__(self, jar: Jar):
import internal.khash as khash
if atomic(K):
pickle(self._n_buckets, jar)
pickle(self._size, jar)
pickle(self._n_occupied, jar)
pickle(self._upper_bound, jar)
fsize = khash.__ac_fsize(self._n_buckets) if self._n_buckets > 0 else 0
_write_raw(jar, self._flags.as_byte(), fsize * sizeof(u32))
_write_raw(jar, self._keys.as_byte(), self._n_buckets * sizeof(K))
else:
pickle(self._n_buckets, jar)
size = len(self)
pickle(size, jar)
for k in self:
pickle(k, jar)
def __unpickle__(jar: Jar) -> Set[K]:
import internal.khash as khash
s = set[K]()
if atomic(K):
n_buckets = unpickle(jar, int)
size = unpickle(jar, int)
n_occupied = unpickle(jar, int)
upper_bound = unpickle(jar, int)
fsize = khash.__ac_fsize(n_buckets) if n_buckets > 0 else 0
flags = Ptr[u32](fsize)
keys = Ptr[K](n_buckets)
_read_raw(jar, flags.as_byte(), fsize * sizeof(u32))
_read_raw(jar, keys.as_byte(), n_buckets * sizeof(K))
s._n_buckets = n_buckets
s._size = size
s._n_occupied = n_occupied
s._upper_bound = upper_bound
s._flags = flags
s._keys = keys
else:
n_buckets = unpickle(jar, int)
size = unpickle(jar, int)
s.resize(n_buckets)
i = 0
while i < size:
k = unpickle(jar, K)
s.add(k)
i += 1
return s