codon/stdlib/internal/file.codon

262 lines
6.9 KiB
Python
Raw Normal View History

# Copyright (C) 2022-2023 Exaloop Inc. <https://exaloop.io>
2022-01-24 15:00:24 +08:00
2021-09-28 02:02:44 +08:00
from internal.gc import realloc, free
2022-01-24 15:00:24 +08:00
2021-09-28 02:02:44 +08:00
class File:
    # Thin wrapper around a C stdio stream (the handle returned by fopen()).
    #
    # Field order matters: `_iter` and `_iter_trim_newline` pass getline()
    # raw pointers into this object, taking `sz` at byte offset 0 and `buf`
    # at byte offset 8. Do not reorder or insert fields before these two.
    sz: int  # capacity slot handed to getline(); 0 after _reset()
    buf: Ptr[byte]  # line-buffer slot handed to getline(); null after _reset()
    fp: cobj  # underlying C stream; null once close() has run

    def __init__(self, fp: cobj):
        """Wrap an existing C stream handle `fp`."""
        self.fp = fp
        self._reset()

    def __init__(self, path: str, mode: str):
        """
        Open `path` with fopen() using `mode`.

        Raises IOError if fopen() returns a null handle.
        """
        self.fp = _C.fopen(path.c_str(), mode.c_str())
        if not self.fp:
            raise IOError(f"file {path} could not be opened")
        self._reset()

    def _errcheck(self, msg: str):
        """Raise IOError mentioning `msg` if ferror() reports an error on the stream."""
        err = int(_C.ferror(self.fp))
        if err:
            raise IOError(f"file I/O error: {msg}")

    def __enter__(self):
        # Returns nothing. `is_binary` in this file does
        # `with open(...) as f: f.read(...)`, so the `as` target is presumably
        # bound to the object itself rather than to this return value.
        pass

    def __exit__(self):
        """Close the file when the `with` block ends."""
        self.close()

    def __iter__(self) -> Generator[str]:
        """Yield the file's lines one at a time."""
        # `_iter` yields strings that point into `self.buf`, which the next
        # getline() call overwrites; `__ptrcopy__` presumably gives each line
        # its own storage so callers can keep it.
        for a in self._iter():
            yield a.__ptrcopy__()

    def readlines(self) -> List[str]:
        """Return all remaining lines as a list."""
        return [l for l in self]

    def write(self, s: str):
        """
        Write the bytes of `s` to the stream.

        Raises IOError if the file is closed or the stream reports an error.
        """
        self._ensure_open()
        _C.fwrite(s.ptr, 1, len(s), self.fp)
        self._errcheck("error in write")

    def __file_write_gen__(self, g: Generator[T], T: type):
        """Write `str(x)` for every item `x` produced by `g`."""
        for s in g:
            self.write(str(s))

    def read(self, sz: int = -1) -> str:
        """
        Read up to `sz` bytes, or everything up to end-of-file if `sz < 0`.

        Raises IOError if the file is closed or the stream reports an error.
        """
        self._ensure_open()
        if sz < 0:
            # Read fixed-size chunks until fread() returns nothing. The
            # previous implementation measured the remaining size with
            # ftell()/fseek(), which fail on non-seekable streams (pipes,
            # terminals) and made read() return an empty string there.
            CHUNK = 65536
            out = _strbuf()
            chunk = Ptr[byte](CHUNK)
            while True:
                n = _C.fread(chunk, 1, CHUNK, self.fp)
                self._errcheck("error in read")
                if n <= 0:
                    break
                # `chunk` is reused on every pass; this relies on
                # _strbuf.append copying the bytes, as gzFile.read in this
                # file already does with its reused line buffer.
                out.append(str(chunk, n))
            return out.__str__()
        buf = Ptr[byte](sz)
        ret = _C.fread(buf, 1, sz, self.fp)
        self._errcheck("error in read")
        return str(buf, ret)

    def tell(self) -> int:
        """
        Return the value of ftell() for the stream.

        Raises IOError if the file is closed or the stream reports an error.
        """
        self._ensure_open()
        ret = _C.ftell(self.fp)
        self._errcheck("error in tell")
        return ret

    def seek(self, offset: int, whence: int = 0):
        """
        Reposition the stream with fseek().

        `whence` is passed straight to fseek(); it defaults to 0 (SEEK_SET),
        matching Python's file API. Raises IOError if the file is closed or
        the stream reports an error.
        """
        self._ensure_open()
        _C.fseek(self.fp, offset, i32(whence))
        self._errcheck("error in seek")

    def flush(self):
        """
        Flush buffered output with fflush().

        Raises IOError if the file is closed or the stream reports an error.
        """
        self._ensure_open()
        _C.fflush(self.fp)
        # Checked like write/seek/tell so a failed flush is not silently lost.
        self._errcheck("error in flush")

    def close(self):
        """Close the stream and release the line buffer; safe to call repeatedly."""
        if self.fp:
            _C.fclose(self.fp)
            self.fp = cobj()
        if self.buf:
            # The buffer is filled in by C getline(), so it is released with
            # C free() rather than the GC allocator.
            _C.free(self.buf)
        self._reset()

    def _ensure_open(self):
        """Raise IOError if the file has been closed."""
        if not self.fp:
            raise IOError("I/O operation on closed file")

    def _reset(self):
        """Clear the line buffer pointer and its recorded capacity."""
        self.buf = Ptr[byte]()
        self.sz = 0

    def _iter(self) -> Generator[str]:
        """
        Yield each line (trailing newline included) as a view into `self.buf`.

        The yielded string is only valid until the next line is read.
        Raises IOError if the file is closed, or if iteration stopped because
        of a stream error rather than end-of-file.
        """
        self._ensure_open()
        while True:
            # getline(&self.buf, &self.sz, fp): the two pointers are computed
            # from the object's raw address (buf at +8, sz at +0).
            rd = _C.getline(
                Ptr[Ptr[byte]](self.__raw__() + 8), Ptr[int](self.__raw__()), self.fp
            )
            if rd != -1:
                yield str(self.buf, rd)
            else:
                break
        # getline() returns -1 for both end-of-file and failure; tell them apart.
        self._errcheck("error in read")

    def _iter_trim_newline(self) -> Generator[str]:
        """
        Like `_iter`, but drops one trailing newline byte from each line.

        The yielded string is only valid until the next line is read.
        """
        self._ensure_open()
        while True:
            rd = _C.getline(
                Ptr[Ptr[byte]](self.__raw__() + 8), Ptr[int](self.__raw__()), self.fp
            )
            if rd != -1:
                if self.buf[rd - 1] == byte(10):  # '\n'
                    rd -= 1
                yield str(self.buf, rd)
            else:
                break
        # getline() returns -1 for both end-of-file and failure; tell them apart.
        self._errcheck("error in read")
2022-02-16 23:51:16 +08:00
def _gz_errcheck(stream: cobj):
    """
    Raise IOError if gzerror() returns a non-empty message for `stream`.

    Does nothing when the message pointer is null or the message is empty.
    """
    code = i32(0)
    text = _C.gzerror(stream, __ptr__(code))
    # Nothing to report: null pointer or empty C string.
    if not (text and text[0]):
        return
    raise IOError(f"zlib error: {str(text, _C.strlen(text))}")
2021-09-28 02:02:44 +08:00
class gzFile:
    # Wrapper around a zlib gz stream (the handle returned by gzopen()),
    # offering the same methods as File.
    sz: int  # capacity of `buf` in bytes; 0 until the first line is read
    buf: Ptr[byte]  # reusable line buffer, allocated lazily by _getline()
    fp: cobj  # underlying gz stream; null once close() has run

    def __init__(self, fp: cobj):
        """Wrap an existing gz stream handle `fp`."""
        self.fp = fp
        self._reset()

    def __init__(self, path: str, mode: str):
        """
        Open `path` with gzopen() using `mode`.

        Raises IOError if gzopen() returns a null handle.
        """
        self.fp = _C.gzopen(path.c_str(), mode.c_str())
        if not self.fp:
            raise IOError(f"file {path} could not be opened")
        self._reset()

    def _getline(self) -> int:
        """
        Read one line into `self.buf`, growing the buffer as needed.

        Returns the number of bytes stored (trailing newline included), or -1
        if gzgets() produced nothing at all. Raises IOError via
        `_gz_errcheck` if zlib reports an error.
        """
        if not self.buf:
            self.sz = 128
            self.buf = Ptr[byte](self.sz)

        offset = 0
        while True:
            if not _C.gzgets(self.fp, self.buf + offset, i32(self.sz - offset)):
                _gz_errcheck(self.fp)
                if offset == 0:
                    return -1
                break

            offset += _C.strlen(self.buf + offset)

            # `offset > 0` guards the index below: strlen() can report 0 (for
            # example when the data starts with a NUL byte), and
            # buf[offset - 1] would then read one byte before the buffer.
            if offset > 0 and self.buf[offset - 1] == byte(10):  # '\n'
                break

            # No newline yet: the line is longer than the buffer, so double
            # the capacity and keep reading after what we already have.
            oldsz = self.sz
            self.sz *= 2
            self.buf = realloc(self.buf, self.sz, oldsz)

        return offset

    def __iter__(self) -> Generator[str]:
        """Yield the file's lines one at a time."""
        # `_iter` yields strings that point into `self.buf`, which the next
        # _getline() call overwrites; `__ptrcopy__` presumably gives each line
        # its own storage so callers can keep it.
        for a in self._iter():
            yield a.__ptrcopy__()

    def __enter__(self):
        # Returns nothing, same as File.__enter__.
        pass

    def __exit__(self):
        """Close the file when the `with` block ends."""
        self.close()

    def close(self):
        """Close the gz stream and release the line buffer; safe to call repeatedly."""
        if self.fp:
            _C.gzclose(self.fp)
            self.fp = cobj()
        if self.buf:
            # Allocated in _getline() with Ptr[byte](...) / realloc from
            # internal.gc, so it is released with the matching `free`.
            free(self.buf)
        self._reset()

    def readlines(self) -> List[str]:
        """Return all remaining lines as a list."""
        return [l for l in self]

    def write(self, s: str):
        """
        Write the bytes of `s` with gzwrite().

        Raises IOError if the file is closed or zlib reports an error.
        """
        self._ensure_open()
        _C.gzwrite(self.fp, s.ptr, u32(len(s)))
        _gz_errcheck(self.fp)

    def __file_write_gen__(self, g: Generator[T], T: type):
        """Write `str(x)` for every item `x` produced by `g`."""
        for s in g:
            self.write(str(s))

    def read(self, sz: int = -1) -> str:
        """
        Read up to `sz` uncompressed bytes, or all remaining lines if `sz < 0`.

        Raises IOError if the file is closed or zlib reports an error.
        """
        self._ensure_open()
        if sz < 0:
            # Accumulate every remaining line; `_iter` yields views into the
            # reused line buffer, so the accumulator must hold its own copy.
            buf = _strbuf()
            for a in self._iter():
                buf.append(a)
            return buf.__str__()
        buf = Ptr[byte](sz)
        ret = _C.gzread(self.fp, buf, u32(sz))
        _gz_errcheck(self.fp)
        return str(buf, int(ret))

    def tell(self) -> int:
        """
        Return the value of gztell() for the stream.

        Raises IOError if the file is closed or zlib reports an error.
        """
        self._ensure_open()
        ret = _C.gztell(self.fp)
        _gz_errcheck(self.fp)
        return ret

    def seek(self, offset: int, whence: int = 0):
        """
        Reposition the stream with gzseek().

        `whence` is passed straight to gzseek(); it defaults to 0 (SEEK_SET),
        matching Python's file API. Raises IOError if the file is closed or
        zlib reports an error.
        """
        self._ensure_open()
        _C.gzseek(self.fp, offset, i32(whence))
        _gz_errcheck(self.fp)

    def flush(self):
        """
        Flush pending output with gzflush().

        Raises IOError if the file is closed or zlib reports an error.
        """
        # NOTE(review): 4 is zlib's Z_FINISH, which per the zlib manual
        # completes the current gzip stream so that a later write starts a
        # new one. Confirm this is intended rather than Z_SYNC_FLUSH.
        Z_FINISH = 4
        self._ensure_open()
        _C.gzflush(self.fp, i32(Z_FINISH))
        _gz_errcheck(self.fp)

    def _iter(self) -> Generator[str]:
        """
        Yield each line (trailing newline included) as a view into `self.buf`.

        The yielded string is only valid until the next line is read.
        """
        self._ensure_open()
        while True:
            rd = self._getline()
            if rd != -1:
                yield str(self.buf, rd)
            else:
                break

    def _iter_trim_newline(self) -> Generator[str]:
        """
        Like `_iter`, but drops one trailing newline byte from each line.

        The yielded string is only valid until the next line is read.
        """
        self._ensure_open()
        while True:
            rd = self._getline()
            if rd != -1:
                if self.buf[rd - 1] == byte(10):  # '\n'
                    rd -= 1
                yield str(self.buf, rd)
            else:
                break

    def _ensure_open(self):
        """Raise IOError if the file has been closed."""
        if not self.fp:
            raise IOError("I/O operation on closed file")

    def _reset(self):
        """Clear the line buffer pointer and its recorded capacity."""
        self.buf = cobj()
        self.sz = 0
2022-01-24 15:00:24 +08:00
def open(path: str, mode: str = "r") -> File:
    """
    Open `path` as a File; `mode` defaults to "r".

    Raises IOError if the File constructor cannot open the path.
    """
    handle = File(path, mode)
    return handle
2022-01-24 15:00:24 +08:00
def gzopen(path: str, mode: str = "r") -> gzFile:
    """
    Open `path` as a gzFile; `mode` defaults to "r".

    Raises IOError if the gzFile constructor cannot open the path.
    """
    handle = gzFile(path, mode)
    return handle
2022-01-24 15:00:24 +08:00
def is_binary(path: str) -> bool:
    """
    Guess whether the file at `path` is binary by inspecting its first 1024 bytes.

    Returns True as soon as a byte outside the "text" set is seen, and False
    otherwise (including for an empty file).
    """
    # Heuristic taken from:
    # https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python/7392391#7392391
    # It can misclassify in both directions, but works for the large
    # majority of files.
    printable = set(iter(range(0x20, 0x100))) - {0x7F}
    control = {7, 8, 9, 10, 12, 13, 27}
    allowed = control | printable

    with open(path, "rb") as f:
        head = f.read(1024)
        for ch in head:
            if ord(ch) not in allowed:
                return True
        return False