mirror of https://github.com/exaloop/codon.git
1485 lines
43 KiB
Python
1485 lines
43 KiB
Python
# Copyright (C) 2022-2023 Exaloop Inc. <https://exaloop.io>
|
|
|
|
# Largest signed 64-bit integer (2**63 - 1). Used in this file as the
# "no limit" split count and as the upper bound in length-overflow checks.
_MAX: Static[int] = 0x7FFFFFFFFFFFFFFF
|
|
|
|
@extend
class str:
    # Magic methods
    #
    # A string is a (ptr, len) pair of raw bytes; every method below works
    # byte-by-byte on that buffer.

    def __hash__(self) -> int:
        """
        Return a base-31 polynomial hash over the string's bytes.

        The empty string hashes to 0.
        """
        h = 0
        p, n = self.ptr, self.len
        for i in range(n):
            h = 31 * h + int(p[i])
        return h

    def __lt__(self, other: str) -> bool:
        """Return True if self orders strictly before other (byte-wise)."""
        return self._cmp(other) < 0

    def __le__(self, other: str) -> bool:
        """Return True if self orders before or equal to other (byte-wise)."""
        return self._cmp(other) <= 0

    def __gt__(self, other: str) -> bool:
        """Return True if self orders strictly after other (byte-wise)."""
        return self._cmp(other) > 0

    def __ge__(self, other: str) -> bool:
        """Return True if self orders after or equal to other (byte-wise)."""
        return self._cmp(other) >= 0

    def __repr__(self) -> str:
        """
        Return a quoted, escaped representation of the string.

        Single quotes are used unless the string contains a single quote and
        no double quote. Newline, carriage return, tab, backslash and the
        chosen quote are backslash-escaped; any other byte outside 32..126 is
        written as a two-digit ``\\x`` hex escape.
        """
        v = _strbuf(len(self) + 2)

        # Scan once to decide which quote needs the least escaping.
        found_single = False
        found_double = False
        for c in self:
            if c == "'":
                found_single = True
            elif c == '"':
                found_double = True

        q, qe = "'", "\\'"
        if found_single and not found_double:
            q, qe = '"', '\\"'

        hexdigits = "0123456789abcdef"
        v.append(q)
        for c in self:
            if c == "\n":
                v.append("\\n")
            elif c == "\r":
                v.append("\\r")
            elif c == "\t":
                v.append("\\t")
            elif c == "\\":
                v.append("\\\\")
            elif c == q:
                v.append(qe)
            else:
                b = int(c.ptr[0])
                if 32 <= b <= 126:
                    v.append(c)
                else:
                    v.append("\\x")
                    v.append(hexdigits[b // 16])
                    v.append(hexdigits[b % 16])
        v.append(q)
        return v.__str__()

    def __getitem__(self, idx: int) -> str:
        """
        Return the one-byte string at position idx.

        Negative indices count from the end. Raises IndexError when idx is
        out of range. The result shares this string's buffer.
        """
        n = len(self)
        if idx < 0:
            idx += n
        if not (0 <= idx < n):
            raise IndexError("string index out of range")
        return str(self.ptr + idx, 1)

    def __getitem__(self, s: Slice) -> str:
        """
        Return the substring selected by slice s.

        A slice with no step yields a view into this string's buffer (or a
        copy via __copy__ for the bare ``[:]`` slice); a slice with a step
        gathers the selected bytes into a new buffer.
        """
        if s.start is None and s.stop is None and s.step is None:
            return self.__copy__()

        start, stop, step, length = s.adjust_indices(len(self))
        if s.step is None:
            return str(self.ptr + start, length)
        return self._make_from_range(start, stop, step, length)

    def _make_from_range(self, start: int, stop: int, step: int, length: int) -> str:
        """
        Copy the bytes at range(start, stop, step) into a new string of the
        given length.
        """
        p = Ptr[byte](length)
        j = 0
        for i in range(start, stop, step):
            p[j] = self.ptr[i]
            j += 1
        return str(p, length)

    def __iter__(self) -> Generator[str]:
        """Yield each byte of the string as a one-byte string, front to back."""
        for i in range(len(self)):
            yield str(self.ptr + i, 1)

    def __reversed__(self) -> Generator[str]:
        """Yield each byte of the string as a one-byte string, back to front."""
        for i in range(len(self) - 1, -1, -1):
            yield str(self.ptr + i, 1)

    def __mul__(self, x: int) -> str:
        """
        Return the string repeated x times.

        A count of zero or less yields the empty string (as in Python),
        rather than allocating a negative-size buffer.
        """
        if x <= 0 or self.len == 0:
            return ""
        total = x * self.len
        p = Ptr[byte](total)
        n = 0
        for _ in range(x):
            str.memcpy(p + n, self.ptr, self.len)
            n += self.len
        return str(p, total)

    def _cmp(self, other: str) -> int:
        """
        Three-way byte-wise comparison.

        Returns a negative, zero or positive int: the difference of the first
        mismatching bytes, or the difference in length if one string is a
        prefix of the other.
        """
        n = min(self.len, other.len)
        for i in range(n):
            c1 = self.ptr[i]
            c2 = other.ptr[i]
            if c1 != c2:
                return int(c1) - int(c2)
        return self.len - other.len
|
|
|
|
import algorithms.strings as algorithms
|
|
|
|
@extend
|
|
class str:
|
|
def __contains__(self, pattern: str) -> bool:
    """
    Return True if pattern occurs somewhere in str (``pattern in str``).
    """
    pos = self.find(pattern)
    return pos >= 0
|
|
|
|
# Helper methods
|
|
|
|
def _isdigit(a: byte) -> bool:
    """Return True if the C library's isdigit() accepts byte a."""
    code = i32(int(a))
    return _C.isdigit(code) != i32(0)
|
|
|
|
def _isspace(a: byte) -> bool:
    """Return True if the C library's isspace() accepts byte a."""
    code = i32(int(a))
    return _C.isspace(code) != i32(0)
|
|
|
|
def _isupper(a: byte) -> bool:
    """Return True if the C library's isupper() accepts byte a."""
    code = i32(int(a))
    return _C.isupper(code) != i32(0)
|
|
|
|
def _islower(a: byte) -> bool:
    """Return True if the C library's islower() accepts byte a."""
    code = i32(int(a))
    return _C.islower(code) != i32(0)
|
|
|
|
def _isalpha(a: byte) -> bool:
    """Return True if the C library's isalpha() accepts byte a."""
    code = i32(int(a))
    return _C.isalpha(code) != i32(0)
|
|
|
|
def _isalnum(a: byte) -> bool:
    """Return True if the C library's isalnum() accepts byte a."""
    code = i32(int(a))
    return _C.isalnum(code) != i32(0)
|
|
|
|
def _toupper(a: byte) -> byte:
    """Return byte a as converted by the C library's toupper()."""
    converted = _C.toupper(i32(int(a)))
    return byte(int(converted))
|
|
|
|
def _tolower(a: byte) -> byte:
    """Return byte a as converted by the C library's tolower()."""
    converted = _C.tolower(i32(int(a)))
    return byte(int(converted))
|
|
|
|
def _slice(self, i: int, j: int) -> str:
    """
    Return the substring covering byte offsets [i, j).

    No bounds checking or index normalization is performed and nothing is
    copied: the result points into this string's buffer.
    """
    length = j - i
    return str(self.ptr + i, length)
|
|
|
|
def _at(self, i: int) -> str:
    """
    Return the one-byte string at offset i, without bounds checking or
    negative-index handling. The result points into this string's buffer.
    """
    base = self.ptr + i
    return str(base, 1)
|
|
|
|
def join(self, l: Generator[str]) -> str:
    """
    Return the strings produced by generator l concatenated together, with
    str inserted between consecutive items.
    """
    out = _strbuf()

    # Empty separator: plain concatenation.
    if len(self) == 0:
        for piece in l:
            out.append(piece)
        return out.__str__()

    need_sep = False
    for piece in l:
        if need_sep:
            out.append(self)
        need_sep = True
        out.append(piece)
    return out.__str__()
|
|
|
|
def join(self, l: List[str]) -> str:
    """
    Return the strings in list l concatenated together, with str inserted
    between consecutive items.

    An empty list gives "", a one-element list gives that element itself,
    and an empty separator defers to str.cat().
    """
    count = len(l)
    if count == 0:
        return ""
    if count == 1:
        return l[0]

    sep_len = len(self)
    if sep_len == 0:
        return str.cat(l)

    # compute length: every item plus one separator between adjacent items
    total = sep_len * (count - 1)
    for item in l:
        total += len(item)

    # copy to new buffer
    buf = Ptr[byte](total)
    off = 0
    for k in range(count):
        if k > 0:
            str.memcpy(buf + off, self.ptr, sep_len)
            off += sep_len
        item = l[k]
        str.memcpy(buf + off, item.ptr, len(item))
        off += len(item)

    return str(buf, total)
|
|
|
|
def isdigit(self) -> bool:
    """
    str.isdigit() -> bool

    Return True if str is non-empty and every byte in it is a digit,
    False otherwise.
    """
    n = len(self)
    if n == 0:
        return False

    i = 0
    while i < n:
        if not str._isdigit(self.ptr[i]):
            return False
        i += 1
    return True
|
|
|
|
def islower(self) -> bool:
    """
    str.islower() -> bool

    Return True if str contains at least one lowercase character and no
    uppercase character, False otherwise.
    """
    n = len(self)

    # For empty strings
    if n == 0:
        return False

    # For single character strings
    if n == 1:
        return str._islower(self.ptr[0])

    seen_lower = False
    for i in range(n):
        ch = self.ptr[i]
        if str._isupper(ch):
            return False
        if not seen_lower and str._islower(ch):
            seen_lower = True
    return seen_lower
|
|
|
|
def isupper(self) -> bool:
    """
    str.isupper() -> bool

    Return True if str contains at least one uppercase character and no
    lowercase character, False otherwise.
    """
    n = len(self)

    # For empty strings
    if n == 0:
        return False

    # For single character strings
    if n == 1:
        return str._isupper(self.ptr[0])

    seen_upper = False
    for i in range(n):
        ch = self.ptr[i]
        if str._islower(ch):
            return False
        if not seen_upper and str._isupper(ch):
            seen_upper = True
    return seen_upper
|
|
|
|
def isalnum(self) -> bool:
    """
    str.isalnum() -> bool

    Return True if str is non-empty and every byte in it is alphanumeric,
    False otherwise.
    """
    n = len(self)
    if n == 0:
        return False

    i = 0
    while i < n:
        if not str._isalnum(self.ptr[i]):
            return False
        i += 1
    return True
|
|
|
|
def isalpha(self) -> bool:
    """
    str.isalpha() -> bool

    Return True if str is non-empty and every byte in it is alphabetic,
    False otherwise.
    """
    n = len(self)
    if n == 0:
        return False

    i = 0
    while i < n:
        if not str._isalpha(self.ptr[i]):
            return False
        i += 1
    return True
|
|
|
|
def isspace(self) -> bool:
    """
    str.isspace() -> bool

    Return True if str is non-empty and every byte in it is whitespace,
    False otherwise.
    """
    n = len(self)
    if n == 0:
        return False

    i = 0
    while i < n:
        if not str._isspace(self.ptr[i]):
            return False
        i += 1
    return True
|
|
|
|
def istitle(self) -> bool:
    """
    str.istitle() -> bool

    Return True if str is titlecased and contains at least one cased
    character: an uppercase character may only follow an uncased one, and a
    lowercase character may only follow a cased one. Return False otherwise.
    """
    n = len(self)

    # For empty strings
    if n == 0:
        return False

    # For single character strings
    if n == 1:
        return str._isupper(self.ptr[0])

    saw_cased = False
    in_word = False  # True while the previous character was cased
    for i in range(n):
        ch = self.ptr[i]
        if str._isupper(ch):
            if in_word:
                return False
            in_word = True
            saw_cased = True
        elif str._islower(ch):
            if not in_word:
                return False
            in_word = True
            saw_cased = True
        else:
            in_word = False
    return saw_cased
|
|
|
|
def capitalize(self) -> str:
    """
    str.capitalize() -> copy of str

    Return a copy of str with its first character upper-cased (ASCII) and
    all remaining characters lower-cased.
    """
    n = len(self)
    if n == 0:
        return ""

    out = Ptr[byte](n)
    out[0] = str._toupper(self.ptr[0])
    i = 1
    while i < n:
        out[i] = str._tolower(self.ptr[i])
        i += 1
    return str(out, n)
|
|
|
|
def isdecimal(self) -> bool:
    """
    str.isdecimal() -> bool

    Return True if str is non-empty and consists only of the ASCII decimal
    digits 0-9, False otherwise.
    """
    n = len(self)
    if n == 0:
        return False

    for i in range(n):
        code = int(self.ptr[i])
        # ASCII 48..57 are the characters '0'..'9'
        if code < 48 or code > 57:
            return False
    return True
|
|
|
|
def lower(self) -> str:
    """
    str.lower() -> copy of str

    Return a copy of str with every ASCII character converted to lowercase.
    """
    n = len(self)
    # Empty string
    if n == 0:
        return ""

    out = Ptr[byte](n)
    i = 0
    while i < n:
        out[i] = str._tolower(self.ptr[i])
        i += 1
    return str(out, n)
|
|
|
|
def upper(self) -> str:
    """
    str.upper() -> copy of str

    Return a copy of str with every ASCII character converted to uppercase.
    """
    n = len(self)
    # Empty string
    if n == 0:
        return ""

    out = Ptr[byte](n)
    i = 0
    while i < n:
        out[i] = str._toupper(self.ptr[i])
        i += 1
    return str(out, n)
|
|
|
|
def isascii(self) -> bool:
    """
    str.isascii() -> bool

    Return True if str is empty or every byte in it has a value below 128,
    False otherwise.
    """
    n = len(self)
    i = 0
    while i < n:
        if int(self.ptr[i]) > 127:
            return False
        i += 1
    return True
|
|
|
|
def casefold(self) -> str:
    """
    str.casefold() -> copy of str

    Return a version of the string suitable for caseless comparisons.

    Unlike Python, casefold() handles ASCII only: it is simply lower().
    """
    folded = self.lower()
    return folded
|
|
|
|
def swapcase(self) -> str:
    """
    str.swapcase() -> copy of str

    Return a copy of str in which ASCII uppercase characters become
    lowercase and ASCII lowercase characters become uppercase.
    """
    n = len(self)
    # Empty string
    if n == 0:
        return ""

    out = Ptr[byte](n)
    for i in range(n):
        ch = self.ptr[i]
        if str._islower(ch):
            out[i] = str._toupper(ch)
        elif str._isupper(ch):
            out[i] = str._tolower(ch)
        else:
            out[i] = ch
    return str(out, n)
|
|
|
|
def title(self) -> str:
    """
    str.title() -> copy of str

    Return a titlecased version of str: each run of ASCII cased characters
    starts with an uppercase character and continues in lowercase.
    """
    n = len(self)
    if n == 0:
        return ""

    out = Ptr[byte](n)
    in_word = False  # True while the previous character was cased
    for i in range(n):
        ch = self.ptr[i]
        if str._islower(ch):
            # lowercase to uppercase at the start of a word
            out[i] = ch if in_word else str._toupper(ch)
            in_word = True
        elif str._isupper(ch):
            # uppercase to lowercase inside a word
            out[i] = str._tolower(ch) if in_word else ch
            in_word = True
        else:
            out[i] = ch
            in_word = False
    return str(out, n)
|
|
|
|
def isnumeric(self) -> bool:
    """
    str.isnumeric() -> bool

    Return True if the string is a numeric string, False otherwise.
    A string is numeric if it is non-empty and all of its characters are
    numeric.

    Unlike Python, isnumeric() handles ASCII only: it is simply isdecimal().
    """
    result = self.isdecimal()
    return result
|
|
|
|
def _build(*args):
    """
    Concatenate the given pieces into one newly allocated string.

    Each argument is either a str, copied once, or a (str, count) pair,
    whose string is copied count times.
    """
    # First pass: total size of the result.
    size = 0
    for piece in args:
        if isinstance(piece, str):
            size += len(piece)
        else:
            size += len(piece[0]) * piece[1]

    # Second pass: copy every piece into place.
    buf = Ptr[byte](size)
    pos = 0
    for piece in args:
        if isinstance(piece, str):
            str.memcpy(buf + pos, piece.ptr, piece.len)
            pos += piece.len
        else:
            text, reps = piece
            for _ in range(reps):
                str.memcpy(buf + pos, text.ptr, text.len)
                pos += text.len

    return str(buf, size)
|
|
|
|
def ljust(self, width: int, fillchar: str = " ") -> str:
    """
    ljust(width[, fillchar]) -> string

    Return str left-justified in a string of length width, padded on the
    right with fillchar (default is a space). str itself is returned when
    it is already at least width long.

    Raises ValueError if fillchar is not exactly one character.
    """
    if len(fillchar) != 1:
        raise ValueError("The fill character must be exactly one character long")

    pad = width - len(self)
    if pad <= 0:
        return self
    return str._build(self, (fillchar, pad))
|
|
|
|
def rjust(self, width: int, fillchar: str = " ") -> str:
    """
    rjust(width[, fillchar]) -> string

    Return str right-justified in a string of length width, padded on the
    left with fillchar (default is a space). str itself is returned when
    it is already at least width long.

    Raises ValueError if fillchar is not exactly one character.
    """
    if len(fillchar) != 1:
        raise ValueError("The fill character must be exactly one character long")

    pad = width - len(self)
    if pad <= 0:
        return self
    return str._build((fillchar, pad), self)
|
|
|
|
def center(self, width: int, fillchar: str = " ") -> str:
    """
    str.center(width[, fillchar]) -> string

    Return str centered in a string of length width, padded on both sides
    with fillchar (default is a space). When the padding is odd, the extra
    fill character goes on the right. str itself is returned when it is
    already at least width long.

    Raises ValueError if fillchar is not exactly one character.
    """
    # NOTE(review): CPython places the extra fill on the left when both the
    # padding and width are odd; this always places it on the right --
    # confirm whether the difference is intended.
    if len(fillchar) != 1:
        raise ValueError("The fill character must be exactly one character long")

    pad = width - len(self)
    if pad <= 0:
        return self

    left_pad = pad // 2
    right_pad = pad - left_pad
    return str._build((fillchar, left_pad), self, (fillchar, right_pad))
|
|
|
|
def zfill(self, width: int) -> str:
    """
    str.zfill(width) -> string

    Pad str with zeros on the left to fill a field of the given width.
    A leading '+' or '-' sign stays in front of the zeros. The string is
    never truncated.
    """
    n = len(self)
    if n >= width:
        return self

    zf = self.rjust(width, '0')
    fill = width - n  # offset of the original first character inside zf
    p = zf.ptr

    if n > 0:
        sign = p[fill]
        # byte 43 is '+', byte 45 is '-'
        if sign == byte(43) or sign == byte(45):
            # Move the sign to the front and put a zero (byte 48) in its place.
            p[0] = sign
            p[fill] = byte(48)

    return zf
|
|
|
|
def count(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
    """
    str.count(sub[, start[, end]]) -> int

    Return the number of occurrences of sub in str[start:end], as counted
    by algorithms.count(). Optional arguments start and end are interpreted
    as in slice notation.
    """
    end: int = end if end is not None else len(self)
    lo, hi = self._correct_indices(start, end)

    # Window too small (or empty/inverted) to hold sub at all.
    if hi - lo < len(sub):
        return 0

    window = self._slice(lo, hi)
    return algorithms.count(window, sub)
|
|
|
|
def find(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
    """
    str.find(sub [,start [,end]]) -> int

    Return the lowest index in str where substring sub is found, such that
    sub is contained within str[start:end]. Optional arguments start and
    end are interpreted as in slice notation.

    Return -1 on failure.
    """
    end: int = end if end is not None else len(self)
    lo, hi = self._correct_indices(start, end)

    # Window too small (or empty/inverted) to hold sub at all.
    if hi - lo < len(sub):
        return -1

    pos = algorithms.find(self._slice(lo, hi), sub)
    if pos < 0:
        return pos
    # Translate the window-relative hit back to an index into str.
    return pos + lo
|
|
|
|
def rfind(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
    """
    str.rfind(sub [,start [,end]]) -> int

    Return the highest index in str where substring sub is found, such that
    sub is contained within str[start:end]. Optional arguments start and
    end are interpreted as in slice notation.

    Return -1 on failure.
    """
    end: int = end if end is not None else len(self)
    lo, hi = self._correct_indices(start, end)

    # Window too small (or empty/inverted) to hold sub at all.
    if hi - lo < len(sub):
        return -1

    pos = algorithms.rfind(self._slice(lo, hi), sub)
    if pos < 0:
        return pos
    # Translate the window-relative hit back to an index into str.
    return pos + lo
|
|
|
|
def isidentifier(self) -> bool:
    """
    str.isidentifier() -> bool

    Return True if the string is a valid identifier, False otherwise:
    it must be non-empty, start with a letter or underscore, and continue
    with letters, decimal digits or underscores.
    Unlike Python, isidentifier() deals with just ASCII characters.
    """
    n = len(self)
    # empty string
    if n == 0:
        return False

    # first character must be a letter or _
    first = self._at(0)
    if not (first.isalpha() or first == "_"):
        return False

    # remaining characters may also be digits
    for i in range(1, n):
        ch = self._at(i)
        if not (ch.isalpha() or ch.isdecimal() or ch == "_"):
            return False

    return True
|
|
|
|
def isprintable(self) -> bool:
    """
    str.isprintable() -> bool

    Return True if the string is printable or empty, False otherwise.
    Unlike Python, isprintable() deals with just ASCII characters: a byte
    is printable when it lies in the range 32 (space) to 126 ('~').
    """
    for i in range(len(self)):
        # 127 (DEL) is a control character, so the upper bound is 126 --
        # the same printable range that __repr__ uses.
        if not (32 <= int(self.ptr[i]) <= 126):
            return False
    return True
|
|
|
|
def _has_char(self, chars: str) -> bool:
    """
    Return True if the first character of str should be stripped: it
    appears in chars, or, when chars is empty, it is whitespace.
    """
    first = self._at(0)
    if not chars:
        return first.isspace()

    for candidate in chars:
        if first == candidate:
            return True
    return False
|
|
|
|
def lstrip(self, chars: str = "") -> str:
    """
    str.lstrip([chars]) -> string

    Return str with leading whitespace removed.
    If chars is given and non-empty, remove leading characters found in
    chars instead.
    Unlike Python, lstrip() deals with just ASCII characters.
    """
    n = len(self)
    first_kept = 0
    while first_kept < n and self._at(first_kept)._has_char(chars):
        first_kept += 1
    return self._slice(first_kept, n)
|
|
|
|
def rstrip(self, chars: str = "") -> str:
    """
    str.rstrip([chars]) -> string

    Return str with trailing whitespace removed.
    If chars is given and non-empty, remove trailing characters found in
    chars instead.
    Unlike Python, rstrip() deals with just ASCII characters.
    """
    stop = len(self)  # exclusive end of the part that is kept
    while stop > 0 and self._at(stop - 1)._has_char(chars):
        stop -= 1
    return self._slice(0, stop)
|
|
|
|
def strip(self, chars: str = "") -> str:
    """
    str.strip([chars]) -> string

    Return str with leading and trailing whitespace removed.
    If chars is given and non-empty, remove characters found in chars
    instead.
    Unlike Python, strip() deals with just ASCII characters.
    """
    left_trimmed = self.lstrip(chars)
    return left_trimmed.rstrip(chars)
|
|
|
|
def partition(self, sep: str) -> Tuple[str, str, str]:
    """
    Split str at the first occurrence of sep and return the part before
    it, the separator itself, and the part after it. If sep is not found,
    return str followed by two empty strings.

    Raises ValueError if sep is empty.
    """
    if not sep:
        raise ValueError("empty separator")

    pos = algorithms.find(self, sep)
    if pos < 0:
        return self, "", ""

    head = self._slice(0, pos)
    tail = self._slice(pos + len(sep), len(self))
    return head, sep, tail
|
|
|
|
def rpartition(self, sep: str) -> Tuple[str, str, str]:  # XXX
    """
    Split str at the last occurrence of sep and return the part before it,
    the separator itself, and the part after it. If sep is not found,
    return two empty strings followed by str.

    Raises ValueError if sep is empty.
    """
    if not sep:
        raise ValueError("empty separator")

    pos = algorithms.rfind(self, sep)
    if pos < 0:
        return "", "", self

    head = self._slice(0, pos)
    tail = self._slice(pos + len(sep), len(self))
    return head, sep, tail
|
|
|
|
def split(self, sep: Optional[str] = None, maxsplit: int = -1) -> List[str]:
    """
    str.split([sep [,maxsplit]]) -> list of strings

    Return a list of the words in str, using sep as the delimiter string.
    If maxsplit is non-negative, at most maxsplit splits are done. If sep
    is not specified, runs of whitespace separate the words and empty
    strings are removed from the result.

    Raises ValueError if sep is an empty string.
    """
    # A negative maxsplit means "no limit".
    limit = maxsplit if maxsplit >= 0 else _MAX

    if sep is None:
        return self._split_whitespace(limit)
    sep: str = sep

    sep_len = len(sep)
    if sep_len == 0:
        raise ValueError("empty separator")

    # special case for length-1 pattern
    if sep_len == 1:
        return self._split_char(sep.ptr[0], limit)

    MAX_PREALLOC = 12
    prealloc_size = MAX_PREALLOC if limit >= MAX_PREALLOC else limit + 1
    parts = List[str](capacity=prealloc_size)

    n = len(self)
    piece_start = 0
    while limit > 0:
        limit -= 1
        found = algorithms.find(self._slice(piece_start, n), sep)
        if found < 0:
            break
        piece_end = piece_start + found
        parts.append(self._slice(piece_start, piece_end))
        piece_start = piece_end + sep_len

    # Whatever follows the last separator used (possibly empty).
    parts.append(self._slice(piece_start, n))
    return parts
|
|
|
|
def rsplit(self, sep: Optional[str] = None, maxsplit: int = -1) -> List[str]:
    """
    str.rsplit([sep [,maxsplit]]) -> list of strings

    Return a list of the words in str, using sep as the delimiter string,
    starting at the end of the string and working to the front. If
    maxsplit is non-negative, at most maxsplit splits are done. If sep is
    not specified, any whitespace string is a separator.

    Raises ValueError if sep is an empty string.
    """
    # A negative maxsplit means "no limit".
    limit = maxsplit if maxsplit >= 0 else _MAX

    if sep is None:
        return self._rsplit_whitespace(limit)
    sep: str = sep

    sep_len = len(sep)
    if sep_len == 0:
        raise ValueError("empty separator")

    # special case for length-1 pattern
    if sep_len == 1:
        return self._rsplit_char(sep.ptr[0], limit)

    MAX_PREALLOC = 12
    prealloc_size = MAX_PREALLOC if limit >= MAX_PREALLOC else limit + 1
    parts = List[str](capacity=prealloc_size)

    piece_end = len(self)
    while limit > 0:
        limit -= 1
        found = algorithms.rfind(self._slice(0, piece_end), sep)
        if found < 0:
            break
        parts.append(self._slice(found + sep_len, piece_end))
        piece_end = found

    # Whatever precedes the first separator used (possibly empty).
    parts.append(self._slice(0, piece_end))
    # Pieces were collected back to front.
    parts.reverse()
    return parts
|
|
|
|
def splitlines(self, keepends: bool = False) -> List[str]:
    """
    str.splitlines([keepends]) -> list of strings

    Return a list of the lines in str, breaking at "\\n", "\\r" and
    "\\r\\n". Line breaks are not included in the resulting list unless
    keepends is given and true.
    """
    lines = []
    n = len(self)
    cr = byte(13)  # \r
    lf = byte(10)  # \n

    pos = 0         # scan position
    line_start = 0  # start of the line currently being scanned
    while pos < n:
        # Advance to the next line break (or the end of the string).
        while pos < n and self.ptr[pos] != cr and self.ptr[pos] != lf:
            pos += 1

        eol = pos
        if pos < n:
            # Step over the break, treating \r\n as a single break.
            if self.ptr[pos] == cr and pos + 1 < n and self.ptr[pos + 1] == lf:
                pos += 2
            else:
                pos += 1
            if keepends:
                eol = pos

        # The whole string is a single line: return it as-is.
        if line_start == 0 and eol == n:
            lines.append(self)
            break

        lines.append(self._slice(line_start, eol))
        line_start = pos

    return lines
|
|
|
|
def startswith(
    self, prefix: str, start: int = 0, end: Optional[int] = None
) -> bool:
    """
    str.startswith(prefix[, start[, end]]) -> bool

    Return True if str starts with the specified prefix, False otherwise.
    With optional start, test str beginning at that position.
    With optional end, stop comparing str at that position.
    start and end are interpreted as in slice notation.
    """
    end: int = end if end is not None else len(self)
    # Normalize negative and out-of-range bounds exactly as find() and
    # count() do. Both bounds are adjusted independently, so the slice taken
    # below can never start before the beginning of the buffer.
    start, end = self._correct_indices(start, end)

    # prefix does not fit in str[start:end]; this also covers start > end
    # and start beyond the end of the string
    if end - start < len(prefix):
        return False

    # prefix is an empty string
    if not prefix:
        return True

    return prefix == self._slice(start, start + len(prefix))
|
|
|
|
def endswith(self, suffix: str, start: int = 0, end: Optional[int] = None) -> bool:
    """
    str.endswith(suffix[, start[, end]]) -> bool

    Return True if str ends with the specified suffix, False otherwise.
    With optional start, test str beginning at that position.
    With optional end, stop comparing str at that position.
    start and end are interpreted as in slice notation.
    """
    end: int = end if end is not None else len(self)
    # Normalize negative and out-of-range bounds exactly as find() and
    # count() do. Both bounds are adjusted independently and clamped, so the
    # slice taken below always lies inside the buffer.
    start, end = self._correct_indices(start, end)

    # suffix does not fit in str[start:end]; this also covers start > end
    # and start beyond the end of the string
    if end - start < len(suffix):
        return False

    # suffix is an empty string
    if not suffix:
        return True

    # Compare against the last len(suffix) bytes of the window.
    return suffix == self._slice(end - len(suffix), end)
|
|
|
|
def index(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
    """
    str.index(sub [,start [,end]]) -> int

    Like str.find() but raise ValueError when the substring is not found.
    """
    pos = self.find(sub, start, end)
    if pos == -1:
        raise ValueError("substring not found")
    return pos
|
|
|
|
def rindex(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
    """
    str.rindex(sub [,start [,end]]) -> int

    Like str.rfind() but raise ValueError when the substring is not found.
    """
    pos = self.rfind(sub, start, end)
    if pos == -1:
        raise ValueError("substring not found")
    return pos
|
|
|
|
def replace(self, old: str, new: str, maxcount: int = -1) -> str:
    """
    str.replace(old, new[, count]) -> string

    Return a copy of str with occurrences of substring old replaced by new.
    The work is delegated to _replace(), which receives maxcount unchanged
    as the limit on the number of replacements.
    """
    result = self._replace(old, new, maxcount)
    return result
|
|
|
|
def expandtabs(self, tabsize: int = 8) -> str:
    """
    str.expandtabs([tabsize]) -> string

    Return a copy of str where all tab characters are expanded using spaces.
    If tabsize is not given, a tab size of 8 characters is assumed.
    A tab advances to the next multiple of tabsize within the current line;
    with tabsize <= 0, tabs are dropped. Raises OverflowError if the
    expanded length would exceed _MAX.
    """
    # First pass: compute the length of the result.
    #   i = total length of the lines completed so far
    #   j = column within the current line
    i = 0
    j = 0
    p = self.ptr
    e = p + len(self)

    break_r = byte(13)  # \r
    break_n = byte(10)  # \n
    tab = byte(9)  # \t
    space = byte(32)  # ' '

    def overflow():
        raise OverflowError("result too long")

    while p < e:
        if p[0] == tab:
            if tabsize > 0:
                # Distance to the next tab stop.
                incr = tabsize - (j % tabsize)
                if j > _MAX - incr:
                    overflow()
                j += incr
        else:
            if j > _MAX - 1:
                overflow()
            j += 1
            # A line break ends the current line: bank its length and
            # restart the column count.
            if p[0] == break_n or p[0] == break_r:
                if i > _MAX - j:
                    overflow()
                i += j
                j = 0
        p += 1

    if i > _MAX - j:
        overflow()

    # Second pass: fill the output buffer. From here on i is reused as the
    # number of spaces still to write for the current tab.
    u_len = i + j
    u = Ptr[byte](u_len)
    j = 0
    q = u
    p = self.ptr

    while p < e:
        if p[0] == tab:
            if tabsize > 0:
                i = tabsize - (j % tabsize)
                j += i
                # Write i spaces.
                while True:
                    k = i
                    i -= 1
                    if k == 0:
                        break
                    q[0] = space
                    q += 1
        else:
            j += 1
            q[0] = p[0]
            q += 1
            if p[0] == break_n or p[0] == break_r:
                j = 0
        p += 1

    return str(u, u_len)
|
|
|
|
def translate(self, map) -> str:
    """
    Return a copy with each character mapped by the given translation table.

    map is looked up by the integer value of each byte. A byte whose value
    is not a key is copied unchanged; a byte mapped to None is dropped;
    otherwise the mapped string is written in its place.
    """
    n = len(self)

    # First pass: size of the translated string.
    out_len = 0
    for i in range(n):
        key = int(self.ptr[i])
        if key not in map:
            out_len += 1
            continue
        val = map[key]
        if val is not None:
            out_len += len(val)

    out = Ptr[byte](out_len)
    dst = out

    # Second pass: write the translated bytes.
    for i in range(n):
        key = int(self.ptr[i])
        if key not in map:
            dst[0] = self.ptr[i]
            dst += 1
            continue
        val = map[key]
        if val is not None:
            str.memcpy(dst, val.ptr, len(val))
            dst += len(val)

    return str(out, out_len)
|
|
|
|
|
|
# Internal helpers
|
|
|
|
def _correct_indices(self, start: int, end: int) -> Tuple[int, int]:
    """
    Normalize a (start, end) pair against the length of str.

    Negative values count from the end and are floored at 0; end is capped
    at len(str). start is not capped, so the returned start may exceed the
    returned end.
    """
    n = len(self)

    if end > n:
        end = n
    elif end < 0:
        end = max(end + n, 0)

    if start < 0:
        start = max(start + n, 0)

    return start, end
|
|
|
|
def _split_whitespace(self, maxcount: int) -> List[str]:
    """
    Split str on runs of whitespace, scanning left to right.

    At most maxcount words are split off; whatever remains after that (with
    its leading whitespace removed) becomes the final item. Empty words are
    never produced.
    """
    PREALLOC_MAX = 12
    words = List[str](PREALLOC_MAX if maxcount >= PREALLOC_MAX else maxcount + 1)

    n = len(self)
    p = self.ptr
    pos = 0
    while maxcount > 0:
        maxcount -= 1
        # Skip the whitespace in front of the next word.
        while pos < n and str._isspace(p[pos]):
            pos += 1
        if pos == n:
            break
        # Consume the word itself.
        word_start = pos
        pos += 1
        while pos < n and not str._isspace(p[pos]):
            pos += 1
        words.append(self._slice(word_start, pos))

    if pos < n:
        # Only occurs when maxcount was reached: keep the remainder as one
        # item, minus its leading whitespace.
        while pos < n and str._isspace(p[pos]):
            pos += 1
        if pos != n:
            words.append(self._slice(pos, n))

    return words
|
|
|
|
def _rsplit_whitespace(self, maxcount: int) -> List[str]:
    """
    Split str on runs of whitespace, scanning right to left.

    At most maxcount words are split off the end; whatever remains in front
    (with its trailing whitespace removed) becomes the first item. Empty
    words are never produced.
    """
    PREALLOC_MAX = 12
    words = List[str](PREALLOC_MAX if maxcount >= PREALLOC_MAX else maxcount + 1)

    p = self.ptr
    pos = len(self) - 1
    while maxcount > 0:
        maxcount -= 1
        # Skip the whitespace behind the next word.
        while pos >= 0 and str._isspace(p[pos]):
            pos -= 1
        if pos < 0:
            break
        # Consume the word itself, moving towards the front.
        word_last = pos
        pos -= 1
        while pos >= 0 and not str._isspace(p[pos]):
            pos -= 1
        words.append(self._slice(pos + 1, word_last + 1))

    if pos >= 0:
        # Only occurs when maxcount was reached: keep the remainder as one
        # item, minus its trailing whitespace.
        while pos >= 0 and str._isspace(p[pos]):
            pos -= 1
        if pos >= 0:
            words.append(self._slice(0, pos + 1))

    # Words were collected back to front.
    words.reverse()
    return words
|
|
|
|
def _split_char(self, char: byte, maxcount: int) -> List[str]:
    """
    Split str at each occurrence of the byte char, scanning left to right
    and performing at most maxcount splits. The text after the last split
    is always appended, even when empty.
    """
    PREALLOC_MAX = 12
    parts = List[str](PREALLOC_MAX if maxcount >= PREALLOC_MAX else maxcount + 1)

    n = len(self)
    piece_start = 0
    pos = 0
    while pos < n and maxcount > 0:
        if self.ptr[pos] == char:
            parts.append(self._slice(piece_start, pos))
            piece_start = pos + 1
            maxcount -= 1
        pos += 1

    parts.append(self._slice(piece_start, n))
    return parts
|
|
|
|
def _rsplit_char(self, char: byte, maxcount: int) -> List[str]:
    """
    Split str at each occurrence of the byte char, scanning right to left
    and performing at most maxcount splits. The text before the first
    split used is always included, even when empty.
    """
    PREALLOC_MAX = 12
    parts = List[str](PREALLOC_MAX if maxcount >= PREALLOC_MAX else maxcount + 1)

    piece_end = len(self)  # exclusive end of the piece being scanned
    pos = piece_end - 1
    while pos >= 0 and maxcount > 0:
        if self.ptr[pos] == char:
            parts.append(self._slice(pos + 1, piece_end))
            piece_end = pos
            maxcount -= 1
        pos -= 1

    parts.append(self._slice(0, piece_end))
    # Pieces were collected back to front.
    parts.reverse()
    return parts
|
|
|
|
def _findchar(self, c: byte):
    """
    Return the result of C memchr() searching this string's bytes for c:
    a pointer to the first occurrence, which callers test for falsiness to
    detect "not found".
    """
    n = len(self)
    return _C.memchr(self.ptr, i32(int(c)), n)
|
|
|
|
def _countchar(self, c: byte, maxcount: int):
    """
    Count occurrences of byte c in str, stopping as soon as the count
    reaches maxcount.
    """
    found = 0
    cur = self.ptr
    stop = cur + len(self)

    while True:
        # Search the part of the string that has not been examined yet.
        cur = str(cur, stop - cur)._findchar(c)
        if not cur:
            break
        found += 1
        if found >= maxcount:
            break
        cur += 1
    return found
|
|
|
|
def _replace_interleave(self, to: str, maxcount: int):
    """
    Return a new string with `to` inserted before the first byte of str
    and after each of its bytes, for at most maxcount insertions in total.

    Raises OverflowError if the result length would exceed _MAX.
    """
    # NOTE(review): the to_len <= 1 branch reads to_s[0] unconditionally, so
    # this presumably is never called with an empty `to` -- confirm at the
    # call site.
    self_s = self.ptr
    self_len = len(self)
    to_len = len(to)
    to_s = to.ptr
    count = 0
    i = 0

    # Number of insertions: min(maxcount, self_len + 1), i.e. one in front
    # plus one after every byte.
    if maxcount <= self_len:
        count = maxcount
    else:
        count = self_len + 1

    # assert count > 0
    if to_len > (_MAX - self_len) // count:
        raise OverflowError("replace bytes is too long")

    result_len = count * to_len + self_len
    result_s = Ptr[byte](result_len)
    result_s0 = result_s  # start of the buffer; result_s is the write cursor

    if to_len > 1:
        # Lay down the leading copy of `to`.
        str.memcpy(result_s, to_s, to_len)
        result_s += to_len
        count -= 1

        # Then alternate: one byte of str, one copy of `to`.
        while i < count:
            result_s[0] = self_s[0]
            result_s += 1
            self_s += 1
            str.memcpy(result_s, to_s, to_len)
            result_s += to_len
            i += 1
    else:
        # Same interleaving, writing the single byte directly instead of
        # calling memcpy.
        result_s[0] = to_s[0]
        result_s += to_len
        count -= 1

        while i < count:
            result_s[0] = self_s[0]
            result_s += 1
            self_s += 1
            result_s[0] = to_s[0]
            result_s += to_len
            i += 1

    # Copy the bytes of str that were not consumed above.
    str.memcpy(result_s, self_s, self_len - i)
    return str(result_s0, result_len)
|
|
|
|
def _replace_delete_single_character(self, from_c: byte, maxcount: int):
    """
    Return a copy of str with up to maxcount occurrences of byte from_c
    removed. str itself is returned when from_c does not occur.
    """
    src_len = len(self)
    src = self.ptr

    removals = self._countchar(from_c, maxcount)
    if removals == 0:
        return self

    out_len = src_len - removals
    # assert out_len >= 0
    out = Ptr[byte](out_len)
    dst = out

    cur = src
    stop = src + src_len
    for _ in range(removals):
        hit = str(cur, stop - cur)._findchar(from_c)
        if not hit:
            break
        # Copy the run before the hit, then step over the deleted byte.
        run = hit - cur
        str.memcpy(dst, cur, run)
        dst += run
        cur = hit + 1

    # Copy whatever follows the last deleted byte.
    str.memcpy(dst, cur, stop - cur)
    return str(out, out_len)
|
|
|
|
def _replace_delete_substring(self, from_s: str, maxcount: int):
    """
    Return a copy of `self` with occurrences of `from_s` removed. The number
    removed is given by `algorithms.count_with_max(self, from_s, maxcount)`;
    if that is zero, `self` is returned as-is.
    """
    n = len(self)
    pat_len = len(from_s)

    hits = algorithms.count_with_max(self, from_s, maxcount)
    if hits == 0:
        return self

    out_len = n - hits * pat_len
    # assert out_len >= 0
    out = Ptr[byte](out_len)
    dst = out

    cur = self.ptr
    stop = cur + n
    remaining = hits
    while remaining > 0:
        remaining -= 1
        # Offset of the next match relative to `cur`, or -1 if none.
        gap = algorithms.find(str(cur, stop - cur), from_s)
        if gap == -1:
            break
        # Keep the bytes before the match and skip the match itself.
        str.memcpy(dst, cur, gap)
        dst += gap
        cur += gap + pat_len

    # Tail after the last removed occurrence.
    str.memcpy(dst, cur, stop - cur)
    return str(out, out_len)
|
|
|
|
def _replace_single_character_in_place(self, from_c: byte, to_c: byte, maxcount: int):
    """
    Return a same-length copy of `self` in which up to `maxcount` occurrences
    of the byte `from_c` are overwritten with `to_c`. If `from_c` does not
    occur at all, `self` is returned without copying.
    """
    src = self.ptr

    first = self._findchar(from_c)
    if not first:
        return self

    # The result has the same size, so duplicate the input and patch the copy.
    n = len(self)
    out = Ptr[byte](n)
    str.memcpy(out, src, n)

    # Patch the occurrence already located, translated into the copy.
    cur = out + (first - src)
    cur[0] = to_c
    cur += 1
    stop = out + n

    # One replacement has been spent; the rest of the budget drives the loop.
    budget = maxcount - 1
    while budget > 0:
        budget -= 1
        hit = str(cur, stop - cur)._findchar(from_c)
        if not hit:
            break
        hit[0] = to_c
        cur = hit + 1

    return str(out, n)
|
|
|
|
def _replace_substring_in_place(self, from_s: str, to: str, maxcount: int):
    """
    Return a same-length copy of `self` in which up to `maxcount` occurrences
    of `from_s` are overwritten with the first len(from_s) bytes of `to`.
    If `from_s` is not found, `self` is returned without copying.
    """
    first = algorithms.find(self, from_s)
    if first == -1:
        return self

    n = len(self)
    pat_len = len(from_s)
    rep = to.ptr

    # The result has the same size, so duplicate the input and patch the copy.
    out = Ptr[byte](n)
    str.memcpy(out, self.ptr, n)

    # Patch the occurrence already located.
    str.memcpy(out + first, rep, pat_len)
    cur = out + first + pat_len
    stop = out + n

    # One replacement has been spent; the rest of the budget drives the loop.
    budget = maxcount - 1
    while budget > 0:
        budget -= 1
        rel = algorithms.find(str(cur, stop - cur), from_s)
        if rel == -1:
            break
        str.memcpy(cur + rel, rep, pat_len)
        cur += rel + pat_len

    return str(out, n)
|
|
|
|
def _replace_single_character(self, from_c: byte, to_s: str, maxcount: int):
    """
    Return a copy of `self` in which occurrences of the byte `from_c` are
    replaced by the string `to_s`. The number replaced is whatever
    `self._countchar(from_c, maxcount)` reports; if zero, `self` is returned.

    Raises OverflowError when the result length would exceed `_MAX`.
    """
    n = len(self)
    rep = to_s.ptr
    rep_len = len(to_s)

    hits = self._countchar(from_c, maxcount)
    if hits == 0:
        return self

    # assert hits > 0
    # Each replacement changes the length by (rep_len - 1); check that
    # n + hits * (rep_len - 1) fits before computing it.
    if rep_len - 1 > (_MAX - n) // hits:
        raise OverflowError("replace bytes is too long")

    out_len = n + hits * (rep_len - 1)
    out = Ptr[byte](out_len)
    dst = out

    cur = self.ptr
    stop = cur + n
    remaining = hits
    while remaining > 0:
        remaining -= 1
        hit = str(cur, stop - cur)._findchar(from_c)
        if not hit:
            break

        # Copy any unchanged bytes in front of the match (none when the
        # match sits right at `cur`), then emit the replacement.
        gap = hit - cur
        if gap != 0:
            str.memcpy(dst, cur, gap)
            dst += gap
        str.memcpy(dst, rep, rep_len)
        dst += rep_len
        cur = hit + 1

    # Remainder after the last replaced byte.
    str.memcpy(dst, cur, stop - cur)
    return str(out, out_len)
|
|
|
|
def _replace_substring(self, from_s: str, to_s: str, maxcount: int):
    """
    Return a copy of `self` in which occurrences of `from_s` are replaced by
    `to_s`. The number replaced is given by
    `algorithms.count_with_max(self, from_s, maxcount)`; if zero, `self` is
    returned.

    Raises OverflowError when the result length would exceed `_MAX`.
    """
    n = len(self)
    pat_len = len(from_s)
    rep = to_s.ptr
    rep_len = len(to_s)

    hits = algorithms.count_with_max(self, from_s, maxcount)
    if hits == 0:
        return self

    # assert hits > 0
    # Each replacement changes the length by (rep_len - pat_len); check that
    # n + hits * (rep_len - pat_len) fits before computing it.
    if rep_len - pat_len > (_MAX - n) // hits:
        raise OverflowError("replace bytes is too long")

    out_len = n + hits * (rep_len - pat_len)
    out = Ptr[byte](out_len)
    dst = out

    cur = self.ptr
    stop = cur + n
    remaining = hits
    while remaining > 0:
        remaining -= 1
        # Offset of the next match relative to `cur`, or -1 if none.
        gap = algorithms.find(str(cur, stop - cur), from_s)
        if gap == -1:
            break

        # Copy any unchanged bytes in front of the match (none when the
        # match sits right at `cur`), then emit the replacement.
        if gap != 0:
            str.memcpy(dst, cur, gap)
            dst += gap
        str.memcpy(dst, rep, rep_len)
        dst += rep_len
        cur += gap + pat_len

    # Remainder after the last replaced occurrence.
    str.memcpy(dst, cur, stop - cur)
    return str(out, out_len)
|
|
|
|
def _replace(self, from_s: str, to_s: str, maxcount: int):
    """
    Dispatch a replace of `from_s` by `to_s` to the specialised helper that
    matches the lengths of the two arguments.

    A negative `maxcount` is treated as `_MAX`. `self` is returned untouched
    when `maxcount` is zero, when `from_s` is longer than `self`, or when
    both `from_s` and `to_s` are empty.
    """
    from_len = len(from_s)
    to_len = len(to_s)

    # Nothing can match, or no replacements were requested.
    if len(self) < from_len or maxcount == 0:
        return self

    limit = _MAX if maxcount < 0 else maxcount

    # Empty pattern: insert `to_s` around the bytes of `self`.
    if from_len == 0:
        if to_len == 0:
            return self
        return self._replace_interleave(to_s, limit)

    # Empty replacement: delete occurrences of the pattern.
    if to_len == 0:
        if from_len == 1:
            return self._replace_delete_single_character(from_s.ptr[0], limit)
        return self._replace_delete_substring(from_s, limit)

    # Equal lengths: the result has the same size as `self`.
    if from_len == to_len:
        if from_len == 1:
            return self._replace_single_character_in_place(from_s.ptr[0], to_s.ptr[0], limit)
        return self._replace_substring_in_place(from_s, to_s, limit)

    # General case: the result length differs from len(self).
    if from_len == 1:
        return self._replace_single_character(from_s.ptr[0], to_s, limit)
    return self._replace_substring(from_s, to_s, limit)
|