# Copyright (C) 2022-2023 Exaloop Inc.

# Largest signed 64-bit integer; used as the "no limit" split/replace count
# and as the upper bound in length-overflow checks.
_MAX: Static[int] = 0x7FFFFFFFFFFFFFFF

@extend
class str:
    # Magic methods

    def __hash__(self) -> int:
        """Return a polynomial (base 31) hash over the bytes of the string."""
        h = 0
        p, n = self.ptr, self.len
        i = 0
        while i < n:
            h = 31 * h + int(p[i])
            i += 1
        return h

    def __lt__(self, other: str) -> bool:
        return self._cmp(other) < 0

    def __le__(self, other: str) -> bool:
        return self._cmp(other) <= 0

    def __gt__(self, other: str) -> bool:
        return self._cmp(other) > 0

    def __ge__(self, other: str) -> bool:
        return self._cmp(other) >= 0

    def __repr__(self) -> str:
        """
        Return a quoted, escaped representation of the string.

        Single quotes are used unless the string contains a single quote
        and no double quote. Bytes outside 32..126 are emitted as \\xNN.
        """
        v = _strbuf(len(self) + 2)

        q, qe = "'", "\\'"
        found_single = False
        found_double = False
        for c in self:
            if c == "'":
                found_single = True
            elif c == '"':
                found_double = True

        if found_single and not found_double:
            q, qe = '"', '\\"'

        v.append(q)
        for c in self:
            d = c
            if c == "\n":
                d = "\\n"
            elif c == "\r":
                d = "\\r"
            elif c == "\t":
                d = "\\t"
            elif c == "\\":
                d = "\\\\"
            elif c == q:
                d = qe
            else:
                b = int(c.ptr[0])
                if not (32 <= b <= 126):
                    h = "0123456789abcdef"
                    v.append("\\x")
                    v.append(h[b // 16])
                    v.append(h[b % 16])
                    # already emitted as a hex escape; nothing more to append
                    d = ""
            if d:
                v.append(d)
        v.append(q)
        return v.__str__()

    def __getitem__(self, idx: int) -> str:
        """
        Return the one-character string at idx (negative idx counts from the end).

        Raises IndexError if idx is out of range.
        """
        if idx < 0:
            idx += len(self)
        if not (0 <= idx < len(self)):
            raise IndexError("string index out of range")
        return str(self.ptr + idx, 1)

    def __getitem__(self, s: Slice) -> str:
        """Return the substring selected by slice s."""
        if s.start is None and s.stop is None and s.step is None:
            return self.__copy__()
        elif s.step is None:
            # contiguous range: reference the existing buffer
            start, stop, step, length = s.adjust_indices(len(self))
            return str(self.ptr + start, length)
        else:
            start, stop, step, length = s.adjust_indices(len(self))
            return self._make_from_range(start, stop, step, length)

    def _make_from_range(self, start: int, stop: int, step: int, length: int) -> str:
        """Copy the bytes at range(start, stop, step) into a new string of the given length."""
        p = Ptr[byte](length)
        j = 0
        for i in range(start, stop, step):
            p[j] = self.ptr[i]
            j += 1
        return str(p, length)

    def __iter__(self) -> Generator[str]:
        """Yield each character as a one-character string, front to back."""
        i = 0
        n = len(self)
        while i < n:
            yield str(self.ptr + i, 1)
            i += 1

    def __reversed__(self) -> Generator[str]:
        """Yield each character as a one-character string, back to front."""
        i = len(self) - 1
        while i >= 0:
            yield str(self.ptr + i, 1)
            i -= 1

    def __mul__(self, x: int) -> str:
        """
        Return the string repeated x times.

        A non-positive x yields the empty string, as in Python.
        """
        # Guard against a negative allocation size / negative result length.
        if x <= 0 or self.len == 0:
            return ""
        total = x * self.len
        p = Ptr[byte](total)
        n = 0
        for _ in range(x):
            str.memcpy(p + n, self.ptr, self.len)
            n += self.len
        return str(p, total)

    def _cmp(self, other: str) -> int:
        """
        Three-way bytewise comparison.

        Returns a negative, zero or positive value when self sorts before,
        equal to, or after other.
        """
        n = min(self.len, other.len)
        i = 0
        while i < n:
            c1 = self.ptr[i]
            c2 = other.ptr[i]
            if c1 != c2:
                return int(c1) - int(c2)
            i += 1
        # common prefix is equal: the shorter string sorts first
        return self.len - other.len

import algorithms.strings as algorithms

@extend
class str:
    def __contains__(self, pattern: str) -> bool:
        return self.find(pattern) >= 0

    # Helper methods

    def _isdigit(a: byte) -> bool:
        return _C.isdigit(i32(int(a))) != i32(0)

    def _isspace(a: byte) -> bool:
        return _C.isspace(i32(int(a))) != i32(0)

    def _isupper(a: byte) -> bool:
        return _C.isupper(i32(int(a))) != i32(0)

    def _islower(a: byte) -> bool:
        return _C.islower(i32(int(a))) != i32(0)

    def _isalpha(a: byte) -> bool:
        return _C.isalpha(i32(int(a))) != i32(0)

    def _isalnum(a: byte) -> bool:
        return _C.isalnum(i32(int(a))) != i32(0)

    def _toupper(a: byte) -> byte:
        return byte(int(_C.toupper(i32(int(a)))))

    def _tolower(a: byte) -> byte:
        return byte(int(_C.tolower(i32(int(a)))))

    def _slice(self, i: int, j: int) -> str:
        """Return the substring [i, j) without bounds checking or copying."""
        return str(self.ptr + i, j - i)

    def _at(self, i: int) -> str:
        """Return the one-character string at index i without bounds checking."""
        return str(self.ptr + i, 1)

    def join(self, l: Generator[str]) -> str:
        """Concatenate the strings produced by l, separated by this string."""
        buf = _strbuf()
        if len(self) == 0:
            for a in l:
                buf.append(a)
        else:
            first = True
            for a in l:
                if first:
                    first = False
                else:
                    buf.append(self)
                buf.append(a)
        return buf.__str__()

    def join(self, l: List[str]) -> str:
        """Concatenate the strings in l, separated by this string."""
        if len(l) == 0:
            return ""
        if len(l) == 1:
            return l[0]
        if len(self) == 0:
            return str.cat(l)

        # compute length
        n = 0
        i = 0
        while i < len(l):
            n += len(l[i])
            if i < len(l) - 1:
                n += len(self)
            i += 1

        # copy to new buffer
        p = Ptr[byte](n)
        r = 0
        i = 0
        while i < len(l):
            str.memcpy(p + r, l[i].ptr, len(l[i]))
            r += len(l[i])
            if i < len(l) - 1:
                str.memcpy(p + r, self.ptr, len(self))
                r += len(self)
            i += 1

        return str(p, n)

    def isdigit(self) -> bool:
        """
        str.isdigit() -> bool

        Return True if all characters in str are digits
        and there is at least one character in str, False otherwise.
        """
        if len(self) == 0:
            return False

        for i in range(len(self)):
            if not str._isdigit(self.ptr[i]):
                return False
        return True

    def islower(self) -> bool:
        """
        str.islower() -> bool

        Return True if all cased characters in str are lowercase and there is
        at least one cased character in str, False otherwise.
        """
        cased = False

        # For empty strings
        if len(self) == 0:
            return False

        # For single character strings
        if len(self) == 1:
            return str._islower(self.ptr[0])

        for i in range(len(self)):
            if str._isupper(self.ptr[i]):
                return False
            elif not cased and str._islower(self.ptr[i]):
                cased = True
        return cased

    def isupper(self) -> bool:
        """
        str.isupper() -> bool

        Return True if all cased characters in str are uppercase and there is
        at least one cased character in str, False otherwise.
        """
        cased = False

        # For empty strings
        if len(self) == 0:
            return False

        # For single character strings
        if len(self) == 1:
            return str._isupper(self.ptr[0])

        for i in range(len(self)):
            if str._islower(self.ptr[i]):
                return False
            elif not cased and str._isupper(self.ptr[i]):
                cased = True
        return cased

    def isalnum(self) -> bool:
        """
        str.isalnum() -> bool

        Return True if all characters in str are alphanumeric
        and there is at least one character in str, False otherwise.
        """
        if len(self) == 0:
            return False

        for i in range(len(self)):
            if not str._isalnum(self.ptr[i]):
                return False
        return True

    def isalpha(self) -> bool:
        """
        str.isalpha() -> bool

        Return True if all characters in str are alphabetic
        and there is at least one character in str, False otherwise.
        """
        if len(self) == 0:
            return False

        for i in range(len(self)):
            if not str._isalpha(self.ptr[i]):
                return False
        return True

    def isspace(self) -> bool:
        """
        str.isspace() -> bool

        Return True if all characters in str are whitespace
        and there is at least one character in str, False otherwise.
        """
        if len(self) == 0:
            return False

        for i in range(len(self)):
            if not str._isspace(self.ptr[i]):
                return False
        return True

    def istitle(self) -> bool:
        """
        str.istitle() -> bool

        Return True if str is a titlecased string and there is at least one
        character in str, i.e. uppercase characters may only follow uncased
        characters and lowercase characters only cased ones. Return False
        otherwise.
        """

        # For empty strings
        if len(self) == 0:
            return False

        # For single character strings
        if len(self) == 1:
            return str._isupper(self.ptr[0])

        cased = False
        prev_is_cased = False
        for i in range(len(self)):
            if str._isupper(self.ptr[i]):
                if prev_is_cased:
                    return False
                prev_is_cased = True
                cased = True
            elif str._islower(self.ptr[i]):
                if not prev_is_cased:
                    return False
                prev_is_cased = True
                cased = True
            else:
                prev_is_cased = False
        return cased

    def capitalize(self) -> str:
        """
        str.capitalize() -> copy of str

        Return a copy of str with only its first character capitalized (ASCII)
        and the rest lower-cased.
        """
        n = len(self)
        if n > 0:
            p = Ptr[byte](n)
            p[0] = str._toupper(self.ptr[0])
            for i in range(1, n):
                p[i] = str._tolower(self.ptr[i])
            return str(p, n)
        return ""

    def isdecimal(self) -> bool:
        """
        str.isdecimal() -> bool

        Return True if str is a decimal string, False otherwise.
        str is a decimal string if all characters in str are decimal and
        there is at least one character in str.
        """
        if len(self) == 0:
            return False

        for i in range(len(self)):
            # test ascii values 48-57 == 0-9
            if not (48 <= int(self.ptr[i]) <= 57):
                return False
        return True

    def lower(self) -> str:
        """
        str.lower() -> copy of str

        Return a copy of str with all ASCII characters converted to lowercase.
        """
        # Empty string
        n = len(self)
        if n == 0:
            return ""
        p = Ptr[byte](n)
        for i in range(n):
            p[i] = str._tolower(self.ptr[i])
        return str(p, n)

    def upper(self) -> str:
        """
        str.upper() -> copy of str

        Return a copy of str with all ASCII characters converted to uppercase.
        """
        # Empty string
        n = len(self)
        if n == 0:
            return ""
        p = Ptr[byte](n)
        for i in range(n):
            p[i] = str._toupper(self.ptr[i])
        return str(p, n)

    def isascii(self) -> bool:
        """
        str.isascii() -> bool

        Return True if str is empty or all characters in str are ASCII,
        False otherwise.
        """
        for i in range(len(self)):
            if int(self.ptr[i]) >= 128:
                return False
        return True

    def casefold(self) -> str:
        """
        str.casefold() -> copy of str

        Return a version of the string suitable for caseless comparisons.

        Unlike Python, casefold() deals with just ASCII characters.
        """
        return self.lower()

    def swapcase(self) -> str:
        """
        str.swapcase() -> copy of str

        Return a copy of str with uppercase ASCII characters converted
        to lowercase ASCII and vice versa.
        """
        # Empty string
        n = len(self)
        if n == 0:
            return ""
        p = Ptr[byte](n)
        for i in range(n):
            if str._islower(self.ptr[i]):
                p[i] = str._toupper(self.ptr[i])
            elif str._isupper(self.ptr[i]):
                p[i] = str._tolower(self.ptr[i])
            else:
                p[i] = self.ptr[i]
        return str(p, n)

    def title(self) -> str:
        """
        str.title() -> copy of str

        Return a titlecased version of str, i.e. ASCII words start with
        uppercase characters, all remaining cased characters have lowercase.
        """
        prev_is_cased = False

        n = len(self)
        if n == 0:
            return ""

        p = Ptr[byte](n)
        for i in range(n):
            if str._islower(self.ptr[i]):
                # lowercase to uppercase
                if not prev_is_cased:
                    p[i] = str._toupper(self.ptr[i])
                else:
                    p[i] = self.ptr[i]
                prev_is_cased = True
            elif str._isupper(self.ptr[i]):
                # uppercase to lowercase
                if prev_is_cased:
                    p[i] = str._tolower(self.ptr[i])
                else:
                    p[i] = self.ptr[i]
                prev_is_cased = True
            else:
                p[i] = self.ptr[i]
                prev_is_cased = False
        return str(p, n)

    def isnumeric(self) -> bool:
        """
        str.isnumeric() -> bool

        Return True if the string is a numeric string, False otherwise.
        A string is numeric if all characters in the string are numeric
        and there is at least one character in the string.

        Unlike Python, isnumeric() deals with just ASCII characters.
        """
        return self.isdecimal()

    def _build(*args):
        """
        Concatenate the arguments into one newly allocated string.

        Each argument is either a str, or a (str, count) tuple meaning
        "this string repeated count times".
        """
        total = 0
        for t in args:
            if isinstance(t, str):
                total += len(t)
            else:
                total += len(t[0]) * t[1]
        p = Ptr[byte](total)
        i = 0
        for t in args:
            if isinstance(t, str):
                str.memcpy(p + i, t.ptr, t.len)
                i += t.len
            else:
                s, n = t
                for _ in range(n):
                    str.memcpy(p + i, s.ptr, s.len)
                    i += s.len
        return str(p, total)

    def ljust(self, width: int, fillchar: str = " ") -> str:
        """
        ljust(width[, fillchar]) -> string

        Return a left-justified string of length width.

        Padding is done using the specified fill character (default is a space).
        """
        if len(fillchar) != 1:
            raise ValueError("The fill character must be exactly one character long")

        if width <= len(self):
            return self

        return str._build(self, (fillchar, width - len(self)))

    def rjust(self, width: int, fillchar: str = " ") -> str:
        """
        rjust(width[, fillchar]) -> string

        Return a right-justified string of length width.

        Padding is done using the specified fill character (default is a space).
        """
        if len(fillchar) != 1:
            raise ValueError("The fill character must be exactly one character long")

        if width <= len(self):
            return self

        return str._build((fillchar, width - len(self)), self)

    def center(self, width: int, fillchar: str = " ") -> str:
        """
        str.center(width[, fillchar]) -> string

        Return str centered in a string of length width.

        Padding is done using the specified fill character (default is a space)
        """
        if len(fillchar) != 1:
            raise ValueError("The fill character must be exactly one character long")

        if width <= len(self):
            return self

        pad = width - len(self)
        # Same rounding as CPython: when the padding is odd, the extra fill
        # character goes on the left if width is odd, on the right otherwise.
        left_pad = pad // 2 + (pad & width & 1)
        right_pad = pad - left_pad
        return str._build((fillchar, left_pad), self, (fillchar, right_pad))

    def zfill(self, width: int) -> str:
        """
        str.zfill(width) -> string

        Pad a numeric string str with zeros on the left, to fill a field
        of the specified width.  The string str is never truncated.
        """
        if len(self) >= width:
            return self

        plus = byte(43)  # +
        minus = byte(45)  # -
        zero = byte(48)  # 0

        # width > len(self) here, so rjust builds a fresh buffer that can be
        # patched in place without touching self.
        zf = self.rjust(width, "0")
        if len(self) == 0:
            # no sign character to move; index `fill` would be past the buffer
            return zf

        fill = width - len(self)
        p = zf.ptr
        if p[fill] == plus or p[fill] == minus:
            # move the sign in front of the zero padding
            p[0] = p[fill]
            p[fill] = zero

        return zf

    def count(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.count(sub[, start[, end]]) -> int

        Return the number of occurrences of subsection sub in
        bytes str[start:end].  Optional arguments start and end are interpreted
        as in slice notation.
        """
        end: int = end if end is not None else len(self)
        start, end = self._correct_indices(start, end)
        if end - start < len(sub):
            return 0
        return algorithms.count(self._slice(start, end), sub)

    def find(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.find(sub [,start [,end]]) -> int

        Return the lowest index in str where substring sub is found,
        such that sub is contained within str[start:end].  Optional
        arguments start and end are interpreted as in slice notation.

        Return -1 on failure.
        """
        end: int = end if end is not None else len(self)
        start, end = self._correct_indices(start, end)
        if end - start < len(sub):
            return -1
        pos = algorithms.find(self._slice(start, end), sub)
        return pos if pos < 0 else pos + start

    def rfind(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.rfind(sub [,start [,end]]) -> int

        Return the highest index in str where substring sub is found,
        such that sub is contained within str[start:end].  Optional
        arguments start and end are interpreted as in slice notation.

        Return -1 on failure.
        """
        end: int = end if end is not None else len(self)
        start, end = self._correct_indices(start, end)
        if end - start < len(sub):
            return -1
        pos = algorithms.rfind(self._slice(start, end), sub)
        return pos if pos < 0 else pos + start

    def isidentifier(self) -> bool:
        """
        str.isidentifier() -> bool

        Return True if the string is a valid identifier, False otherwise.

        Unlike Python, isidentifier() deals with just ASCII characters.
        """
        # empty string
        if len(self) == 0:
            return False

        # first character must be a letter or _
        first = self._at(0)
        if not (first.isalpha() or first == "_"):
            return False

        # remaining characters must be letters, digits or _
        for i in range(1, len(self)):
            ith = self._at(i)
            if not (ith.isalpha() or ith.isdecimal() or ith == "_"):
                return False

        return True

    def isprintable(self) -> bool:
        """
        str.isprintable() -> bool

        Return True if the string is printable or empty, False otherwise.

        Unlike Python, isprintable() deals with just ASCII characters.
        """
        for i in range(len(self)):
            # printable ASCII is 32..126; 127 (DEL) is a control character
            if not (32 <= int(self.ptr[i]) <= 126):
                return False
        return True

    def _has_char(self, chars: str) -> bool:
        """
        Test whether the first character of self should be stripped.

        With non-empty chars, that means it occurs in chars; otherwise it
        means it is whitespace. Callers pass a one-character string as self.
        """
        s = self._at(0)
        if chars:
            for c in chars:
                if s == c:
                    return True
            return False
        else:
            return s.isspace()

    def lstrip(self, chars: str = "") -> str:
        """
        str.lstrip([chars]) -> string

        Return a copy of the string str with leading whitespace removed.
        If chars is given, remove characters in chars instead.

        Unlike Python, lstrip() deals with just ASCII characters.
        """
        i = 0
        while i < len(self) and self._at(i)._has_char(chars):
            i += 1
        return self._slice(i, len(self))

    def rstrip(self, chars: str = "") -> str:
        """
        str.rstrip([chars]) -> string

        Return a copy of the string str with trailing whitespace removed.
        If chars is given, remove characters in chars instead.

        Unlike Python, rstrip() deals with just ASCII characters.
        """
        i = len(self) - 1
        while i >= 0 and self._at(i)._has_char(chars):
            i -= 1
        return self._slice(0, i + 1)

    def strip(self, chars: str = "") -> str:
        """
        str.strip([chars]) -> string

        Return a copy of the string str with leading and trailing
        whitespace removed.
        If chars is given, remove characters in chars instead.

        Unlike Python, strip() deals with just ASCII characters.
        """
        return self.lstrip(chars).rstrip(chars)

    def partition(self, sep: str) -> Tuple[str, str, str]:
        """
        Search for the separator sep in str, and return the part before it,
        the separator itself, and the part after it.  If the separator is not
        found, return str and two empty strings.
        """
        if not sep:
            raise ValueError("empty separator")
        pos = algorithms.find(self, sep)
        if pos < 0:
            return self, "", ""
        return self._slice(0, pos), sep, self._slice(pos + len(sep), len(self))

    def rpartition(self, sep: str) -> Tuple[str, str, str]:  # XXX
        """
        Search for the separator sep in str, starting at the end of str, and return
        the part before it, the separator itself, and the part after it.  If the
        separator is not found, return two empty strings and str.
        """
        if not sep:
            raise ValueError("empty separator")
        pos = algorithms.rfind(self, sep)
        if pos < 0:
            return "", "", self
        return self._slice(0, pos), sep, self._slice(pos + len(sep), len(self))

    def split(self, sep: Optional[str] = None, maxsplit: int = -1) -> List[str]:
        """
        str.split([sep [,maxsplit]]) -> list of strings

        Return a list of the words in the string str, using sep as the
        delimiter string.  If maxsplit is given, at most maxsplit
        splits are done. If sep is not specified, any whitespace string
        is a separator and empty strings are removed from the result.
        """
        if sep is None:
            return self._split_whitespace(
                maxsplit if maxsplit >= 0 else _MAX
            )
        sep: str = sep

        if len(sep) == 0:
            raise ValueError("empty separator")

        # special case for length-1 pattern
        if len(sep) == 1:
            return self._split_char(sep.ptr[0], maxsplit if maxsplit >= 0 else _MAX)

        MAX_PREALLOC = 12
        maxsplit = maxsplit if maxsplit >= 0 else _MAX
        prealloc_size = MAX_PREALLOC if maxsplit >= MAX_PREALLOC else maxsplit + 1
        v = List[str](capacity=prealloc_size)
        i = 0
        j = 0
        n = len(self)

        while maxsplit > 0:
            maxsplit -= 1
            pos = algorithms.find(self._slice(i, n), sep)
            if pos < 0:
                break
            j = i + pos
            v.append(self._slice(i, j))
            i = j + len(sep)

        # whatever is left after the last separator found
        v.append(self._slice(i, n))
        return v

    def rsplit(self, sep: Optional[str] = None, maxsplit: int = -1) -> List[str]:
        """
        str.rsplit([sep [,maxsplit]]) -> list of strings

        Return a list of the words in the string str, using sep as the
        delimiter string, starting at the end of the string and working
        to the front.  If maxsplit is given, at most maxsplit splits are
        done. If sep is not specified, any whitespace string
        is a separator.
        """
        if sep is None:
            return self._rsplit_whitespace(
                maxsplit if maxsplit >= 0 else _MAX
            )
        sep: str = sep

        if len(sep) == 0:
            raise ValueError("empty separator")

        # special case for length-1 pattern
        if len(sep) == 1:
            return self._rsplit_char(sep.ptr[0], maxsplit if maxsplit >= 0 else _MAX)

        MAX_PREALLOC = 12
        maxsplit = maxsplit if maxsplit >= 0 else _MAX
        prealloc_size = MAX_PREALLOC if maxsplit >= MAX_PREALLOC else maxsplit + 1
        v = List[str](capacity=prealloc_size)
        j = len(self)

        while maxsplit > 0:
            maxsplit -= 1
            pos = algorithms.rfind(self._slice(0, j), sep)
            if pos < 0:
                break
            v.append(self._slice(pos + len(sep), j))
            j = pos

        # pieces were collected right-to-left
        v.append(self._slice(0, j))
        v.reverse()
        return v

    def splitlines(self, keepends: bool = False) -> List[str]:
        """
        str.splitlines([keepends]) -> list of strings

        Return a list of the lines in str, breaking at line boundaries.
        Line breaks are not included in the resulting list unless keepends
        is given and true.
        """
        v = []
        i = 0
        j = 0
        n = len(self)

        break_r = byte(13)  # \r
        break_n = byte(10)  # \n

        while i < n:
            while i < n and not (self.ptr[i] == break_r or self.ptr[i] == break_n):
                i += 1

            eol = i
            if i < n:
                # treat \r\n as a single line break
                if self.ptr[i] == break_r and i + 1 < n and self.ptr[i + 1] == break_n:
                    i += 2
                else:
                    i += 1
                if keepends:
                    eol = i

            if j == 0 and eol == n:
                # single line covering the whole string: no need to slice
                v.append(self)
                break

            v.append(self._slice(j, eol))
            j = i

        return v

    def startswith(
        self, prefix: str, start: int = 0, end: Optional[int] = None
    ) -> bool:
        """
        str.startswith(prefix[, start[, end]]) -> bool

        Return True if str starts with the specified prefix, False otherwise.
        With optional start, test str beginning at that position.
        With optional end, stop comparing str at that position.
        """
        end: int = end if end is not None else len(self)
        # Normalize negative / oversized indices exactly like find() does, so
        # the slice below can never reach outside the buffer.
        start, end = self._correct_indices(start, end)

        # length of prefix is longer than range of string[start:end]
        if end - start < len(prefix):
            return False

        # prefix is an empty string
        if not prefix:
            return True

        return prefix == self._slice(start, start + len(prefix))

    def endswith(self, suffix: str, start: int = 0, end: Optional[int] = None) -> bool:
        """
        str.endswith(suffix[, start[, end]]) -> bool

        Return True if str ends with the specified suffix, False otherwise.
        With optional start, test str beginning at that position.
        With optional end, stop comparing str at that position.
        """
        end: int = end if end is not None else len(self)
        # Normalize negative / oversized indices exactly like find() does, so
        # the slice below can never reach outside the buffer.
        start, end = self._correct_indices(start, end)

        # length of suffix is longer than range of string[start:end]
        if end - start < len(suffix):
            return False

        # suffix is an empty string
        if not suffix:
            return True

        return suffix == self._slice(end - len(suffix), end)

    def index(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.index(sub [,start [,end]]) -> int

        Like str.find() but raise ValueError when the substring is not found.
        """
        i = self.find(sub, start, end)
        if i == -1:
            raise ValueError("substring not found")
        else:
            return i

    def rindex(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.rindex(sub [,start [,end]]) -> int

        Like str.rfind() but raise ValueError when the substring is not found.
        """
        i = self.rfind(sub, start, end)
        if i == -1:
            raise ValueError("substring not found")
        else:
            return i

    def replace(self, old: str, new: str, maxcount: int = -1) -> str:
        """
        str.replace(old, new[, count]) -> string

        Return a copy of string str with all occurrences of substring
        old replaced by new.  If the optional argument maxcount is
        given, only the first maxcount occurrences are replaced.
        """
        return self._replace(old, new, maxcount)

    def expandtabs(self, tabsize: int = 8) -> str:
        """
        str.expandtabs([tabsize]) -> string

        Return a copy of str where all tab characters are expanded using spaces.
        If tabsize is not given, a tab size of 8 characters is assumed.
        """
        i = 0
        j = 0
        p = self.ptr
        e = p + len(self)
        break_r = byte(13)  # \r
        break_n = byte(10)  # \n
        tab = byte(9)  # \t
        space = byte(32)  # ' '

        def overflow():
            raise OverflowError("result too long")

        # First pass: compute the length of the result.
        # i = length of completed lines, j = current column.
        while p < e:
            if p[0] == tab:
                if tabsize > 0:
                    incr = tabsize - (j % tabsize)
                    if j > _MAX - incr:
                        overflow()
                    j += incr
            else:
                if j > _MAX - 1:
                    overflow()
                j += 1
                if p[0] == break_n or p[0] == break_r:
                    if i > _MAX - j:
                        overflow()
                    i += j
                    j = 0
            p += 1

        if i > _MAX - j:
            overflow()

        # Second pass: fill the output buffer.
        u_len = i + j
        u = Ptr[byte](u_len)
        j = 0
        q = u
        p = self.ptr

        while p < e:
            if p[0] == tab:
                if tabsize > 0:
                    # pad with spaces up to the next tab stop
                    i = tabsize - (j % tabsize)
                    j += i
                    while True:
                        k = i
                        i -= 1
                        if k == 0:
                            break
                        q[0] = space
                        q += 1
            else:
                j += 1
                q[0] = p[0]
                q += 1
                if p[0] == break_n or p[0] == break_r:
                    j = 0
            p += 1

        return str(u, u_len)

    def translate(self, map) -> str:
        """
        Return a copy with each character mapped by the given translation table.

        map is indexed by the integer value of each byte. A key that is
        absent leaves the byte unchanged; a key mapped to None deletes it.
        """
        n = len(self)
        m = 0

        # First pass: compute the length of the result.
        for i in range(n):
            key = int(self.ptr[i])
            if key in map:
                val = map[key]
                if val is not None:
                    m += len(val)
            else:
                m += 1

        # Second pass: write the translated bytes.
        p = Ptr[byte](m)
        q = p
        for i in range(n):
            key = int(self.ptr[i])
            if key in map:
                val = map[key]
                if val is not None:
                    str.memcpy(q, val.ptr, len(val))
                    q += len(val)
            else:
                q[0] = self.ptr[i]
                q += 1

        return str(p, m)

    # Internal helpers

    def _correct_indices(self, start: int, end: int) -> Tuple[int, int]:
        """
        Normalize slice-style indices against len(self).

        end is clamped into [0, len]; negative start is offset by len and
        floored at 0. start is not clamped from above, so callers must
        handle start > end.
        """
        n = len(self)

        if end > n:
            end = n
        elif end < 0:
            end += n
            if end < 0:
                end = 0

        if start < 0:
            start += n
            if start < 0:
                start = 0

        return (start, end)

    def _split_whitespace(self, maxcount: int) -> List[str]:
        """Split on runs of whitespace from the left, doing at most maxcount splits."""
        PREALLOC_MAX = 12
        l = List[str](PREALLOC_MAX if maxcount >= PREALLOC_MAX else maxcount + 1)

        str_len = len(self)
        i = 0
        j = 0
        while maxcount > 0:
            maxcount -= 1
            while i < str_len and str._isspace(self.ptr[i]):
                i += 1
            if i == str_len:
                break
            j = i
            i += 1
            while i < str_len and not str._isspace(self.ptr[i]):
                i += 1
            l.append(self._slice(j, i))

        if i < str_len:
            # Only occurs when maxcount was reached.
            # Skip any remaining whitespace and copy to end of string.
            while i < str_len and str._isspace(self.ptr[i]):
                i += 1
            if i != str_len:
                l.append(self._slice(i, str_len))

        return l

    def _rsplit_whitespace(self, maxcount: int) -> List[str]:
        """Split on runs of whitespace from the right, doing at most maxcount splits."""
        PREALLOC_MAX = 12
        l = List[str](PREALLOC_MAX if maxcount >= PREALLOC_MAX else maxcount + 1)

        str_len = len(self)
        i = str_len - 1
        j = str_len - 1
        while maxcount > 0:
            maxcount -= 1
            while i >= 0 and str._isspace(self.ptr[i]):
                i -= 1
            if i < 0:
                break
            j = i
            i -= 1
            while i >= 0 and not str._isspace(self.ptr[i]):
                i -= 1
            l.append(self._slice(i + 1, j + 1))

        if i >= 0:
            # Only occurs when maxcount was reached.
            # Skip any remaining whitespace and copy to beginning of string.
            while i >= 0 and str._isspace(self.ptr[i]):
                i -= 1
            if i >= 0:
                l.append(self._slice(0, i + 1))

        l.reverse()
        return l

    def _split_char(self, char: byte, maxcount: int) -> List[str]:
        """Split on a single-byte separator from the left, at most maxcount times."""
        PREALLOC_MAX = 12
        l = List[str](PREALLOC_MAX if maxcount >= PREALLOC_MAX else maxcount + 1)

        str_len = len(self)
        i = 0
        j = 0

        while i < str_len and maxcount > 0:
            if self.ptr[i] == char:
                l.append(self._slice(j, i))
                j = i + 1
                maxcount -= 1
            i += 1

        l.append(self._slice(j, str_len))
        return l

    def _rsplit_char(self, char: byte, maxcount: int) -> List[str]:
        """Split on a single-byte separator from the right, at most maxcount times."""
        PREALLOC_MAX = 12
        l = List[str](PREALLOC_MAX if maxcount >= PREALLOC_MAX else maxcount + 1)

        str_len = len(self)
        i = str_len - 1
        j = str_len - 1  # inclusive index of the last byte of the current piece

        while i >= 0 and maxcount > 0:
            if self.ptr[i] == char:
                l.append(self._slice(i + 1, j + 1))
                j = i - 1
                maxcount -= 1
            i -= 1

        l.append(self._slice(0, j + 1))
        l.reverse()
        return l

    def _findchar(self, c: byte):
        """Return the result of memchr for byte c over this string's bytes."""
        return _C.memchr(self.ptr, i32(int(c)), len(self))

    def _countchar(self, c: byte, maxcount: int):
        """Count occurrences of byte c, stopping once maxcount is reached."""
        count = 0
        start = self.ptr
        end = start + len(self)

        while True:
            start = str(start, end - start)._findchar(c)
            if not start:
                break
            count += 1
            if count >= maxcount:
                break
            start += 1

        return count

    def _replace_interleave(self, to: str, maxcount: int):
        """Replace the empty string: insert `to` before each character and at the end."""
        self_s = self.ptr
        self_len = len(self)
        to_len = len(to)
        to_s = to.ptr
        count = 0
        i = 0

        if maxcount <= self_len:
            count = maxcount
        else:
            count = self_len + 1

        # assert count > 0
        if to_len > (_MAX - self_len) // count:
            raise OverflowError("replace bytes is too long")

        result_len = count * to_len + self_len
        result_s = Ptr[byte](result_len)
        result_s0 = result_s

        if to_len > 1:
            str.memcpy(result_s, to_s, to_len)
            result_s += to_len
            count -= 1

            while i < count:
                result_s[0] = self_s[0]
                result_s += 1
                self_s += 1
                str.memcpy(result_s, to_s, to_len)
                result_s += to_len
                i += 1
        else:
            # single-byte insert: plain stores instead of memcpy
            result_s[0] = to_s[0]
            result_s += to_len
            count -= 1

            while i < count:
                result_s[0] = self_s[0]
                result_s += 1
                self_s += 1
                result_s[0] = to_s[0]
                result_s += to_len
                i += 1

        # copy the rest of the original string
        str.memcpy(result_s, self_s, self_len - i)
        return str(result_s0, result_len)

    def _replace_delete_single_character(self, from_c: byte, maxcount: int):
        """Delete up to maxcount occurrences of the byte from_c."""
        self_len = len(self)
        self_s = self.ptr

        count = self._countchar(from_c, maxcount)
        if count == 0:
            return self

        result_len = self_len - count
        # assert result_len >= 0
        result_s = Ptr[byte](result_len)
        result_s0 = result_s

        start = self_s
        end = self_s + self_len
        while count > 0:
            count -= 1
            nxt = str(start, end - start)._findchar(from_c)
            if not nxt:
                break
            str.memcpy(result_s, start, nxt - start)
            result_s += nxt - start
            start = nxt + 1

        str.memcpy(result_s, start, end - start)
        return str(result_s0, result_len)

    def _replace_delete_substring(self, from_s: str, maxcount: int):
        """Delete up to maxcount occurrences of the substring from_s."""
        self_len = len(self)
        self_s = self.ptr
        from_len = len(from_s)

        count = algorithms.count_with_max(self, from_s, maxcount)
        if count == 0:
            return self

        result_len = self_len - (count * from_len)
        # assert result_len >= 0
        result_s = Ptr[byte](result_len)
        result_s0 = result_s

        start = self_s
        end = self_s + self_len
        while count > 0:
            count -= 1
            offset = algorithms.find(str(start, end - start), from_s)
            if offset == -1:
                break
            nxt = start + offset
            str.memcpy(result_s, start, nxt - start)
            result_s += nxt - start
            start = nxt + from_len

        str.memcpy(result_s, start, end - start)
        return str(result_s0, result_len)

    def _replace_single_character_in_place(self, from_c: byte, to_c: byte, maxcount: int):
        """Replace up to maxcount occurrences of byte from_c with byte to_c (same length)."""
        self_s = self.ptr
        self_len = len(self)

        nxt = self._findchar(from_c)
        if not nxt:
            return self

        # work on a copy so self is left untouched
        result_s = Ptr[byte](self_len)
        str.memcpy(result_s, self_s, self_len)

        start = result_s + (nxt - self_s)
        start[0] = to_c
        start += 1
        end = result_s + self_len

        maxcount -= 1
        while maxcount > 0:
            maxcount -= 1
            nxt = str(start, end - start)._findchar(from_c)
            if not nxt:
                break
            nxt[0] = to_c
            start = nxt + 1

        return str(result_s, self_len)

    def _replace_substring_in_place(self, from_s: str, to: str, maxcount: int):
        """Replace up to maxcount occurrences of from_s with the equally long `to`."""
        self_s = self.ptr
        self_len = len(self)
        from_len = len(from_s)
        to_s = to.ptr

        offset = algorithms.find(self, from_s)
        if offset == -1:
            return self

        # work on a copy so self is left untouched
        result_s = Ptr[byte](self_len)
        str.memcpy(result_s, self_s, self_len)

        start = result_s + offset
        str.memcpy(start, to_s, from_len)
        start += from_len
        end = result_s + self_len

        maxcount -= 1
        while maxcount > 0:
            maxcount -= 1
            offset = algorithms.find(str(start, end - start), from_s)
            if offset == -1:
                break
            str.memcpy(start + offset, to_s, from_len)
            start += offset + from_len

        return str(result_s, self_len)

    def _replace_single_character(self, from_c: byte, to_s: str, maxcount: int):
        """Replace up to maxcount occurrences of byte from_c with the string to_s."""
        self_s = self.ptr
        self_len = len(self)
        to_len = len(to_s)

        count = self._countchar(from_c, maxcount)
        if count == 0:
            return self

        # assert count > 0
        if to_len - 1 > (_MAX - self_len) // count:
            raise OverflowError("replace bytes is too long")

        result_len = self_len + count * (to_len - 1)
        result_s = Ptr[byte](result_len)
        result_s0 = result_s

        start = self_s
        end = self_s + self_len
        while count > 0:
            count -= 1
            nxt = str(start, end - start)._findchar(from_c)
            if not nxt:
                break

            if nxt == start:
                # match at the current position: nothing to copy before it
                str.memcpy(result_s, to_s.ptr, to_len)
                result_s += to_len
                start += 1
            else:
                str.memcpy(result_s, start, nxt - start)
                result_s += (nxt - start)
                str.memcpy(result_s, to_s.ptr, to_len)
                result_s += to_len
                start = nxt + 1

        str.memcpy(result_s, start, end - start)
        return str(result_s0, result_len)

    def _replace_substring(self, from_s: str, to_s: str, maxcount: int):
        """Replace up to maxcount occurrences of from_s with to_s (general case)."""
        self_s = self.ptr
        self_len = len(self)
        from_len = len(from_s)
        to_len = len(to_s)

        count = algorithms.count_with_max(self, from_s, maxcount)
        if count == 0:
            return self

        # assert count > 0
        if to_len - from_len > (_MAX - self_len) // count:
            raise OverflowError("replace bytes is too long")

        result_len = self_len + count * (to_len - from_len)
        result_s = Ptr[byte](result_len)
        result_s0 = result_s

        start = self_s
        end = self_s + self_len
        while count > 0:
            count -= 1
            offset = algorithms.find(str(start, end - start), from_s)
            if offset == -1:
                break

            nxt = start + offset
            if nxt == start:
                # match at the current position: nothing to copy before it
                str.memcpy(result_s, to_s.ptr, to_len)
                result_s += to_len
                start += from_len
            else:
                str.memcpy(result_s, start, nxt - start)
                result_s += (nxt - start)
                str.memcpy(result_s, to_s.ptr, to_len)
                result_s += to_len
                start = nxt + from_len

        str.memcpy(result_s, start, end - start)
        return str(result_s0, result_len)

    def _replace(self, from_s: str, to_s: str, maxcount: int):
        """
        Dispatch replace() to the specialised routine for the given
        pattern / replacement lengths. A negative maxcount means no limit.
        """
        self_len = len(self)
        from_len = len(from_s)
        to_len = len(to_s)

        if self_len < from_len:
            # pattern cannot occur
            return self

        if maxcount < 0:
            maxcount = _MAX
        elif maxcount == 0:
            return self

        if from_len == 0:
            if to_len == 0:
                return self
            # insert the 'to' bytes everywhere
            return self._replace_interleave(to_s, maxcount)

        if to_len == 0:
            # delete all occurrences of 'from' bytes
            if from_len == 1:
                return self._replace_delete_single_character(from_s.ptr[0], maxcount)
            return self._replace_delete_substring(from_s, maxcount)

        if from_len == to_len:
            # same length: the result has the same size as self
            if from_len == 1:
                return self._replace_single_character_in_place(from_s.ptr[0], to_s.ptr[0], maxcount)
            return self._replace_substring_in_place(from_s, to_s, maxcount)

        if from_len == 1:
            return self._replace_single_character(from_s.ptr[0], to_s, maxcount)
        else:
            return self._replace_substring(from_s, to_s, maxcount)