# (c) 2022 Exaloop Inc. All rights reserved.


def filter_overlaps(v: Generator[int], n: int) -> Generator[int]:
    """
    Yield the positions from `v` that are at least `n` past the most
    recently yielded position. The first position is always yielded.

    Intended for non-negative positions produced in ascending order
    (a negative value is used internally as the "nothing yielded yet"
    marker).
    """
    prev = -1  # negative: no position has been yielded yet
    for pos in v:
        if prev < 0 or pos - prev >= n:
            prev = pos
            yield pos


def rfilter_overlaps(v: Generator[int], n: int) -> Generator[int]:
    """
    Yield the positions from `v` that are at least `n` before the most
    recently yielded position. The first position is always yielded.

    Mirror image of `filter_overlaps`, intended for non-negative
    positions produced in descending order.
    """
    prev = -1  # negative: no position has been yielded yet
    for pos in v:
        if prev < 0 or prev - pos >= n:
            prev = pos
            yield pos


def string_search_slow(text: str, pattern: str) -> Generator[int]:
    """
    Yield, in ascending order, every index of `text` at which `pattern`
    starts, by comparing a slice at each candidate index.

    Matches may overlap. An empty pattern matches at every index from 0
    through len(text).
    """
    m = len(pattern)
    if m == 0:
        for i in range(len(text) + 1):
            yield i
        return

    for i in range(len(text) - m + 1):
        if text[i : i + m] == pattern:
            yield i


def rstring_search_slow(text: str, pattern: str) -> Generator[int]:
    """
    Yield, in descending order, every index of `text` at which `pattern`
    starts, by comparing a slice at each candidate index from right to
    left.

    Matches may overlap. An empty pattern matches at every index from
    len(text) down to 0.
    """
    m = len(pattern)
    if m == 0:
        for i in range(len(text), -1, -1):
            yield i
        return

    # `end` is the exclusive end index of the candidate window
    for end in range(len(text), m - 1, -1):
        if text[end - m : end] == pattern:
            yield end - m


def string_search_rabin_karp(
    text: str, pattern: str, prime: int = 645419
) -> Generator[int]:
    """
    Rabin-Karp algorithm

    Yield, in ascending order, every index of `text` at which `pattern`
    starts. Matches may overlap. `prime` is the modulus of the rolling
    hash.

    An empty pattern matches at every index from 0 through len(text),
    and a pattern longer than the text yields nothing; both agree with
    `string_search_kmp` and `string_search_slow`.
    """
    BASE = 256
    n = len(text)
    m = len(pattern)

    if m == 0:
        for i in range(n + 1):
            yield i
        return
    if m > n:
        # there is no full window to hash; reading text[0:m] would fail
        return

    # weight of the leading character of a window: BASE**(m - 1) % prime
    high = 1
    for _ in range(m - 1):
        high = (high * BASE) % prime

    # hash of the pattern and of the first window of the text
    p, t = 0, 0
    for i in range(m):
        p = (BASE * p + ord(pattern[i])) % prime
        t = (BASE * t + ord(text[i])) % prime

    last = n - m  # start index of the final window
    for i in range(last + 1):
        # equal hashes only suggest a match; confirm by comparing text
        if p == t and text[i : i + m] == pattern:
            yield i
        if i < last:
            # roll the window: drop the leading character, shift, then
            # append the next one; `+ prime` keeps the value non-negative
            t = (BASE * (t - (ord(text[i]) * high) % prime + prime)) % prime
            t = (t + ord(text[i + m])) % prime


def compute_lp_array(pattern: str, len_pat: int) -> List[int]:
    """
    Return the KMP prefix table for the first `len_pat` characters of
    `pattern`: entry i is the length of the longest proper prefix of
    pattern[0..i] that is also a suffix of it.

    The table always starts with 0, so [0] is returned when
    len_pat <= 1.
    """
    lp = [0]
    k = 0  # length of the prefix currently being extended
    i = 1
    while i < len_pat:
        if pattern[i] == pattern[k]:
            k += 1
            lp.append(k)
            i += 1
        elif k != 0:
            # fall back to the next shorter candidate prefix; i is retried
            k = lp[k - 1]
        else:
            lp.append(0)
            i += 1
    return lp


def string_search_kmp(text: str, pattern: str) -> Generator[int]:
    """
    Knuth-Morris-Pratt algorithm

    Yield, in ascending order, every index of `text` at which `pattern`
    starts. Matches may overlap. An empty pattern matches at every index
    from 0 through len(text).
    """
    n = len(text)
    m = len(pattern)
    if m == 0:
        for i in range(n + 1):
            yield i
        return

    lp_array = compute_lp_array(pattern, m)
    i, j = 0, 0  # indices into text and pattern
    while i < n:
        if pattern[j] == text[i]:
            i += 1
            j += 1
            if j == m:
                yield i - j
                # keep the longest border so overlapping matches are found
                j = lp_array[j - 1]
        elif j != 0:
            # mismatch after j matches: the first lp_array[j - 1]
            # characters are known to match, so do not re-compare them
            j = lp_array[j - 1]
        else:
            i += 1


def replace_interleave(self, new: str, maxcount: int) -> str:
    """
    Return a copy of `self` with `new` inserted before each character
    and once more after the last one, performing at most `maxcount`
    insertions from left to right.

    A negative `maxcount` means no limit. With `maxcount == 0` the
    string is returned unchanged.
    """
    n = len(self)
    # a string of length n has n + 1 insertion points
    if maxcount < 0 or maxcount > n:
        maxcount = n + 1

    head = min(maxcount, n)  # number of characters that get a prefix
    res = []
    for i in range(head):
        res.append(new)
        res.append(self[i])

    if maxcount > n:
        # every character was prefixed; add the trailing insertion
        res.append(new)
    else:
        res.append(self[head:])
    return str.cat(res)


def replace_delete_substring(self, old: str, new: str, maxcount: int) -> str:
    """
    Return a copy of `self` in which up to `maxcount` non-overlapping
    occurrences of `old`, taken from left to right, are replaced by
    `new`.

    A negative `maxcount` means no limit. With `maxcount == 0`, or when
    `old` does not occur, the string is returned unchanged.
    """
    if maxcount == 0:
        return self

    # KMP reports overlapping matches; only non-overlapping ones can be
    # replaced
    li = list(filter_overlaps(string_search_kmp(self, old), len(old)))

    # no matches
    if len(li) == 0:
        return self

    if maxcount < 0 or maxcount > len(li):
        maxcount = len(li)

    res = []
    start = 0  # index just past the previously replaced occurrence
    for k in range(maxcount):
        res.append(self[start : li[k]])
        res.append(new)
        start = li[k] + len(old)
    res.append(self[start:])
    return str.cat(res)