mirror of https://github.com/exaloop/codon.git
stdlib/algorithms/strings.codon
parent
db0ae077f9
commit
4829d86c21
stdlib/algorithms
|
@ -1,48 +1,56 @@
|
|||
def filter_overlaps(v: Generator[int], n: int):
|
||||
# (c) 2022 Exaloop Inc. All rights reserved.
|
||||
|
||||
def filter_overlaps(v: Generator[int], n: int) -> Generator[int]:
    """
    Thin out an ascending stream of positions so that consecutive
    yielded positions are at least `n` apart.

    v: generator producing candidate positions (e.g. match offsets).
    n: minimum distance required between two yielded positions.

    Yields each position that is kept. The first position is always
    kept; a later position is kept only when it lies at least `n`
    past the most recently kept one.
    """
    # A negative value marks "nothing kept yet", so positions are
    # presumably non-negative offsets -- confirm against callers.
    prev = -1
    for pos in v:
        if prev < 0 or pos - prev >= n:
            prev = pos
            yield pos
|
||||
|
||||
def rfilter_overlaps(v: Generator[int], n: int) -> Generator[int]:
    """
    Thin out a descending stream of positions so that consecutive
    yielded positions are at least `n` apart.

    v: generator producing candidate positions in right-to-left order.
    n: minimum distance required between two yielded positions.

    Yields each position that is kept. The first position is always
    kept; a later position is kept only when it lies at least `n`
    before the most recently kept one.
    """
    # A negative value marks "nothing kept yet", so positions are
    # presumably non-negative offsets -- confirm against callers.
    prev = -1
    for pos in v:
        if prev < 0 or prev - pos >= n:
            prev = pos
            yield pos
|
||||
|
||||
def string_search_slow(text: str, pattern: str) -> Generator[int]:
    """
    Yield, in ascending order, the start index of every occurrence of
    `pattern` in `text`, found by comparing a slice at each offset.

    Occurrences may overlap (searching 'abbba' for 'bb' yields 1 and 2);
    nothing is filtered here.

    An empty pattern matches at every offset from 0 through len(text)
    inclusive. A pattern longer than the text yields nothing.
    """
    if pattern == "":
        for i in range(len(text) + 1):
            yield i
        return

    m = len(pattern)
    # The last offset at which a full-length window still fits is
    # len(text) - m; the range is empty when the pattern is too long.
    for i in range(len(text) - m + 1):
        if text[i : i + m] == pattern:
            yield i
|
||||
|
||||
def rstring_search_slow(text: str, pattern: str) -> Generator[int]:
    """
    Yield, in descending order, the start index of every occurrence of
    `pattern` in `text`, scanning from the right end of the text.

    Occurrences may overlap (searching 'abbbc' for 'bb' yields 2 and
    then 1); nothing is filtered here.

    An empty pattern matches at every offset from len(text) down to 0
    inclusive. A pattern longer than the text yields nothing.
    """
    if pattern == "":
        for i in range(len(text), -1, -1):
            yield i
        return

    m = len(pattern)
    # `end` is the exclusive end of the candidate window; it walks from
    # len(text) down to m so that the window start never goes below 0.
    for end in range(len(text), m - 1, -1):
        if text[end - m : end] == pattern:
            yield end - m
|
||||
|
||||
def string_search_rabin_karp(text: str, pattern: str, prime: int = 645419):
|
||||
|
||||
def string_search_rabin_karp(
|
||||
text: str, pattern: str, prime: int = 645419
|
||||
) -> Generator[int]:
|
||||
"""
|
||||
Return a list containing the position of each index
|
||||
the pattern is found.
|
||||
|
@ -66,7 +74,7 @@ def string_search_rabin_karp(text: str, pattern: str, prime: int = 645419):
|
|||
# check the hash values of current position of text and
|
||||
# pattern if the hash values are equal then compare
|
||||
# characters one by one
|
||||
if p == t and text[i:i + len(pattern)] == pattern:
|
||||
if p == t and text[i : i + len(pattern)] == pattern:
|
||||
yield i
|
||||
|
||||
# calculate hash value for next position of text.
|
||||
|
@ -74,16 +82,19 @@ def string_search_rabin_karp(text: str, pattern: str, prime: int = 645419):
|
|||
t = BASE * (t - (ord(text[i]) * hash) % prime + prime) % prime
|
||||
t = (t + ord(text[i + len(pattern)])) % prime
|
||||
|
||||
if p == t and text[-len(pattern):] == pattern:
|
||||
if p == t and text[-len(pattern) :] == pattern:
|
||||
yield len(text) - len(pattern)
|
||||
|
||||
|
||||
def computeLPArray(pattern: str, len_pat: int) -> List[int]:
|
||||
"""
|
||||
Return a list containing the length of the maximum matching
|
||||
proper prefix of the pattern[0, 1, ..., i]
|
||||
"""
|
||||
len = 0 # length of the previous longest prefix
|
||||
lp = List[int]() # longest proper prefix will hold the longest prefix values for pattern
|
||||
len = 0 # length of the previous longest prefix
|
||||
lp = List[
|
||||
int
|
||||
]() # longest proper prefix will hold the longest prefix values for pattern
|
||||
lp.append(0)
|
||||
i = 1
|
||||
|
||||
|
@ -94,13 +105,14 @@ def computeLPArray(pattern: str, len_pat: int) -> List[int]:
|
|||
i += 1
|
||||
else:
|
||||
if len != 0:
|
||||
len = lp[len-1]
|
||||
len = lp[len - 1]
|
||||
else:
|
||||
lp.append(0)
|
||||
i += 1
|
||||
return lp
|
||||
|
||||
def string_search_KMP(text: str, pattern: str):
|
||||
|
||||
def string_search_KMP(text: str, pattern: str) -> Generator[int]:
|
||||
"""
|
||||
Knuth-Morris-Pratt algorithm
|
||||
Return a list containing the position of each index
|
||||
|
@ -112,23 +124,24 @@ def string_search_KMP(text: str, pattern: str):
|
|||
return
|
||||
|
||||
lp_array = computeLPArray(pattern, len(pattern))
|
||||
i, j = 0, 0 # indices for text and pattern
|
||||
i, j = 0, 0 # indices for text and pattern
|
||||
while i < len(text):
|
||||
if pattern[j] == text[i]:
|
||||
i += 1
|
||||
j += 1
|
||||
if j == len(pattern):
|
||||
yield i-j
|
||||
j = lp_array[j-1]
|
||||
yield i - j
|
||||
j = lp_array[j - 1]
|
||||
# mismatch after j matches
|
||||
elif i < len(text) and pattern[j] != text[i]:
|
||||
# do not match lp[0...lp[j-1]] characters,
|
||||
# they will match anyways
|
||||
if j != 0:
|
||||
j = lp_array[j-1]
|
||||
j = lp_array[j - 1]
|
||||
else:
|
||||
i += 1
|
||||
|
||||
|
||||
def replace_interleave(self, new: str, maxcount: int) -> str:
|
||||
"""
|
||||
Returns a string inserting the 'new' string everywhere.
|
||||
|
@ -138,17 +151,18 @@ def replace_interleave(self, new: str, maxcount: int) -> str:
|
|||
res = List[str]()
|
||||
# insert the new string for maxcount <= len(self) times.
|
||||
for i in range(len(self)):
|
||||
if i+1 > maxcount:
|
||||
if i + 1 > maxcount:
|
||||
break
|
||||
res.append(new)
|
||||
res.append(self[i])
|
||||
j = i
|
||||
if maxcount-1 == len(self) or maxcount > len(self) or maxcount < 0:
|
||||
if maxcount - 1 == len(self) or maxcount > len(self) or maxcount < 0:
|
||||
res.append(new)
|
||||
else:
|
||||
res.append(self[j+1:])
|
||||
res.append(self[j + 1 :])
|
||||
return str.cat(res)
|
||||
|
||||
|
||||
def replace_delete_substring(self, old: str, new: str, maxcount: int) -> str:
|
||||
"""
|
||||
Returns a string deleting any instances of the 'old' string in self and
|
||||
|
@ -161,8 +175,8 @@ def replace_delete_substring(self, old: str, new: str, maxcount: int) -> str:
|
|||
return self
|
||||
|
||||
# when the whole string is replaced by ''
|
||||
if len(li)*len(old) == len(self) and maxcount >= len(self):
|
||||
return ''
|
||||
if len(li) * len(old) == len(self) and maxcount >= len(self):
|
||||
return ""
|
||||
|
||||
if maxcount > len(li):
|
||||
maxcount = len(li)
|
||||
|
@ -177,27 +191,8 @@ def replace_delete_substring(self, old: str, new: str, maxcount: int) -> str:
|
|||
# replace the old substring with the new substring
|
||||
for i in range(1, maxcount):
|
||||
res.append(new)
|
||||
res.append(self[j+len(old): li[i]])
|
||||
res.append(self[j + len(old) : li[i]])
|
||||
j = li[i]
|
||||
res.append(new)
|
||||
res.append(self[j+len(old):])
|
||||
res.append(self[j + len(old) :])
|
||||
return str.cat(res)
|
||||
|
||||
# # should get [2]
|
||||
# print rstring_search_slow('abbbc', 'bb')
|
||||
# print string_search_slow('abbba', 'bb')
|
||||
|
||||
# # should get [0, 9, 11, 14]
|
||||
# print string_search_slow('1214313141212 12', '12')
|
||||
# print string_search_rabin_karp('1214313141212 12', '12', 1001)
|
||||
# print string_search_KMP('1214313141212 12', '12')
|
||||
# #
|
||||
# # # should get [0, 9]
|
||||
# print string_search_slow('AABAACAADAABAABA', 'AABA')
|
||||
# print string_search_rabin_karp('AABAACAADAABAABA', 'AABA', 1001)
|
||||
# print string_search_KMP('AABAACAADAABAABA', 'AABA')
|
||||
# #
|
||||
# # # should get [10]
|
||||
# print string_search_slow('ABABDABACDABABCABAB', 'ABABCABAB')
|
||||
# print string_search_rabin_karp('ABABDABACDABABCABAB', 'ABABCABAB', 101)
|
||||
# print string_search_KMP('ABABDABACDABABCABAB', 'ABABCABAB')
|
||||
|
|
Loading…
Reference in New Issue