Rolling hash problems
2 min readJun 20, 2020
1044. Longest Duplicate Substring
An unoptimized solution got TLE
class Solution:
def longestDupSubstring(self, S: str) -> str:
if len(set(S))==len(S):return ""
l, r = 0, len(S)-1
def good(m):
cnt = collections.Counter()
for i in range(len(S)):
if i+m<=len(S):
cnt[S[i:i+m]] +=1
else:
break
return max([0]+list(cnt.values()))>=2
while l<r:
m = (l+r+1)//2
if good(m):
l = m
else:
r = m-1
ret = l if good(l) else 0
if ret == 0:return ""
cnt = collections.Counter()
for i in range(len(S)):
if i+ret<=len(S):
cnt[S[i:i+ret]] +=1
else:
break
return max(cnt.items(), key=lambda x:x[1])[0]
Improve the code to early stop, I still get TLE.
class Solution:
def longestDupSubstring(self, S: str) -> str:
if len(set(S))==len(S):return ""
l, r = 0, len(S)
def good(m):
seen = set()
for i in range(m, len(S)+1):
curr = S[i-m:i]
if curr in seen:return i
seen.add(curr)
return 0
res = 0
while l<r:
m = (l+r+1)//2
pos = good(m)
if pos:
l = m
res = pos
else:
r = m-1
#print(l)
return S[res-l:res] if l else ""
If we use a rolling hash to speed up, we can get it accepted.
# time complexity O(n log n)# space complexity O(n)
class Solution:
def longestDupSubstring(self, S: str) -> str:
A = [ord(c) - ord('a') for c in S]
mod = (1<<53)
def test(L):
p = (26**L)%mod
cur = functools.reduce(lambda x, y: (x * 26 + y) % mod, A[:L])
seen = {cur}
for i in range(L, len(S)):
cur = (cur * 26 + A[i] - A[i - L] * p) % mod
if cur in seen: return i - L + 1
seen.add(cur)
res, lo, hi = 0, 0, len(S)
while lo < hi:
mi = (lo + hi + 1) // 2
pos = test(mi)
if pos:
lo = mi
res = pos
else:
hi = mi - 1
return S[res:res + lo]
A more plain code without using the functools are here
class Solution:
def longestDupSubstring(self, S: str) -> str:
A = [ord(c)-ord('a') for c in S]
mod = 1<<43
def test(m):
p = (26**m)%mod
cur = 0
for val in A[:m]:
cur = (cur*26+val)%mod
seen = {cur}
for i in range(m, len(S)):
cur = (cur*26+A[i]-A[i-m]*p)%mod
if cur in seen:return i
seen.add(cur)
return 0
l, r = 0, len(S)
res = 0
while l<r:
m = (l+r+1)//2
pos = test(m)
if pos:
res = pos
l = m
else:
r = m - 1
return S[res-l+1:res+1]if l else ''
Be super careful, if we don’t add ‘%mod’ in this line, we will get TLE
p = (26**m)%mod