Rolling hash problems

2 min readJun 20, 2020

An unoptimized solution got TLE

class Solution:
    def longestDupSubstring(self, S: str) -> str:
        if len(set(S))==len(S):return ""
        l, r = 0, len(S)-1
        def good(m):
            cnt = collections.Counter()
            for i in range(len(S)):
                if i+m<=len(S):
                    cnt[S[i:i+m]] +=1
                else:
                    break
            return max([0]+list(cnt.values()))>=2
            
        while l<r:
            m = (l+r+1)//2
            if good(m):
                l = m
            else:
                r = m-1
        ret = l if good(l) else 0
        if ret == 0:return ""
        cnt = collections.Counter()
        for i in range(len(S)):
            if i+ret<=len(S):
                cnt[S[i:i+ret]] +=1
            else:
                break
        return max(cnt.items(), key=lambda x:x[1])[0]

Improve the code to early stop, I still get TLE.

class Solution:
    def longestDupSubstring(self, S: str) -> str:
        if len(set(S))==len(S):return ""
        l, r = 0, len(S)
        def good(m):
            seen = set()
            for i in range(m, len(S)+1):
                curr = S[i-m:i]
                if curr in seen:return i
                seen.add(curr)
            return 0
        res = 0    
        while l<r:
            m = (l+r+1)//2
            pos = good(m)
            if pos:
                l = m 
                res = pos
            else:
                r = m-1
        #print(l)
        return S[res-l:res] if l else ""

If we use a rolling hash to speed up, we can get it accepted.

# time complexity O(n log n)# space complexity O(n)
class Solution:
    def longestDupSubstring(self, S: str) -> str:
        A = [ord(c) - ord('a') for c in S]
        mod = (1<<53)
        def test(L):
            p = (26**L)%mod
            cur = functools.reduce(lambda x, y: (x * 26 + y) % mod, A[:L])
            seen = {cur}
            for i in range(L, len(S)):
                cur = (cur * 26 + A[i] - A[i - L] * p) % mod
                if cur in seen: return i - L + 1
                seen.add(cur)
        res, lo, hi = 0, 0, len(S)
        while lo < hi:
            mi = (lo + hi + 1) // 2
            pos = test(mi)
            if pos:
                lo = mi
                res = pos
            else:
                hi = mi - 1
        return S[res:res + lo]

A more plain code without using the functools are here

class Solution:
    def longestDupSubstring(self, S: str) -> str:
        A = [ord(c)-ord('a') for c in S]
        mod = 1<<43
        
        def test(m):
            p = (26**m)%mod
            cur = 0
            for val in A[:m]:
                cur = (cur*26+val)%mod
            seen = {cur}
            for i in range(m, len(S)):
                cur = (cur*26+A[i]-A[i-m]*p)%mod
                if cur in seen:return i
                seen.add(cur)
            return 0
        l, r = 0, len(S)
        res = 0
        while l<r:
            m = (l+r+1)//2
            pos = test(m)
            if pos:
                res = pos
                l = m
            else:
                r = m - 1
        return S[res-l+1:res+1]if l else ''

Be super careful, if we don’t add ‘%mod’ in this line, we will get TLE

p = (26**m)%mod

28. Implement strStr()

Rolling hash problems

Written by Jimmy (xiaoke) Shen

No responses yet