Merge pull request #696 from dhanashreeg368/Dhanashree

smv1999 · web-flow · commit 14bec1281ec2 · 2021-07-22T20:30:07.000+05:30
added python programs for rabinkarp and KMP
diff --git a/Strings/KMP_Algorithm.py b/Strings/KMP_Algorithm.py
@@ -0,0 +1,101 @@
+"""KMP Algorithm - Pattern Searching Algorithm
+
+The KMP algorithm is also known as the Knuth-Morris-Pratt string-searching algorithm.
+This algorithm uses the previous comparison data.
+It uses a partial match table to analyze the pattern structure. 
+The goal of the table is to allow the algorithm not to match any character of pattern more than once. 
+The basic idea behind KMP’s algorithm is: 
+whenever we detect a mismatch (after some matches), we already know some of the characters in the text of the next window.
+We take advantage of this information to avoid matching the characters that we know will anyway match. 
+We need to know about proper prefixes and proper suffixes first.
+
+Proper prefix - All the characters in a string, with one or more cut off the end. 
+“C”, “Co”, “Cod”, and “Codi” are all the proper prefixes of “Coding”.
+
+Proper suffix - All the characters in a string, with one or more cut off the beginning.
+“adrid”, “drid”, “rid”, “id”, and “d” are all proper suffixes of “Madrid”.
+
+The value of the partial table is the "length of the longest proper prefix that matches a proper suffix".
+
+Pseudocode - 
+if table[partial_match_length] > 1:
+    skip ahead by partial_match_length - table[partial_match_length - 1] characters
+else:
+    don’t get to skip ahead by partial_match_length - table[partial_match_length - 1] characters. See next partial match.
+
+Let’s say we’re matching the pattern “abababca” against the text “bacbababaabcbab”.
+Here’s our partial match table again for easy reference
+
+char:  | a | b | a | b | a | b | c | a |
+index: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 
+value: | 0 | 0 | 1 | 2 | 3 | 4 | 0 | 1 |
+
+Example - 
+1. The first match is at index 1.
+bacbababaabcbab
+ |
+ abababca
+Hence partial_match_length = 1
+See the next partial match and so on.
+Repeat the steps till the last partial match is found.
+
+Time Complexity :
+Assuming n is the length of text and m is the length of pattern.
+It can search for a pattern in O(n) time as it never re-compares a text symbol that has matched a pattern symbol. 
+Construction of a partial match table takes O(m) time. 
+Therefore, the overall time complexity of the KMP algorithm is O(m + n).
+"""
+
+# Python program for KMP Algorithm
+def KMPSearch(pat, txt):
+    """Print and return every index at which ``pat`` occurs in ``txt``.
+
+    Uses the Knuth-Morris-Pratt algorithm; overlapping occurrences are
+    reported. One line "Found pattern at index <i>" is printed per match.
+
+    pat: pattern to look for; an empty pattern yields no matches.
+    txt: text to search in.
+    Returns the list of 0-based start indices, in increasing order.
+    """
+    plen = len(pat)
+    tlen = len(txt)
+    matches = []
+    if plen == 0:
+        return matches  # nothing to search for; also avoids indexing pat[0]
+    # lps[i] = length of the longest proper prefix of pat[:i+1] that is
+    # also a suffix of it; lps[0] is always 0.
+    lps = [0] * plen
+    alen = 0  # length of the previous longest prefix-suffix
+    i = 1
+    while i < plen:
+        if pat[i] == pat[alen]:
+            alen += 1
+            lps[i] = alen
+            i += 1
+        elif alen != 0:
+            alen = lps[alen - 1]  # try the next shorter border; i stays put
+        else:
+            i += 1  # no border ends here; lps[i] keeps its initial 0
+
+    i = 0  # index into txt
+    j = 0  # index into pat
+    while i < tlen:
+        if pat[j] == txt[i]:
+            i += 1
+            j += 1
+            if j == plen:
+                print("Found pattern at index " + str(i - j))
+                matches.append(i - j)
+                j = lps[j - 1]  # continue so overlapping matches are found
+        elif j != 0:
+            # Mismatch after j matches: reuse the border, do not rescan txt.
+            j = lps[j - 1]
+        else:
+            i += 1
+    return matches
+
+print("enter text: ")  # each prompt goes out on its own line
+txt = input()  # text to search in
+print("enter pattern; ")
+pat = input()  # pattern to look for
+KMPSearch(pat, txt)  # search txt for pat
diff --git a/Strings/Rabin_Karp.py b/Strings/Rabin_Karp.py
@@ -0,0 +1,99 @@
+""" Rabin Karp Algorithm for pattern searching in Python
+
+Rabin-Karp algorithm is an algorithm used for searching/matching patterns in the text using a hash function. 
+It does not travel through every character in the initial phase rather it filters the characters that do not match
+and then performs the comparison.
+
+Working - 
+A sequence of characters is taken and checked for the possibility of the presence of the required string. 
+If the possibility is found then, character matching is performed.
+If the hash values are unequal, the algorithm will determine the hash value for next plen-character sequence. If the hash values are equal, 
+the algorithm will analyze the pattern and the plen-character sequence. 
+In this way, there is only one comparison per text subsequence, and character matching is only required when the hash values match.
+
+Features - 
+Like naive algorithm we slide the pattern over the string one by one and compare every character in pattern with the text.
+To reduce the number of comparisons, we use hashing. 
+We compare the hash values of the pattern and the current text window if the hash value match, 
+then only we proceed to compare individual characters of pattern and the text window.
+To calculate the hash value of the current window we use the concept of rolling hash. 
+In rolling hash we compute hash value of current window using the hash value of the previous window.
+
+Time Complexity - 
+Assuming n is length of text and m is length of pattern.
+Its worst-case time is O(nm). 
+The worst case of the Rabin-Karp algorithm occurs when all characters of the pattern and the text are the same,
+because then the hash values of all the substrings of text[] match the hash value of pattern[].
+
+Example - 
+text – “abdabc”
+pattern – “abc”
+
+a b c d 
+1 2 3 4 
+
+text – “a  b  d  a  b  c”
+plen = pattern length = 3
+t  =  4
+
+hash_0  =  1 * 4^2  +  2 * 4^1  +  4 * 4^0  =  28
+hash_1  =  4 * {hash_0 – 1 * 4^2} + 1  =  49
+hash_2  =  4 * {hash_1 – 2 * 4^2} + 2  =  70
+hash_3  =  4 * {hash_2 – 4 * 4^2} + 3  =  27
+
+Hence in general
+hash_i+1  =  t * {hash_i  –  text[i] * t^(plen-1)} + text[i+plen]
+
+"""
+
+#Program
+
+
+d = 10  # radix of the polynomial rolling hash
+
+def Rabin_Karp(pattern, text, q):
+    """Print and return every position at which ``pattern`` occurs in ``text``.
+
+    Window hashes are compared first; characters are compared only when the
+    hashes agree, so a hash collision can never be reported as a match.
+
+    pattern: string to look for; an empty pattern yields no matches.
+    text: string to search in.
+    q: positive modulus for the hash values.
+    Returns the list of 1-based start positions, in increasing order; each
+    one is also printed as "Pattern is found at position: <pos>".
+    """
+    m = len(pattern)
+    n = len(text)
+    found = []
+    if m == 0 or m > n:
+        return found  # no window of the text can hold the pattern
+
+    # h = d^(m-1) % q, the weight of the leading character of a window
+    h = 1
+    for _ in range(m - 1):
+        h = (h * d) % q
+
+    # Hash of the pattern (p) and of the first window of the text (t)
+    p = t = 0
+    for k in range(m):
+        p = (d * p + ord(pattern[k])) % q
+        t = (d * t + ord(text[k])) % q
+
+    for i in range(n - m + 1):
+        # Equal hashes only suggest a match; confirm on the characters.
+        if p == t and text[i:i + m] == pattern:
+            print("Pattern is found at position: " + str(i + 1))
+            found.append(i + 1)
+        if i < n - m:
+            # Roll the window: drop text[i], append text[i+m]; % q is >= 0.
+            t = (d * (t - ord(text[i]) * h) + ord(text[i + m])) % q
+    return found
+
+
+print("Enter text: ")
+text = input()  # text to search in
+print("Enter pattern: ")
+pattern = input()  # pattern to look for
+q = len(pattern) + len(text)  # hash modulus: combined length of both inputs
+Rabin_Karp(pattern, text, q)