Merge pull request #428 from dhanashreeg368/Dhanashree

smv1999 · web-flow · commit 3379d2d031c3 · 2021-07-13T22:32:29.000+05:30
Added Rabin_Karp and KMP Algorithm Programs in Strings Folder
diff --git a/Strings/KMP_Algorithm.cpp b/Strings/KMP_Algorithm.cpp
@@ -0,0 +1,119 @@
+/*KMP Algorithm - Pattern Searching Algorithm
+
+The KMP algorithm is also known as the Knuth-Morris-Pratt string searching algorithm.
+This algorithm uses the previous comparison data.
+It uses a partial match table to analyze the pattern structure. 
+The goal of the table is to allow the algorithm not to match any character of pattern more than once. 
+The basic idea behind KMP’s algorithm is: 
+whenever we detect a mismatch (after some matches), we already know some of the characters in the text of the next window.
+We take advantage of this information to avoid matching the characters that we know will anyway match. 
+We need to know about proper prefixes and proper suffixes first.
+
+Proper prefix - All the characters in a string, with one or more cut off the end. 
+“C”, “Co”, “Cod”, and “Codi” are all the proper prefixes of “Coding”.
+
+Proper suffix - All the characters in a string, with one or more cut off the beginning. 
+“adrid”, “drid”, “rid”, “id”, and “d” are all proper suffixes of “Madrid”.
+
+The value of the partial table is the "length of the longest proper prefix that matches a proper suffix".
+
+Pseudocode - 
+if table[partial_match_length] > 1:
+    skip ahead by partial_match_length - table[partial_match_length - 1] characters
+else:
+    do not skip ahead by partial_match_length - table[partial_match_length - 1] characters; move on to the next partial match.
+
+Let’s say we’re matching the pattern “abababca” against the text “bacbababaabcbab”.
+Here’s our partial match table again for easy reference
+
+char:  | a | b | a | b | a | b | c | a |
+index: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 
+value: | 0 | 0 | 1 | 2 | 3 | 4 | 0 | 1 |
+
+Example - 
+1. The first match is at index 1.
+bacbababaabcbab
+ |
+ abababca
+Hence partial_match_length = 1
+See the next partial match and so on.
+Repeat the steps till the last partial match is found.
+
+Time Complexity :
+Assuming n is the length of text and m is the length of pattern.
+It can search for a pattern in O(n) time as it never re-compares a text symbol that has matched a pattern symbol. 
+Construction of a partial match table takes O(m) time. 
+Therefore, the overall time complexity of the KMP algorithm is O(m + n).
+*/
+#include <iostream>
+#include <string>
+#include <vector>
+using namespace std;
+// Function to implement the KMP algorithm.
+// Prints "The pattern occurs at index <k>" (one line per occurrence, overlapping
+// occurrences included) for every index k at which pattern `p` occurs in text
+// `t`, or "Pattern not found" when `p` does not occur in `t` at all.
+void KMP(std::string t, std::string p)
+{
+    const int tlen = static_cast<int>(t.length());   //tlen is length of text
+    const int plen = static_cast<int>(p.length());   //plen is length of pattern
+
+    // if `pattern` is an empty string it trivially occurs at the start of the text
+    if (plen == 0)
+    {
+        std::cout << "The pattern occurs at index 0";
+        return;
+    }
+
+    // if text's length is less than that of pattern's length then print pattern not found.
+    if (tlen < plen)
+    {
+        std::cout << "Pattern not found";
+        return;
+    }
+
+    // `next[k]` is the length of the longest proper prefix of p[0..k-1] that is also
+    // a suffix of it, i.e. how many pattern characters stay matched after a mismatch.
+    // std::vector replaces the former variable-length array, which is not standard C++.
+    std::vector<int> next(plen + 1, 0);
+
+    for (int i = 1; i < plen; i++)     //iterate through pattern
+    {
+        int j = next[i];               //border of p[0..i-1]; next[i + 1] is not computed yet
+        while (j > 0 && p[j] != p[i]) {
+            j = next[j];               //fall back to the next shorter border
+        }
+
+        next[i + 1] = (p[j] == p[i]) ? j + 1 : 0;   //extend the border by p[i] if possible
+    }
+
+    bool found = false;
+    for (int i = 0, j = 0; i < tlen; i++)       //iterate in text to search for a pattern in text.
+    {
+        while (j > 0 && t[i] != p[j]) {
+            j = next[j];               //mismatch: keep the longest border instead of rescanning text
+        }
+        if (t[i] == p[j] && ++j == plen) {
+            std::cout << "The pattern occurs at index " << i - j + 1 << std::endl;
+            found = true;
+            j = next[j];               //resume from the border so overlapping matches are found
+        }
+    }
+
+    if (!found) {
+        std::cout << "Pattern not found";
+    }
+}
+
+// Program to implement the KMP algorithm in C++
+int main()
+{
+    // Read two whitespace-delimited words from stdin, then run the search.
+    std::string haystack, needle;
+    std::cout << "Enter text: ";
+    std::cin >> haystack;              // text to search in
+    std::cout << "Enter pattern: ";
+    std::cin >> needle;                // pattern whose occurrences are reported
+    KMP(haystack, needle);             // prints the result itself
+    return 0;
+}
diff --git a/Strings/Rabin_Karp_Algorithm.cpp b/Strings/Rabin_Karp_Algorithm.cpp
@@ -0,0 +1,138 @@
+/* Rabin Karp Algorithm for pattern searching in C++
+
+Rabin-Karp algorithm is an algorithm used for searching/matching patterns in the text using a hash function. 
+It does not travel through every character in the initial phase rather it filters the characters that do not match
+and then performs the comparison.
+
+Working - 
+A sequence of characters is taken and checked for the possibility of the presence of the required string. 
+If the possibility is found then, character matching is performed.
+If the hash values are unequal, the algorithm will determine the hash value for next plen-character sequence. If the hash values are equal, 
+the algorithm will analyze the pattern and the plen-character sequence. 
+In this way, there is only one comparison per text subsequence, and character matching is only required when the hash values match.
+
+Features - 
+Like naive algorithm we slide the pattern over the string one by one and compare every character in pattern with the text.
+To reduce the number of comparisons, we use hashing. 
+We compare the hash values of the pattern and the current text window; only if the hash values match
+do we proceed to compare the individual characters of the pattern and the text window.
+To calculate the hash value of the current window we use the concept of rolling hash. 
+In rolling hash we compute hash value of current window using the hash value of the previous window.
+
+Time Complexity - 
+Assuming n is length of text and m is length of pattern.
+Its worst-case time is O(nm). 
+Worst case of Rabin-Karp algorithm occurs when all characters of pattern and text are same as the hash values
+of all the substrings of text[] match with hash value of pattern[].
+
+Example - 
+text – “abdabc”
+pattern – “abc”
+
+a b c d 
+1 2 3 4 
+
+text – “a  b  d  a  b  c”
+plen = pattern length = 3
+t  =  4
+
+hash_0  =  1 * 4^2  +  2 * 4^1  +  4 * 4^0  =  28
+hash_1  =  4 * {hash_0 – 1 * 4^2} + 1  =  49
+hash_2  =  4 * {hash_1 – 2 * 4^2} + 2  =  70
+hash_3  =  4 * {hash_2 – 4 * 4^2} + 3  =  27
+
+Hence in general
+hash_i+1  =  t * {hash_i  –  text[i] * t^(plen-1)} + text[i+plen]
+
+*/
+
+//Program
+
+#include <bits/stdc++.h>
+using namespace std;
+const int t = 256;                    //radix of the rolling hash: one "digit" per possible byte value
+const int w = 1283;                   //prime modulus; every hash value is kept in [0, w)
+
+// Rabin-Karp pattern search.
+// Prints "Index of occurrence is <k> " for every index k at which `pattern`
+// occurs in `text` (overlapping occurrences included); prints nothing when
+// the pattern does not occur.
+//   text    - string to search in
+//   pattern - string to search for
+// Window hashes only pre-filter candidates: each candidate is still verified
+// character by character, so a hash collision never yields a false match.
+void Rabin_Karp_Algo(std::string text,std::string pattern)
+{
+    const int tlen = static_cast<int>(text.length());       //length of text
+    const int plen = static_cast<int>(pattern.length());    //length of pattern
+
+    // A pattern longer than the text cannot occur; without this guard the
+    // first-window loop below would read past the end of `text`.
+    if(plen>tlen)
+    {
+        return;
+    }
+
+    // An empty pattern is reported once, at the start of the text.
+    if(plen==0)
+    {
+        std::cout<<"Index of occurrence is 0 ";
+        return;
+    }
+
+    // Characters are hashed as unsigned bytes (0..255). Plain `char` may be
+    // signed; negative digits left hash_p negative while the rolled window hash
+    // was normalised to [0, w), so real matches in non-ASCII text were skipped.
+    // Calculate the hash value of pattern(hash_p) and first window of text(hash)
+    int hash_p=0,hash=0;
+    for(int k=0;k<plen;k++)
+    {
+        hash_p=(hash_p*t + static_cast<unsigned char>(pattern[k]))%w;
+        hash=(hash*t + static_cast<unsigned char>(text[k]))%w;
+    }
+
+    //Calculate (t^(plen-1))%w, the weight of the leading character of a window
+    int c=1;
+    for(int k=1;k<=plen-1;k++)
+    {
+        c=(c*t)%w;
+    }
+
+    // Slide the pattern over text one by one
+    for(int i=0;i<=tlen-plen;i++)
+    {
+        // Equal hashes only make a match possible, so compare the characters
+        // of the window as well before reporting it.
+        if(hash_p==hash && text.compare(i,plen,pattern)==0)
+        {
+            std::cout<<"Index of occurrence is "<<i<<" ";
+        }
+
+        // Calculate hash value for next window of text: Remove leading digit, add trailing digit
+        if(i<tlen-plen)
+        {
+            //hash_i+1 = t * {hash_i - text[i] * t^(plen-1)} + text[i+plen]
+            const int lead=static_cast<unsigned char>(text[i]);
+            const int trail=static_cast<unsigned char>(text[i+plen]);
+            hash=(t*(hash-lead*c)+trail)%w;
+
+            // The subtraction can leave a negative remainder; bring it back into [0, w)
+            if(hash<0)
+            {
+                hash+=w;
+            }
+        }
+    }
+}
+
+int main()
+{
+    // Read two whitespace-delimited words from stdin, then run the search.
+    std::string haystack, needle;
+    std::cout << "Enter the text: ";
+    std::cin >> haystack;                  // text to search in
+    std::cout << "Enter the pattern: ";
+    std::cin >> needle;                    // pattern to be searched
+    Rabin_Karp_Algo(haystack, needle);     // prints every index of occurrence
+    return 0;
+}