Skip to content

Commit 3379d2d

Browse files
authored
Merge pull request #428 from dhanashreeg368/Dhanashree
Added Rabin_Karp and KMP Algorithm Programs in Strings Folder
2 parents e4f2f4b + 53a770a commit 3379d2d

File tree

2 files changed

+257
-0
lines changed

2 files changed

+257
-0
lines changed

Strings/KMP_Algorithm.cpp

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*KMP Algorithm - Pattern Searching Algorithm
2+
3+
The KMP algorithm is a string-searching algorithm named after its authors: Knuth, Morris, and Pratt.
4+
This algorithm uses the previous comparison data.
5+
It uses a partial match table to analyze the pattern structure.
6+
The goal of the table is to allow the algorithm not to match any character of pattern more than once.
7+
The basic idea behind KMP’s algorithm is:
8+
whenever we detect a mismatch (after some matches), we already know some of the characters in the text of the next window.
9+
We take advantage of this information to avoid matching the characters that we know will anyway match.
10+
We need to know about proper prefixes and proper suffixes first.
11+
12+
Proper prefix - All the characters in a string, with one or more cut off the end.
13+
“C”, “Co”, “Cod”, “Codi”, and “Codin” are all the proper prefixes of “Coding”.
14+
15+
Proper suffix - All the characters in a string, with one or more cut off the beginning.
16+
“adrid”, “drid”, “rid”, “id”, and “d” are all proper suffixes of “Madrid”.
17+
18+
The value of the partial table is the "length of the longest proper prefix that matches a proper suffix".
19+
20+
Pseudocode -
21+
if table[partial_match_length] > 1:
22+
skip ahead by partial_match_length - table[partial_match_length - 1] characters
23+
else:
24+
don’t get to skip ahead by partial_match_length - table[partial_match_length - 1] characters. See next partial match.
25+
26+
Let’s say we’re matching the pattern “abababca” against the text “bacbababaabcbab”.
27+
Here’s our partial match table again for easy reference
28+
29+
char: | a | b | a | b | a | b | c | a |
30+
index: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
31+
value: | 0 | 0 | 1 | 2 | 3 | 4 | 0 | 1 |
32+
33+
Example -
34+
1. The first match is at index 1.
35+
bacbababaabcbab
36+
|
37+
abababca
38+
Hence partial_match_length = 1
39+
See the next partial match and so on.
40+
Repeat the steps till the last partial match is found.
41+
42+
Time Complexity :
43+
Assuming n is the length of text and m is the length of pattern.
44+
It can search for a pattern in O(n) time as it never re-compares a text symbol that has matched a pattern symbol.
45+
Construction of a partial match table takes O(m) time.
46+
Therefore, the overall time complexity of the KMP algorithm is O(m + n).
47+
*/
48+
49+
#include <iostream>
#include <string>
#include <vector>
50+
using namespace std;
51+
52+
/**
 * Prints every index at which pattern `p` occurs in text `t`, using the
 * Knuth-Morris-Pratt algorithm (O(tlen + plen) character comparisons).
 *
 * Output written to std::cout:
 *   - "The pattern at index 0" (no newline) when `p` is empty;
 *   - "Pattern not found" (no newline) when `t` is shorter than `p`;
 *   - otherwise one line "The pattern occurs at index <i>" per occurrence,
 *     overlapping occurrences included. Nothing is printed when a pattern
 *     that fits into the text simply does not occur in it.
 *
 * @param t text to search in
 * @param p pattern to search for
 */
void KMP(std::string t, std::string p)
{
    const int tlen = static_cast<int>(t.length()); // length of the text
    const int plen = static_cast<int>(p.length()); // length of the pattern

    // An empty pattern trivially matches at the start of the text.
    if (plen == 0)
    {
        std::cout << "The pattern at index 0";
        return;
    }

    // A pattern longer than the text can never occur in it.
    if (tlen < plen)
    {
        std::cout << "Pattern not found";
        return;
    }

    // next[k] is the length of the longest proper prefix of p[0..k-1] that is
    // also a suffix of it, i.e. how many pattern characters are still known to
    // match after a mismatch that follows k matched characters.
    // A std::vector is used because variable-length arrays are not standard C++.
    std::vector<int> next(plen + 1, 0);

    for (int i = 1; i < plen; i++)
    {
        // Start from the border of the prefix that ends just before p[i].
        // (Seeding from next[i + 1], which is still zero at this point, would
        // cap every table entry at 1 and make the search skip real matches.)
        int j = next[i];

        // Fall back to shorter borders until one can be extended by p[i].
        while (j > 0 && p[j] != p[i])
        {
            j = next[j];
        }

        if (p[j] == p[i])
        {
            j++;
        }
        next[i + 1] = j;
    }

    // Scan the text once; `j` is the number of pattern characters matched so far.
    for (int i = 0, j = 0; i < tlen; i++)
    {
        // On a mismatch reuse the longest border instead of rewinding the text.
        while (j > 0 && t[i] != p[j])
        {
            j = next[j];
        }

        if (t[i] == p[j])
        {
            j++;
        }

        if (j == plen)
        {
            std::cout << "The pattern occurs at index " << i - plen + 1 << '\n';
            // Keep the longest border so that overlapping occurrences are found.
            j = next[j];
        }
    }
}
107+
108+
// Program to implement the KMP algorithm in C++
109+
int main()
110+
{
111+
string text; //text
112+
cout<<"Enter text: ";
113+
cin>>text;
114+
string pattern; //pattern whose index of occurrence is to be searched.
115+
cout<<"Enter pattern: ";
116+
cin>>pattern;
117+
KMP(text, pattern); //call to KMP function
118+
return 0;
119+
}

Strings/Rabin_Karp_Algorithm.cpp

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
/* Rabin Karp Algorithm for pattern searching in C++
2+
3+
Rabin-Karp algorithm is an algorithm used for searching/matching patterns in the text using a hash function.
4+
It does not travel through every character in the initial phase rather it filters the characters that do not match
5+
and then performs the comparison.
6+
7+
Working -
8+
A sequence of characters is taken and checked for the possibility of the presence of the required string.
9+
If the possibility is found then, character matching is performed.
10+
If the hash values are unequal, the algorithm will determine the hash value for next plen-character sequence. If the hash values are equal,
11+
the algorithm will analyze the pattern and the plen-character sequence.
12+
In this way, there is only one comparison per text subsequence, and character matching is only required when the hash values match.
13+
14+
Features -
15+
Like naive algorithm we slide the pattern over the string one by one and compare every character in pattern with the text.
16+
To reduce the number of comparisons, we use hashing.
17+
We compare the hash values of the pattern and the current text window if the hash value match,
18+
then only we proceed to compare individual characters of pattern and the text window.
19+
To calculate the hash value of the current window we use the concept of rolling hash.
20+
In rolling hash we compute hash value of current window using the hash value of the previous window.
21+
22+
Time Complexity -
23+
Assuming n is length of text and m is length of pattern.
24+
Its worst-case time is O(nm).
25+
Worst case of Rabin-Karp algorithm occurs when all characters of pattern and text are same as the hash values
26+
of all the substrings of text[] match with hash value of pattern[].
27+
28+
Example -
29+
text – “abdabc”
30+
pattern – “abc”
31+
32+
a b c d
33+
1 2 3 4
34+
35+
text – “a b d a b c”
36+
plen = pattern length = 3
37+
t = 4
38+
39+
hash_0 = 1 * 4^2 + 2 * 4^1 + 4 * 4^0 = 28
40+
hash_1 = 4 * {hash_0 – 1 * 4^2} + 1 = 49
41+
hash_2 = 4 * {hash_1 – 2 * 4^2} + 2 = 70
42+
hash_3 = 4 * {hash_2 – 4 * 4^2} + 3 = 27
43+
44+
Hence in general
45+
hash_i+1 = t * {hash_i – text[i] * t^(plen-1)} + text[i+plen]
46+
47+
*/
48+
49+
//Program
50+
51+
#include <bits/stdc++.h>
52+
using namespace std;
53+
const int t = 256;  // radix of the rolling hash: number of distinct byte values
const int w = 1283; // prime modulus that keeps the hash values small

/**
 * Prints every index at which `pattern` occurs in `text`, using the
 * Rabin-Karp rolling-hash algorithm.
 *
 * Each occurrence is written to std::cout as "Index of occurrence is <i> "
 * (trailing space, no newline); overlapping occurrences are included.
 * An empty pattern is reported once, at index 0. Nothing is printed when the
 * pattern is longer than the text or does not occur in it.
 *
 * @param text    text to search in
 * @param pattern pattern to search for
 */
void Rabin_Karp_Algo(std::string text, std::string pattern)
{
    const int tlen = static_cast<int>(text.length());    // length of the text
    const int plen = static_cast<int>(pattern.length()); // length of the pattern

    // An empty pattern has no characters to hash; report the trivial match only.
    if (plen == 0)
    {
        std::cout << "Index of occurrence is " << 0 << " ";
        return;
    }

    // Without this guard the initial hash loop would read past the end of `text`.
    if (plen > tlen)
    {
        return;
    }

    // Hash of the pattern (hash_p) and of the first text window (hash).
    // Characters are widened through unsigned char so that bytes >= 0x80 are
    // never negative and both hashes always stay in [0, w).
    int hash_p = 0;
    int hash = 0;
    for (int k = 0; k < plen; k++)
    {
        hash_p = (hash_p * t + static_cast<unsigned char>(pattern[k])) % w;
        hash = (hash * t + static_cast<unsigned char>(text[k])) % w;
    }

    // c = t^(plen-1) % w: the weight of the leading character of a window.
    int c = 1;
    for (int k = 1; k < plen; k++)
    {
        c = (c * t) % w;
    }

    // Slide the window over the text one position at a time.
    const int last = tlen - plen; // start index of the last possible window
    for (int i = 0; i <= last; i++)
    {
        // Equal hashes only suggest a match; confirm it character by character.
        if (hash_p == hash &&
            text.compare(static_cast<std::size_t>(i), static_cast<std::size_t>(plen), pattern) == 0)
        {
            std::cout << "Index of occurrence is " << i << " ";
        }

        if (i < last)
        {
            // hash_i+1 = t * {hash_i - text[i] * t^(plen-1)} + text[i+plen]   (mod w)
            // `+ w` keeps the difference non-negative, so `%` never yields a
            // negative hash and no correction step is needed afterwards.
            const int leading = static_cast<unsigned char>(text[i]);
            const int trailing = static_cast<unsigned char>(text[i + plen]);
            hash = (t * (hash - (leading * c) % w + w) + trailing) % w;
        }
    }
}
127+
128+
int main()
129+
{
130+
string text; //text
131+
cout<<"Enter the text: ";
132+
cin>>text;
133+
string pattern; //pattern to be searched
134+
cout<<"Enter the pattern: ";
135+
cin>>pattern;
136+
Rabin_Karp_Algo(text,pattern); //call to function Rabin_Karp_Algo
137+
return 0;
138+
}

0 commit comments

Comments
 (0)