Skip to content

Commit 53a770a

Browse files
added comments for Rabin_Karp and KMP Algorithm
1 parent a41e2e9 commit 53a770a

File tree

2 files changed

+151
-37
lines changed

2 files changed

+151
-37
lines changed

Strings/KMP_Algorithm.cpp

Lines changed: 65 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,59 @@
1-
//KMP Algorithm - Pattern Searching Algorithm
1+
/*KMP Algorithm - Pattern Searching Algorithm
2+
3+
The KMP algorithm is also known as the Knuth, Morris, and Pratt string searching algorithm.
4+
This algorithm uses the previous comparison data.
5+
It uses a partial match table to analyze the pattern structure.
6+
The goal of the table is to allow the algorithm not to match any character of pattern more than once.
7+
The basic idea behind KMP’s algorithm is:
8+
whenever we detect a mismatch (after some matches), we already know some of the characters in the text of the next window.
9+
We take advantage of this information to avoid matching the characters that we know will anyway match.
10+
We need to know about proper prefixes and proper suffixes first.
11+
12+
Proper prefix - All the characters in a string, with one or more cut off the end.
13+
“C”, “Co”, “Cod”, “Codi”, and “Codin” are all the proper prefixes of “Coding”.
14+
15+
Proper suffix - All the characters in a string, with one or more cut off the beginning.
16+
“adrid”, “drid”, “rid”, “id”, and “d” are all proper suffixes of “Madrid”.
17+
18+
The value of the partial table is the "length of the longest proper prefix that matches a proper suffix".
19+
20+
Pseudocode -
21+
if table[partial_match_length] > 1:
22+
skip ahead by partial_match_length - table[partial_match_length - 1] characters
23+
else:
24+
don’t get to skip ahead by partial_match_length - table[partial_match_length - 1] characters. See next partial match.
25+
26+
Let’s say we’re matching the pattern “abababca” against the text “bacbababaabcbab”.
27+
Here’s our partial match table again for easy reference
28+
29+
char: | a | b | a | b | a | b | c | a |
30+
index: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
31+
value: | 0 | 0 | 1 | 2 | 3 | 4 | 0 | 1 |
32+
33+
Example -
34+
1. The first match is at index 1.
35+
bacbababaabcbab
36+
|
37+
abababca
38+
Hence partial_match_length = 1
39+
See the next partial match and so on.
40+
Repeat the steps till the last partial match is found.
41+
42+
Time Complexity :
43+
Assuming n is the length of text and m is the length of pattern.
44+
It can search for a pattern in O(n) time as it never re-compares a text symbol that has matched a pattern symbol.
45+
Construction of a partial match table takes O(m) time.
46+
Therefore, the overall time complexity of the KMP algorithm is O(m + n).
47+
*/
248

349
#include <iostream>
450
using namespace std;
551

652
// Function to implement the KMP algorithm
753
void KMP(string t, string p)
854
{
9-
int tlen = t.length();
10-
int plen = p.length();
55+
int tlen = t.length(); //tlen is length of text
56+
int plen = p.length(); //plen is length of pattern
1157

1258
// if `pattern` is an empty string
1359
if (plen == 0)
@@ -16,7 +62,7 @@ void KMP(string t, string p)
1662
return;
1763
}
1864

19-
// if X's length is less than that of Y's
65+
// if text's length is less than that of pattern's length then print pattern not found.
2066
if (tlen < plen)
2167
{
2268
cout << "Pattern not found";
@@ -27,33 +73,33 @@ void KMP(string t, string p)
2773
int next[plen + 1];
2874

2975
for (int i = 0; i < plen + 1; i++) {
30-
next[i] = 0;
76+
next[i] = 0; //initialize all the characters of array as zero.
3177
}
3278

33-
for (int i = 1; i < plen; i++)
79+
for (int i = 1; i < plen; i++) //iterate through pattern
3480
{
35-
int j = next[i + 1];
81+
int j = next[i + 1]; //initialise j as next[i+1]
3682

37-
while (j > 0 && p[j] != p[i]) {
38-
j = next[j];
83+
while (j > 0 && p[j] != p[i]) { //while (j>0 and (pattern[j]!=pattern[i] )
84+
j = next[j]; //initialize the value of j as next[j]
3985
}
4086

41-
if (j > 0 || p[j] == p[i]) {
42-
next[i + 1] = j + 1;
87+
if (j > 0 || p[j] == p[i]) { //if (j>0 or (the j th and i th index of pattern are equal))
88+
next[i + 1] = j + 1; //move to next position
4389
}
4490
}
4591

46-
for (int i = 0, j = 0; i < tlen; i++)
92+
for (int i = 0, j = 0; i < tlen; i++) //iterate in text to search for a pattern in text.
4793
{
48-
if (t[i] == p[j])
94+
if (t[i] == p[j]) //if the letter in text matches with the letter in pattern then
4995
{
50-
if (++j == plen) {
96+
if (++j == plen) { //move ahead and see if the next letter matches
5197
cout << "The pattern occurs at index " << i - j + 1 << endl;
5298
}
5399
}
54-
else if (j > 0)
100+
else if (j > 0) //else if j>0
55101
{
56-
j = next[j];
102+
j = next[j];
57103
i--; // since `i` will be incremented in the next iteration
58104
}
59105
}
@@ -62,12 +108,12 @@ void KMP(string t, string p)
62108
// Program to implement the KMP algorithm in C++
63109
int main()
64110
{
65-
string text;
111+
string text; //text
66112
cout<<"Enter text: ";
67113
cin>>text;
68-
string pattern;
114+
string pattern; //pattern whose index of occurrence is to be searched.
69115
cout<<"Enter pattern: ";
70116
cin>>pattern;
71-
KMP(text, pattern);
117+
KMP(text, pattern); //call to KMP function
72118
return 0;
73119
}

Strings/Rabin_Karp_Algorithm.cpp

Lines changed: 86 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,76 @@
1+
/* Rabin Karp Algorithm for pattern searching in C++
2+
3+
Rabin-Karp algorithm is an algorithm used for searching/matching patterns in the text using a hash function.
4+
It does not travel through every character in the initial phase rather it filters the characters that do not match
5+
and then performs the comparison.
6+
7+
Working -
8+
A sequence of characters is taken and checked for the possibility of the presence of the required string.
9+
If the possibility is found then, character matching is performed.
10+
If the hash values are unequal, the algorithm will determine the hash value for next plen-character sequence. If the hash values are equal,
11+
the algorithm will analyze the pattern and the plen-character sequence.
12+
In this way, there is only one comparison per text subsequence, and character matching is only required when the hash values match.
13+
14+
Features -
15+
Like naive algorithm we slide the pattern over the string one by one and compare every character in pattern with the text.
16+
To reduce the number of comparisons, we use hashing.
17+
We compare the hash values of the pattern and the current text window; if the hash values match,
18+
then only we proceed to compare individual characters of pattern and the text window.
19+
To calculate the hash value of the current window we use the concept of rolling hash.
20+
In rolling hash we compute hash value of current window using the hash value of the previous window.
21+
22+
Time Complexity -
23+
Assuming n is length of text and m is length of pattern.
24+
Its worst-case time is O(nm).
25+
The worst case of the Rabin-Karp algorithm occurs when all characters of pattern and text are the same, because then the hash values
26+
of all the substrings of text[] match with hash value of pattern[].
27+
28+
Example -
29+
text – “abdabc”
30+
pattern – “abc”
31+
32+
a b c d
33+
1 2 3 4
34+
35+
text – “a b d a b c”
36+
plen = pattern length = 3
37+
t = 4
38+
39+
hash_0 = 1 * 4^2 + 2 * 4^1 + 4 * 4^0 = 28
40+
hash_1 = 4 * {hash_0 – 1 * 4^2} + 1 = 49
41+
hash_2 = 4 * {hash_1 – 2 * 4^2} + 2 = 70
42+
hash_3 = 4 * {hash_2 – 4 * 4^2} + 3 = 27
43+
44+
Hence in general
45+
hash_i+1 = t * {hash_i – text[i] * t^(plen-1)} + text[i+plen]
46+
47+
*/
48+
49+
//Program
50+
151
#include <bits/stdc++.h>
252
using namespace std;
3-
const int t = 256;
4-
const int w = 1283;
53+
const int t = 256; //number of characters in the character set
54+
const int w = 1283; //a prime number used as the modulus when computing hash values
555
void Rabin_Karp_Algo(string text,string pattern)
656
{
757

858
//Length of text string.
9-
int tlen = text.length();
59+
int tlen = text.length(); //length of string
1060

1161
//Length of pattern string.
12-
int plen = pattern.length();
62+
int plen = pattern.length(); //length of pattern
1363

1464
int flag;
1565
int c=1,i=0;
1666

17-
//Calculate hash_p(hash value of pattern) and hash_0
18-
int hash_p=0,hash=0;
67+
// Calculate the hash value of pattern(hash_p) and first window of text(hash)
68+
int hash_p=0,hash=0;
69+
70+
//for all character index i of pattern
1971
for(int i=0;i<plen;i++)
2072
{
21-
hash_p=(hash_p*t + pattern[i])%w;
73+
hash_p=(hash_p*t + pattern[i])%w; //formula to calculate hash function
2274
hash=(hash*t + text[i])%w;
2375

2476
}
@@ -28,15 +80,26 @@ void Rabin_Karp_Algo(string text,string pattern)
2880
c=(c*t)%w;
2981

3082
i=0;
31-
while(i<=tlen-plen){
32-
if(hash_p==hash){
83+
84+
// Slide the pattern over text one by one
85+
while(i<=tlen-plen)
86+
{
87+
// Check the hash values of current window of text and pattern.
88+
//If the hash values match then only
89+
// check for characters on by one
90+
if(hash_p==hash)
91+
{
3392
flag=1;
34-
for(int j=0;j<plen;j++){
35-
if(pattern[j]==text[i+j]){
93+
/* Check for characters one by one */
94+
for(int j=0;j<plen;j++)
95+
{
96+
// the window matches only if pattern[0...plen - 1] == text[i, i+1, ...i+plen - 1]
97+
if(pattern[j]==text[i+j])
98+
{
3699
continue;
37100
}
38101
else{
39-
flag = 0;
102+
flag = 0; //if flag is zero break the loop
40103
break;
41104
}
42105
}
@@ -45,10 +108,15 @@ void Rabin_Karp_Algo(string text,string pattern)
45108
}
46109

47110

48-
//Calculate hash value of next window
49-
//hash_i+1 = t * {hash_i - text[i] * t^(plen-1)} + text[i+plen]
50-
if(i<tlen-plen){
111+
112+
// Calculate hash value for next window of text: Remove leading digit, add trailing digit
113+
if(i<tlen-plen)
114+
{
115+
//Calculate hash value of next window
116+
//hash_i+1 = t * {hash_i - text[i] * t^(plen-1)} + text[i+plen]
51117
hash=((t*(hash-text[i]*c))+text[i+plen])%w;
118+
119+
// We might get negative value of hash, converting it to positive
52120
if(hash<0){
53121
hash = hash + w;
54122
}
@@ -59,12 +127,12 @@ void Rabin_Karp_Algo(string text,string pattern)
59127

60128
int main()
61129
{
62-
string text;
130+
string text; //text
63131
cout<<"Enter the text: ";
64132
cin>>text;
65-
string pattern;
133+
string pattern; //pattern to be searched
66134
cout<<"Enter the pattern: ";
67135
cin>>pattern;
68-
Rabin_Karp_Algo(text,pattern);
136+
Rabin_Karp_Algo(text,pattern); //call to function Rabin_Karp_Algo
69137
return 0;
70138
}

0 commit comments

Comments
 (0)