1
+ /* Rabin Karp Algorithm for pattern searching in C++
2
+
3
+ Rabin-Karp algorithm is an algorithm used for searching/matching patterns in the text using a hash function.
4
+ It does not travel through every character in the initial phase; rather, it filters out the characters that do not match
5
+ and then performs the comparison.
6
+
7
+ Working -
8
+ A sequence of characters is taken and checked for the possibility of the presence of the required string.
9
+ If the possibility is found then, character matching is performed.
10
+ If the hash values are unequal, the algorithm will determine the hash value for the next plen-character sequence. If the hash values are equal,
11
+ the algorithm will analyze the pattern and the plen-character sequence.
12
+ In this way, there is only one comparison per text subsequence, and character matching is only required when the hash values match.
13
+
14
+ Features -
15
+ Like naive algorithm we slide the pattern over the string one by one and compare every character in pattern with the text.
16
+ To reduce the number of comparisons, we use hashing.
17
+ We compare the hash values of the pattern and the current text window; only if the hash values match
18
+ do we proceed to compare the individual characters of the pattern and the text window.
19
+ To calculate the hash value of the current window we use the concept of rolling hash.
20
+ In rolling hash we compute hash value of current window using the hash value of the previous window.
21
+
22
+ Time Complexity -
23
+ Assuming n is length of text and m is length of pattern.
24
+ Its worst-case time is O(nm).
25
+ The worst case of the Rabin-Karp algorithm occurs when all characters of the pattern and the text are the same, because then the hash values
26
+ of all the substrings of text[] match the hash value of pattern[], and every window must be compared character by character.
27
+
28
+ Example -
29
+ text – “abdabc”
30
+ pattern – “abc”
31
+
32
+ a b c d
33
+ 1 2 3 4
34
+
35
+ text – “a b d a b c”
36
+ plen = pattern length = 3
37
+ t = 4
38
+
39
+ hash_0 = 1 * 4^2 + 2 * 4^1 + 4 * 4^0 = 28
40
+ hash_1 = 4 * {hash_0 – 1 * 4^2} + 1 = 49
41
+ hash_2 = 4 * {hash_1 – 2 * 4^2} + 2 = 70
42
+ hash_3 = 4 * {hash_2 – 4 * 4^2} + 3 = 27
43
+
44
+ Hence in general
45
+ hash_i+1 = t * {hash_i – text[i] * t^(plen-1)} + text[i+plen]
46
+
47
+ */
48
+
49
+ // Program
50
+
1
51
#include < bits/stdc++.h>
2
52
using namespace std ;
3
- const int t = 256 ;
4
- const int w = 1283 ;
53
+ const int t = 256 ; // number of characters in the character set
54
+ const int w = 1283 ; // a prime number (a prime number of find hash location)
5
55
void Rabin_Karp_Algo (string text,string pattern)
6
56
{
7
57
8
58
// Length of text string.
9
- int tlen = text.length ();
59
+ int tlen = text.length (); // length of string
10
60
11
61
// Length of pattern string.
12
- int plen = pattern.length ();
62
+ int plen = pattern.length (); // length of pattern
13
63
14
64
int flag;
15
65
int c=1 ,i=0 ;
16
66
17
- // Calculate hash_p(hash value of pattern) and hash_0
18
- int hash_p=0 ,hash=0 ;
67
+ // Calculate the hash value of pattern(hash_p) and first window of text(hash)
68
+ int hash_p=0 ,hash=0 ;
69
+
70
+ // for all character index i of pattern
19
71
for (int i=0 ;i<plen;i++)
20
72
{
21
- hash_p=(hash_p*t + pattern[i])%w;
73
+ hash_p=(hash_p*t + pattern[i])%w; // formula to calculate hash function
22
74
hash=(hash*t + text[i])%w;
23
75
24
76
}
@@ -28,15 +80,26 @@ void Rabin_Karp_Algo(string text,string pattern)
28
80
c=(c*t)%w;
29
81
30
82
i=0 ;
31
- while (i<=tlen-plen){
32
- if (hash_p==hash){
83
+
84
+ // Slide the pattern over text one by one
85
+ while (i<=tlen-plen)
86
+ {
87
+ // Check the hash values of current window of text and pattern.
88
+ // If the hash values match then only
89
+ // check for characters one by one
90
+ if (hash_p==hash)
91
+ {
33
92
flag=1 ;
34
- for (int j=0 ;j<plen;j++){
35
- if (pattern[j]==text[i+j]){
93
+ /* Check for characters one by one */
94
+ for (int j=0 ;j<plen;j++)
95
+ {
96
+ // compare pattern[j] with text[i+j]; a full match means pattern[0...plen-1] == text[i...i+plen-1]
97
+ if (pattern[j]==text[i+j])
98
+ {
36
99
continue ;
37
100
}
38
101
else {
39
- flag = 0 ;
102
+ flag = 0 ; // if flag is zero break the loop
40
103
break ;
41
104
}
42
105
}
@@ -45,10 +108,15 @@ void Rabin_Karp_Algo(string text,string pattern)
45
108
}
46
109
47
110
48
- // Calculate hash value of next window
49
- // hash_i+1 = t * {hash_i - text[i] * t^(plen-1)} + text[i+plen]
50
- if (i<tlen-plen){
111
+
112
+ // Calculate hash value for next window of text: Remove leading digit, add trailing digit
113
+ if (i<tlen-plen)
114
+ {
115
+ // Calculate hash value of next window
116
+ // hash_i+1 = t * {hash_i - text[i] * t^(plen-1)} + text[i+plen]
51
117
hash=((t*(hash-text[i]*c))+text[i+plen])%w;
118
+
119
+ // We might get a negative value of hash; convert it to a positive one by adding w
52
120
if (hash<0 ){
53
121
hash = hash + w;
54
122
}
@@ -59,12 +127,12 @@ void Rabin_Karp_Algo(string text,string pattern)
59
127
60
128
int main ()
61
129
{
62
- string text;
130
+ string text; // text
63
131
cout<<" Enter the text: " ;
64
132
cin>>text;
65
- string pattern;
133
+ string pattern; // pattern to be searched
66
134
cout<<" Enter the pattern: " ;
67
135
cin>>pattern;
68
- Rabin_Karp_Algo (text,pattern);
136
+ Rabin_Karp_Algo (text,pattern); // call to function Rabin_Karp_Algo
69
137
return 0 ;
70
138
}
0 commit comments