|
| 1 | +/* Rabin Karp Algorithm for pattern searching in C++ |
| 2 | +
|
| 3 | +Rabin-Karp algorithm is an algorithm used for searching/matching patterns in the text using a hash function. |
| 4 | +Instead of comparing characters at every position, it first uses hash values to filter out the positions that cannot match |
| 5 | +and performs the character-by-character comparison only at the remaining positions. |
| 6 | +
|
| 7 | +Working - |
| 8 | +A sequence of characters is taken and checked for the possibility of the presence of the required string. |
| 9 | +If the possibility is found then, character matching is performed. |
| 10 | +If the hash values are unequal, the algorithm will determine the hash value for the next plen-character sequence. If the hash values are equal, |
| 11 | +the algorithm will compare the pattern and the plen-character sequence character by character. |
| 12 | +In this way, there is only one comparison per text subsequence, and character matching is only required when the hash values match. |
| 13 | +
|
| 14 | +Features - |
| 15 | +Like naive algorithm we slide the pattern over the string one by one and compare every character in pattern with the text. |
| 16 | +To reduce the number of comparisons, we use hashing. |
| 17 | +We compare the hash values of the pattern and the current text window; only if the hash values match |
| 18 | +do we proceed to compare the individual characters of the pattern and the text window. |
| 19 | +To calculate the hash value of the current window we use the concept of rolling hash. |
| 20 | +In rolling hash we compute hash value of current window using the hash value of the previous window. |
| 21 | +
|
| 22 | +Time Complexity - |
| 23 | +Assuming n is length of text and m is length of pattern. |
| 24 | +Its worst-case time is O(nm). |
| 25 | +The worst case of the Rabin-Karp algorithm occurs when all characters of the pattern and the text are the same, because the hash values |
| 26 | +of all the substrings of text[] then match the hash value of pattern[], and every window must be compared character by character. |
| 27 | +
|
| 28 | +Example - |
| 29 | +text – “abdabc” |
| 30 | +pattern – “abc” |
| 31 | +
|
| 32 | +a b c d |
| 33 | +1 2 3 4 |
| 34 | +
|
| 35 | +text – “a b d a b c” |
| 36 | +plen = pattern length = 3 |
| 37 | +t = 4 |
| 38 | +
|
| 39 | +hash_0 = 1 * 4^2 + 2 * 4^1 + 4 * 4^0 = 28 |
| 40 | +hash_1 = 4 * {hash_0 – 1 * 4^2} + 1 = 49 |
| 41 | +hash_2 = 4 * {hash_1 – 2 * 4^2} + 2 = 70 |
| 42 | +hash_3 = 4 * {hash_2 – 4 * 4^2} + 3 = 27 |
| 43 | +
|
| 44 | +Hence in general |
| 45 | +hash_i+1 = t * {hash_i – text[i] * t^(plen-1)} + text[i+plen] |
| 46 | +
|
| 47 | +*/ |
| 48 | + |
| 49 | +//Program |
| 50 | + |
| 51 | +#include <bits/stdc++.h> |
| 52 | +using namespace std; |
const int t = 256;  // radix of the polynomial hash: one digit per possible byte value
const int w = 1283; // prime modulus; every hash value is kept in the range [0, w)

/**
 * Prints the starting index of every occurrence of `pattern` in `text`,
 * found with the Rabin-Karp rolling-hash search.
 *
 * Each match is written to std::cout as "Index of occurrence is <i> "
 * (0-based index, trailing space, no newline). Overlapping matches are
 * all reported. Nothing is printed when `pattern` is empty or longer
 * than `text`.
 *
 * @param text    the string to search in
 * @param pattern the string to search for
 */
void Rabin_Karp_Algo(const std::string& text, const std::string& pattern)
{
    const int tlen = static_cast<int>(text.length());
    const int plen = static_cast<int>(pattern.length());

    // No window of the text can hold the pattern; bail out before the
    // hashing loop below would index past the end of `text`.
    if (plen == 0 || plen > tlen)
        return;

    // Weight of the leading character of a window: t^(plen-1) mod w.
    int lead_weight = 1;
    for (int k = 1; k < plen; k++)
        lead_weight = (lead_weight * t) % w;

    // Hash the pattern and the first window of the text. Characters are
    // read as unsigned bytes so that values >= 0x80 cannot make a hash
    // negative on platforms where plain char is signed.
    int pattern_hash = 0;
    int window_hash = 0;
    for (int k = 0; k < plen; k++)
    {
        pattern_hash = (pattern_hash * t + static_cast<unsigned char>(pattern[k])) % w;
        window_hash = (window_hash * t + static_cast<unsigned char>(text[k])) % w;
    }

    const int last_start = tlen - plen;
    for (int i = 0; i <= last_start; i++)
    {
        // Equal hashes only make a match possible; confirm it by comparing
        // the actual characters of the window with the pattern.
        if (pattern_hash == window_hash && text.compare(i, plen, pattern) == 0)
            std::cout << "Index of occurrence is " << i << " ";

        if (i < last_start)
        {
            // Roll the window one position to the right:
            // hash_i+1 = t * (hash_i - text[i] * t^(plen-1)) + text[i+plen]   (mod w)
            const int leading = static_cast<unsigned char>(text[i]);
            const int trailing = static_cast<unsigned char>(text[i + plen]);

            // Adding w before the subtraction keeps the intermediate value
            // non-negative, so no sign fix-up is needed afterwards.
            window_hash = (window_hash + w - (leading * lead_weight) % w) % w;
            window_hash = (window_hash * t + trailing) % w;
        }
    }
}
| 127 | + |
| 128 | +int main() |
| 129 | +{ |
| 130 | + string text; //text |
| 131 | + cout<<"Enter the text: "; |
| 132 | + cin>>text; |
| 133 | + string pattern; //pattern to be searched |
| 134 | + cout<<"Enter the pattern: "; |
| 135 | + cin>>pattern; |
| 136 | + Rabin_Karp_Algo(text,pattern); //call to function Rabin_Karp_Algo |
| 137 | + return 0; |
| 138 | +} |
0 commit comments