Skip to content

Commit aafe3c2

Browse files
authored
Implement Boyer–Moore string-search algorithm (#519)
1 parent 8c41795 commit aafe3c2

File tree

2 files changed

+41
-0
lines changed

2 files changed

+41
-0
lines changed

pydatastructs/strings/algorithms.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,8 @@ def find(text, query, algorithm, **kwargs):
2929
'kmp' -> Knuth-Morris-Pratt as given in [1].
3030
3131
'rabin_karp' -> Rabin–Karp algorithm as given in [2].
32+
33+
'boyer_moore' -> Boyer-Moore algorithm as given in [3].
3234
backend: pydatastructs.Backend
3335
The backend to be used.
3436
Optional, by default, the best available
@@ -64,6 +66,7 @@ def find(text, query, algorithm, **kwargs):
6466
6567
.. [1] https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
6668
.. [2] https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
69+
.. [3] https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm
6770
"""
6871
raise_if_backend_is_not_python(
6972
find, kwargs.get('backend', Backend.PYTHON))
@@ -158,3 +161,38 @@ def _rabin_karp(text, query):
158161
positions.append(i)
159162

160163
return positions
164+
165+
def _boyer_moore(text, query):
    """
    Collect every position at which ``query`` occurs in ``text`` using
    the bad-character heuristic of the Boyer-Moore search.

    Parameters
    ==========

    text
        The sequence that is searched; it must support ``len`` and
        integer indexing.
    query
        The pattern looked for inside ``text``; same requirements.

    Returns
    =======

    DynamicOneDimensionalArray
        Start indices of the matches, in increasing order. The array is
        empty when ``text`` or ``query`` is empty.
    """
    matches = DynamicOneDimensionalArray(int, 0)
    n, m = len(text), len(query)

    if not (n and m):
        return matches

    # Rightmost index at which each symbol of the query occurs; later
    # occurrences overwrite earlier ones.
    last_seen = {symbol: index for index, symbol in enumerate(query)}

    start = 0
    last_start = n - m
    while start <= last_start:
        # Compare the current window against the query right-to-left.
        k = m - 1
        while k >= 0 and query[k] == text[start + k]:
            k -= 1

        if k >= 0:
            # Mismatch at query[k]: line the offending text symbol up with
            # its rightmost occurrence in the query (-1 when it is absent),
            # but always advance by at least one position.
            step = k - last_seen.get(text[start + k], -1)
            start += step if step > 1 else 1
            continue

        matches.append(start)
        if start == last_start:
            # No symbol follows the window, so just step past it.
            start += 1
        else:
            # Use the symbol right after the window to pick the next
            # alignment; an absent symbol skips the whole window plus one.
            start += m - last_seen.get(text[start + m], -1)
    return matches

pydatastructs/strings/tests/test_algorithms.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,9 @@ def test_kmp():
88
def test_rka():
99
_test_common_string_matching('rabin_karp')
1010

11+
def test_bm():
    """Run the shared string-matching checks with the 'boyer_moore' algorithm."""
    _test_common_string_matching('boyer_moore')
13+
1114
def _test_common_string_matching(algorithm):
1215
true_text_pattern_dictionary = {
1316
"Knuth-Morris-Pratt": "-Morris-",

0 commit comments

Comments
 (0)