Skip to content

Bitap Algorithm for Exact String Matching #599

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/source/pydatastructs/strings/algorithms.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Algorithms
==========

.. autofunction:: pydatastructs.find
.. autofunction:: pydatastructs.find
.. autofunction:: pydatastructs.bitap_search
3 changes: 2 additions & 1 deletion pydatastructs/strings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
__all__.extend(trie.__all__)

from .algorithms import (
find
find,
bitap_search
)

__all__.extend(algorithms.__all__)
22 changes: 21 additions & 1 deletion pydatastructs/strings/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
Backend, raise_if_backend_is_not_python)

__all__ = [
'find'
'find',
'bitap_search'
]

PRIME_NUMBER, MOD = 257, 1000000007
Expand Down Expand Up @@ -83,6 +84,25 @@ def find(text, query, algorithm, **kwargs):
%(algorithm))
return getattr(algorithms, func)(text, query)

def bitap_search(text, pattern):
    """
    Finds the first occurrence of ``pattern`` in ``text`` using the
    Bitap (Shift-Or) algorithm for exact string matching.

    ``text`` is the sequence to search in and ``pattern`` is the sequence
    to search for. Both are indexed character by character, and the
    characters are used as dictionary keys.

    Returns the index in ``text`` at which the first occurrence of
    ``pattern`` starts, or ``-1`` if ``pattern`` does not occur. An empty
    ``pattern`` matches at index ``0`` of any ``text`` (including the
    empty one), which is the same convention as ``str.find``.
    """
    m = len(pattern)
    if m == 0:
        # Without this guard the scan below reports index 1 for any
        # non-empty text and -1 for empty text.
        return 0

    # pattern_mask[c] has bit i cleared exactly when pattern[i] == c;
    # characters that are absent from the pattern fall back to ~0.
    pattern_mask = {}
    for i, char in enumerate(pattern):
        pattern_mask[char] = pattern_mask.get(char, ~0) & ~(1 << i)

    match_bit = 1 << m
    # Only bits 0..m can ever influence the result, because information
    # only moves towards higher bits. Truncating the state to those bits
    # keeps the integer at a fixed size instead of letting it grow by
    # one bit per character of text.
    state_limit = (match_bit << 1) - 1
    state = ~1 & state_limit

    for i, char in enumerate(text):
        # A cleared bit j (j >= 1) means pattern[:j] ends at text[i].
        state = ((state | pattern_mask.get(char, ~0)) << 1) & state_limit
        if (state & match_bit) == 0:
            return i - m + 1

    return -1

def _knuth_morris_pratt(text, query):
if len(text) == 0 or len(query) == 0:
Expand Down
11 changes: 10 additions & 1 deletion pydatastructs/strings/tests/test_algorithms.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydatastructs.strings import find
from pydatastructs.strings import find, bitap_search

import random, string

Expand All @@ -14,6 +14,15 @@ def test_bm():
def test_zf():
    """Runs ``_test_common_string_matching`` for the 'z_function' algorithm."""
    _test_common_string_matching(algorithm='z_function')

def test_bitap_search():
    """Checks ``bitap_search`` against known first-match positions."""
    # Each entry is (text, pattern, expected result), where the expected
    # result is the start index of the first match or -1 for no match.
    expectations = (
        ("hello world", "world", 6),
        ("abcdef", "def", 3),
        ("abcdef", "gh", -1),
        ("aaaaa", "aa", 0),
        ("abababab", "bab", 1),
        ("", "a", -1),
    )
    for haystack, needle, position in expectations:
        assert bitap_search(haystack, needle) == position
    print("All tests passed.")

def _test_common_string_matching(algorithm):
true_text_pattern_dictionary = {
"Knuth-Morris-Pratt": "-Morris-",
Expand Down
Loading