def bitap_search(text, pattern):
    """
    Finds the first occurrence of a pattern in a text using the
    Bitap (Shift-Or) algorithm for exact string matching.

    Parameters
    ==========

    text: str
        The string in which the pattern is searched.
    pattern: str
        The string to be searched for.

    Returns
    =======

    index: int
        The index in ``text`` at which the first occurrence of
        ``pattern`` starts. ``0`` is returned for an empty ``pattern``
        (the same convention as ``str.find``) and ``-1`` is returned
        when ``pattern`` does not occur in ``text``.

    Examples
    ========

    >>> from pydatastructs import bitap_search
    >>> bitap_search("hello world", "world")
    6
    >>> bitap_search("abcdef", "gh")
    -1
    >>> bitap_search("abcdef", "")
    0
    """
    m = len(pattern)
    if m == 0:
        # Without this guard the match bit (1 << 0) is already clear after
        # the first text character and the loop below would report index 1.
        return 0
    if m > len(text):
        # A pattern longer than the text can never match.
        return -1

    # pattern_mask[c] has bit i cleared exactly when pattern[i] == c;
    # every other bit (including all bits above m - 1) stays set.
    pattern_mask = {}
    for i, char in enumerate(pattern):
        pattern_mask[char] = pattern_mask.get(char, ~0) & ~(1 << i)

    no_match_mask = ~0  # mask for characters that are absent from pattern
    match_bit = 1 << m
    # Bit j of state is clear when the j characters of text consumed most
    # recently equal pattern[:j]; bit 0 is clear for the empty prefix.
    state = ~1
    for i, char in enumerate(text):
        state |= pattern_mask.get(char, no_match_mask)
        state <<= 1
        if not state & match_bit:
            # Bit m is clear, so the whole pattern ends at position i.
            return i - m + 1

    return -1


def test_bitap_search():
    """Checks bitap_search on matches, misses and degenerate inputs."""
    assert bitap_search("hello world", "world") == 6
    assert bitap_search("abcdef", "def") == 3
    assert bitap_search("abcdef", "gh") == -1
    assert bitap_search("aaaaa", "aa") == 0
    assert bitap_search("abababab", "bab") == 1
    assert bitap_search("", "a") == -1
    # An empty pattern matches at the start of any text
    assert bitap_search("abc", "") == 0
    assert bitap_search("", "") == 0
    # A pattern longer than the text cannot match
    assert bitap_search("ab", "abc") == -1
    assert bitap_search("abc", "abc") == 0