From 36bef763f012ea46bb93c634e556607bed4b6071 Mon Sep 17 00:00:00 2001 From: idrisibrahimerten Date: Thu, 3 Jul 2025 13:09:34 +0300 Subject: [PATCH 01/10] feat(strings): add professional suffix array and LCP implementation --- strings/suffix_array.py | 106 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 strings/suffix_array.py diff --git a/strings/suffix_array.py b/strings/suffix_array.py new file mode 100644 index 000000000000..d8c0ca28769e --- /dev/null +++ b/strings/suffix_array.py @@ -0,0 +1,106 @@ +''' +suffix_array.py + +Professional implementation of Suffix Array and LCP (Longest Common Prefix) array in Python. + +Features: +- Efficient O(n log n) construction using doubling method +- Kasai's algorithm for LCP array in O(n) +- Detailed docstrings and complexity analysis +- Standalone usage example and simple unit tests + +Author: Idris Ibrahim Erten +License: MIT +''' + +def build_suffix_array(s: str) -> list[int]: + """ + Builds the suffix array of the given string using the doubling algorithm. + + Parameters: + s (str): Input string + + Returns: + list[int]: List of starting indices of suffixes in sorted order + + Complexity: + O(n log n) time and O(n) space. + """ + # Append a sentinel that is lexicographically smaller than all other characters + s += '\0' + n = len(s) + # Initial ranking by character code + ranks = [ord(c) for c in s] + sa = list(range(n)) + tmp = [0] * n + k = 1 + # Doubling loop + while k < n: + # Sort by (rank[i], rank[i+k]) pairs + sa.sort(key=lambda i: (ranks[i], ranks[i + k] if i + k < n else -1)) + # Temporary array for new ranks + tmp[sa[0]] = 0 + for i in range(1, n): + prev, curr = sa[i - 1], sa[i] + # Compare pair (rank, next rank) + r_prev = (ranks[prev], ranks[prev + k] if prev + k < n else -1) + r_curr = (ranks[curr], ranks[curr + k] if curr + k < n else -1) + tmp[curr] = tmp[prev] + (1 if r_curr != r_prev else 0) + ranks, tmp = tmp, ranks # reuse lists to save memory + k <<= 1 + if ranks[sa[-1]] == n - 1: + break + # Drop the sentinel index + return sa[1:] + + +def build_lcp_array(s: str, sa: list[int]) -> list[int]: + """ + Builds the LCP (Longest Common Prefix) array using Kasai's algorithm. + + Parameters: + s (str): Original string + sa (list[int]): Suffix array of s + + Returns: + list[int]: LCP array where lcp[i] = LCP(sa[i], sa[i-1]) + + Complexity: + O(n) time and O(n) space. + """ + n = len(sa) + # Inverse of suffix array: pos[i] gives rank of suffix at i + pos = [0] * n + for i, suf in enumerate(sa): + pos[suf] = i + lcp = [0] * n + k = 0 + for i in range(len(s)): + if pos[i] == 0: + k = 0 + continue + j = sa[pos[i] - 1] + # Compare characters starting from k + while i + k < len(s) and j + k < len(s) and s[i + k] == s[j + k]: + k += 1 + lcp[pos[i]] = k + if k: + k -= 1 + return lcp[1:] + + +if __name__ == '__main__': + # Example usage and simple tests + test_strings = ['banana', 'abracadabra', 'mississippi'] + for s in test_strings: + sa = build_suffix_array(s) + lcp = build_lcp_array(s, sa) + print(f"String: {s}") + print(f"Suffix Array: {sa}") + print(f"LCP Array : {lcp}\n") + + # Assertions for correctness + s = 'banana' + expected_sa = [5, 3, 1, 0, 4, 2] # indices of sorted suffixes + assert build_suffix_array(s) == expected_sa, 'SA test failed' + print('All tests passed!') From 732aaf27e6f0f7c00d9f83e8e10d81de25d965e5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Jul 2025 10:13:17 +0000 Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/suffix_array.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/strings/suffix_array.py b/strings/suffix_array.py index d8c0ca28769e..b86ecd3ebebb 100644 --- a/strings/suffix_array.py +++ b/strings/suffix_array.py @@ -1,4 +1,4 @@ -''' +""" suffix_array.py Professional implementation of Suffix Array and LCP (Longest Common Prefix) array in Python. @@ -11,7 +11,8 @@ Author: Idris Ibrahim Erten License: MIT -''' +""" + def build_suffix_array(s: str) -> list[int]: """ @@ -27,7 +28,7 @@ def build_suffix_array(s: str) -> list[int]: O(n log n) time and O(n) space. """ # Append a sentinel that is lexicographically smaller than all other characters - s += '\0' + s += "\0" n = len(s) # Initial ranking by character code ranks = [ord(c) for c in s] @@ -89,9 +90,9 @@ def build_lcp_array(s: str, sa: list[int]) -> list[int]: return lcp[1:] -if __name__ == '__main__': +if __name__ == "__main__": # Example usage and simple tests - test_strings = ['banana', 'abracadabra', 'mississippi'] + test_strings = ["banana", "abracadabra", "mississippi"] for s in test_strings: sa = build_suffix_array(s) lcp = build_lcp_array(s, sa) @@ -100,7 +101,7 @@ def build_lcp_array(s: str, sa: list[int]) -> list[int]: print(f"LCP Array : {lcp}\n") # Assertions for correctness - s = 'banana' + s = "banana" expected_sa = [5, 3, 1, 0, 4, 2] # indices of sorted suffixes - assert build_suffix_array(s) == expected_sa, 'SA test failed' - print('All tests passed!') + assert build_suffix_array(s) == expected_sa, "SA test failed" + print("All tests passed!") From 9e7827300824d1460723da2a8648a01d91f04e48 Mon Sep 17 00:00:00 2001 From: idrisibrahimerten Date: Thu, 3 Jul 2025 13:18:03 +0300 Subject: [PATCH 03/10] feat(strings): add professional suffix array and LCP implementation --- strings/suffix_array.py | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/strings/suffix_array.py b/strings/suffix_array.py index b86ecd3ebebb..3066630b73cd 100644 --- a/strings/suffix_array.py +++ b/strings/suffix_array.py @@ -1,3 +1,4 @@ +<<<<<<< HEAD """ suffix_array.py @@ -14,19 +15,9 @@ """ +======= +>>>>>>> c176d091 (feat(strings): add professional suffix array and LCP implementation) def build_suffix_array(s: str) -> list[int]: - """ - Builds the suffix array of the given string using the doubling algorithm. - - Parameters: - s (str): Input string - - Returns: - list[int]: List of starting indices of suffixes in sorted order - - Complexity: - O(n log n) time and O(n) space. - """ # Append a sentinel that is lexicographically smaller than all other characters s += "\0" n = len(s) @@ -56,19 +47,6 @@ def build_suffix_array(s: str) -> list[int]: def build_lcp_array(s: str, sa: list[int]) -> list[int]: - """ - Builds the LCP (Longest Common Prefix) array using Kasai's algorithm. - - Parameters: - s (str): Original string - sa (list[int]): Suffix array of s - - Returns: - list[int]: LCP array where lcp[i] = LCP(sa[i], sa[i-1]) - - Complexity: - O(n) time and O(n) space. - """ n = len(sa) # Inverse of suffix array: pos[i] gives rank of suffix at i pos = [0] * n From 42c7526df0875d761af4c1a3302bd87e7d5f450e Mon Sep 17 00:00:00 2001 From: idrisibrahimerten Date: Thu, 3 Jul 2025 13:31:01 +0300 Subject: [PATCH 04/10] feat(maths): add Sieve of Atkin prime sieve implementation --- maths/sieve_of_atkin.py | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 maths/sieve_of_atkin.py diff --git a/maths/sieve_of_atkin.py b/maths/sieve_of_atkin.py new file mode 100644 index 000000000000..78f0d36b2b7f --- /dev/null +++ b/maths/sieve_of_atkin.py @@ -0,0 +1,45 @@ +from typing import List + + +def sieve_of_atkin(limit: int) -> List[int]: + if not isinstance(limit, int) or limit < 2: + raise ValueError("limit must be an integer >= 2") + + # Initialize the sieve array + sieve = [False] * (limit + 1) + results: List[int] = [] + + # Preliminary marking based on quadratic forms + from math import sqrt + + sqrt_limit = int(sqrt(limit)) + 1 + for x in range(1, sqrt_limit): + for y in range(1, sqrt_limit): + n = 4 * x * x + y * y + if n <= limit and n % 12 in (1, 5): + sieve[n] = not sieve[n] + n = 3 * x * x + y * y + if n <= limit and n % 12 == 7: + sieve[n] = not sieve[n] + n = 3 * x * x - y * y + if x > y and n <= limit and n % 12 == 11: + sieve[n] = not sieve[n] + + # Mark all multiples of squares as non-prime + for n in range(5, sqrt_limit): + if sieve[n]: + step = n * n + for k in range(step, limit + 1, step): + sieve[k] = False + + # Compile the list of primes + if limit >= 2: + results.extend([2, 3]) + results.extend([i for i in range(5, limit + 1) if sieve[i]]) + return results + + +if __name__ == "__main__": + import doctest + doctest.testmod() + print("All doctests passed!") \ No newline at end of file From 0666594b0e151cdc3d264b8299120f6e08305614 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Jul 2025 10:34:23 +0000 Subject: [PATCH 05/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- maths/sieve_of_atkin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/maths/sieve_of_atkin.py b/maths/sieve_of_atkin.py index 78f0d36b2b7f..8538b2a02b05 100644 --- a/maths/sieve_of_atkin.py +++ b/maths/sieve_of_atkin.py @@ -41,5 +41,6 @@ def sieve_of_atkin(limit: int) -> List[int]: if __name__ == "__main__": import doctest + doctest.testmod() - print("All doctests passed!") \ No newline at end of file + print("All doctests passed!") From c25cfcc8dbb4d9e5800af69ad4da13d10a222032 Mon Sep 17 00:00:00 2001 From: idrisibrahimerten Date: Thu, 3 Jul 2025 13:47:26 +0300 Subject: [PATCH 06/10] =?UTF-8?q?fix:=20stash=20sonras=C4=B1=20d=C3=BCzenl?= =?UTF-8?q?emeler?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- strings/suffix_array.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/strings/suffix_array.py b/strings/suffix_array.py index 3066630b73cd..f2b786e7bd2a 100644 --- a/strings/suffix_array.py +++ b/strings/suffix_array.py @@ -1,22 +1,3 @@ -<<<<<<< HEAD -""" -suffix_array.py - -Professional implementation of Suffix Array and LCP (Longest Common Prefix) array in Python. - -Features: -- Efficient O(n log n) construction using doubling method -- Kasai's algorithm for LCP array in O(n) -- Detailed docstrings and complexity analysis -- Standalone usage example and simple unit tests - -Author: Idris Ibrahim Erten -License: MIT -""" - - -======= ->>>>>>> c176d091 (feat(strings): add professional suffix array and LCP implementation) def build_suffix_array(s: str) -> list[int]: # Append a sentinel that is lexicographically smaller than all other characters s += "\0" From 4f52ef5dacf8b93764203e739abf374cf6c65b03 Mon Sep 17 00:00:00 2001 From: idrisibrahimerten Date: Thu, 3 Jul 2025 14:43:39 +0300 Subject: [PATCH 07/10] test(maths): add doctests and pytest for Sieve of Atkin --- maths/sieve_of_atkin.py | 31 +++++++++++++++++++++++++++++++ maths/test_sieve_of_atkin.py | 10 ++++++++++ 2 files changed, 41 insertions(+) create mode 100644 maths/test_sieve_of_atkin.py diff --git a/maths/sieve_of_atkin.py b/maths/sieve_of_atkin.py index 8538b2a02b05..6ec542697174 100644 --- a/maths/sieve_of_atkin.py +++ b/maths/sieve_of_atkin.py @@ -2,6 +2,37 @@ def sieve_of_atkin(limit: int) -> List[int]: + """ + Compute all prime numbers up to the given limit using the Sieve of Atkin. + + Parameters + ---------- + limit : int + Upper bound of primes to generate (inclusive). + + Returns + ------- + List[int] + A list of prime numbers <= limit. + + Raises + ------ + ValueError + If limit is not an integer or is less than 2. + + References + ---------- + https://en.wikipedia.org/wiki/Sieve_of_Atkin + + Examples + -------- + >>> sieve_of_atkin(10) + [2, 3, 5, 7] + >>> sieve_of_atkin(1) + Traceback (most recent call last): + ... + ValueError: limit must be an integer >= 2 + """ if not isinstance(limit, int) or limit < 2: raise ValueError("limit must be an integer >= 2") diff --git a/maths/test_sieve_of_atkin.py b/maths/test_sieve_of_atkin.py new file mode 100644 index 000000000000..510ceaa482a8 --- /dev/null +++ b/maths/test_sieve_of_atkin.py @@ -0,0 +1,10 @@ +# tests/test_sieve_of_atkin.py +import pytest +from maths.sieve_of_atkin import sieve_of_atkin + +def test_small_primes(): + assert sieve_of_atkin(10) == [2, 3, 5, 7] + +def test_invalid_limit(): + with pytest.raises(ValueError): + sieve_of_atkin(1) From 489fd987e0e8ab89faeeb1cce035ff68e9f16749 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Jul 2025 11:44:07 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- maths/test_sieve_of_atkin.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/maths/test_sieve_of_atkin.py b/maths/test_sieve_of_atkin.py index 510ceaa482a8..afc52c38521a 100644 --- a/maths/test_sieve_of_atkin.py +++ b/maths/test_sieve_of_atkin.py @@ -2,9 +2,11 @@ import pytest from maths.sieve_of_atkin import sieve_of_atkin + def test_small_primes(): assert sieve_of_atkin(10) == [2, 3, 5, 7] + def test_invalid_limit(): with pytest.raises(ValueError): sieve_of_atkin(1) From 3d778d5d45e4c910966e0a9780fc8be591065917 Mon Sep 17 00:00:00 2001 From: idrisibrahimerten Date: Thu, 3 Jul 2025 14:56:06 +0300 Subject: [PATCH 09/10] test(maths): add pytest maths for Sieve of Atkin --- maths/sieve_of_atkin.py | 2 +- maths/test_sieve_of_atkin.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/maths/sieve_of_atkin.py b/maths/sieve_of_atkin.py index 6ec542697174..5199dd3d06ac 100644 --- a/maths/sieve_of_atkin.py +++ b/maths/sieve_of_atkin.py @@ -5,7 +5,7 @@ def sieve_of_atkin(limit: int) -> List[int]: """ Compute all prime numbers up to the given limit using the Sieve of Atkin. - Parameters + Parameterss ---------- limit : int Upper bound of primes to generate (inclusive). diff --git a/maths/test_sieve_of_atkin.py b/maths/test_sieve_of_atkin.py index afc52c38521a..2ea9ad5abb90 100644 --- a/maths/test_sieve_of_atkin.py +++ b/maths/test_sieve_of_atkin.py @@ -1,4 +1,4 @@ -# tests/test_sieve_of_atkin.py +# maths/test_sieve_of_atkin.py import pytest from maths.sieve_of_atkin import sieve_of_atkin From 7958c446783da1de9dfd0df23781997714d90036 Mon Sep 17 00:00:00 2001 From: idrisibrahimerten Date: Fri, 4 Jul 2025 10:02:04 +0300 Subject: [PATCH 10/10] fix(web_programming): return '- ' when no tag found --- maths/sieve_of_atkin.py | 18 ++++++++---------- maths/test_sieve_of_atkin.py | 6 +++++- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/maths/sieve_of_atkin.py b/maths/sieve_of_atkin.py index 5199dd3d06ac..e1f57706655f 100644 --- a/maths/sieve_of_atkin.py +++ b/maths/sieve_of_atkin.py @@ -1,24 +1,24 @@ -from typing import List +import math -def sieve_of_atkin(limit: int) -> List[int]: +def sieve_of_atkin(limit: int) -> list[int]: """ Compute all prime numbers up to the given limit using the Sieve of Atkin. - Parameterss + Parameters ---------- limit : int Upper bound of primes to generate (inclusive). Returns ------- - List[int] + list[int] A list of prime numbers <= limit. Raises ------ ValueError - If limit is not an integer or is less than 2. + If limit is not an integer >= 2. References ---------- @@ -30,7 +30,7 @@ def sieve_of_atkin(limit: int) -> List[int]: [2, 3, 5, 7] >>> sieve_of_atkin(1) Traceback (most recent call last): - ... + ... ValueError: limit must be an integer >= 2 """ if not isinstance(limit, int) or limit < 2: @@ -38,12 +38,10 @@ def sieve_of_atkin(limit: int) -> List[int]: # Initialize the sieve array sieve = [False] * (limit + 1) - results: List[int] = [] + results: list[int] = [] # Preliminary marking based on quadratic forms - from math import sqrt - - sqrt_limit = int(sqrt(limit)) + 1 + sqrt_limit = int(math.sqrt(limit)) + 1 for x in range(1, sqrt_limit): for y in range(1, sqrt_limit): n = 4 * x * x + y * y diff --git a/maths/test_sieve_of_atkin.py b/maths/test_sieve_of_atkin.py index 2ea9ad5abb90..4b668aa3876b 100644 --- a/maths/test_sieve_of_atkin.py +++ b/maths/test_sieve_of_atkin.py @@ -1,5 +1,5 @@ -# maths/test_sieve_of_atkin.py import pytest + from maths.sieve_of_atkin import sieve_of_atkin @@ -7,6 +7,10 @@ def test_small_primes(): assert sieve_of_atkin(10) == [2, 3, 5, 7] +def test_medium_primes(): + assert sieve_of_atkin(20) == [2, 3, 5, 7, 11, 13, 17, 19] + + def test_invalid_limit(): with pytest.raises(ValueError): sieve_of_atkin(1)