Skip to content

Fixed #1077 Add extra check for window size #1078

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 39 commits into from
Apr 6, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
87f3873
Add extra check for window size
NimaSarajpoor Mar 27, 2025
f8d6df5
update module to include extra check for self join
NimaSarajpoor Mar 27, 2025
f174ed6
add tests for warning
NimaSarajpoor Mar 28, 2025
f611339
revise comment
NimaSarajpoor Mar 28, 2025
c2073ef
ignore coverage
NimaSarajpoor Mar 28, 2025
6c69155
minor improvement in docstring
NimaSarajpoor Mar 28, 2025
e63860c
fix flake8
NimaSarajpoor Mar 28, 2025
91f767e
Revised test function using expected signature
NimaSarajpoor Mar 29, 2025
ab411aa
fixed format
NimaSarajpoor Mar 29, 2025
b7494d9
Revise function to pass test
NimaSarajpoor Mar 30, 2025
3c87e0d
Update stumpy/core.py
NimaSarajpoor Mar 30, 2025
252d52b
improve comments
NimaSarajpoor Mar 30, 2025
8e5d9af
improve readability of function
NimaSarajpoor Mar 30, 2025
113b5c5
minor improvement in the description of param
NimaSarajpoor Mar 30, 2025
82caebb
remove redundant test function
NimaSarajpoor Mar 30, 2025
f29732f
Revise logic and the comment
NimaSarajpoor Mar 31, 2025
6f308a3
improving comments
NimaSarajpoor Mar 31, 2025
77b878b
minor change
NimaSarajpoor Mar 31, 2025
2c68716
minor change in comment
NimaSarajpoor Apr 1, 2025
643b4b0
minor change in comment
NimaSarajpoor Apr 1, 2025
a15b757
update aamp for checking window size
NimaSarajpoor Apr 1, 2025
316bf07
improve docstring and comments
NimaSarajpoor Apr 2, 2025
9f71816
improve docstring
NimaSarajpoor Apr 2, 2025
445a6cb
use smaller input to make test function more understandable
NimaSarajpoor Apr 2, 2025
7751792
updated stumped and aamped
NimaSarajpoor Apr 2, 2025
8bff40b
updated maamp and maamped modules
NimaSarajpoor Apr 2, 2025
f0cbfae
update different modules to consider the change in core.check_window_…
NimaSarajpoor Apr 2, 2025
aa61b24
minor fix
NimaSarajpoor Apr 2, 2025
9349e2a
improve comments
NimaSarajpoor Apr 2, 2025
54cd2fa
improve comments
NimaSarajpoor Apr 2, 2025
917fcc4
improved the explanations
NimaSarajpoor Apr 2, 2025
be4d6bb
minor change in the description of function
NimaSarajpoor Apr 2, 2025
97e6f2b
improve the clarity of the logic
NimaSarajpoor Apr 5, 2025
32a15f3
improve comment
NimaSarajpoor Apr 5, 2025
90d3901
improve description of function
NimaSarajpoor Apr 5, 2025
17e2db9
minor change
NimaSarajpoor Apr 5, 2025
0a25af1
improve readability and consistency
NimaSarajpoor Apr 5, 2025
e29cdca
minor change
NimaSarajpoor Apr 5, 2025
949db7e
minor changes
NimaSarajpoor Apr 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,11 +554,12 @@ def get_max_window_size(n):
return max_m


def check_window_size(m, max_size=None):
def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None):
"""
Check the window size and ensure that it is greater than or equal to 3 and, if
`max_size` is provided, ensure that the window size is less than or equal to the
`max_size`
`max_size`. Furthermore, if `excl_zone` is provided, then it will also check if the
window size is too large and could lead to meaningless results.

Parameters
----------
Expand All @@ -568,6 +569,13 @@ def check_window_size(m, max_size=None):
max_size : int, default None
The maximum window size allowed

excl_zone : int, default None
The exclusion zone. If provided, then the `last_start_index` must also be
provided.

last_start_index : int, default None
The start index of the last subsequence.

Returns
-------
None
Expand All @@ -589,6 +597,37 @@ def check_window_size(m, max_size=None):
if max_size is not None and m > max_size:
raise ValueError(f"The window size must be less than or equal to {max_size}")

if excl_zone is not None:
if last_start_index is None: # pragma: no cover
raise ValueError(
"last_start_index must be provided when excl_zone is not None"
)

# Check if subsequences have non-trivial neighbours

# Case 1:
# There is at least one subsequence with non-trivial neighbour
# i.e. For SOME `i`, there exists at least one `j` such that |i - j| > excl_zone
# In this case, we just need to consider the two subsequences that are furthest
# apart from each other.
# In other words: |last_start_index - 0| > excl_zone
cond_1 = (last_start_index - 0) > excl_zone

# Case 2:
# Check if each single subsequence has at least one non-trivial neighbor
# i.e. For ANY `i`, there exists at least one `j` such that |i - j| > excl_zone
# In this case, we need to consider the subsequence whose furthest neighbour is
# the shortest compared to other subsequences.
# In other words: |ceil(last_start_index / 2) - 0| > excl_zone
cond_2 = (math.ceil(last_start_index / 2) - 0) > excl_zone

if not cond_1 or not cond_2:
msg = (
f"The window size, 'm = {m}', may be too large and could lead to "
+ "meaningless results. Consider reducing 'm' where necessary"
)
warnings.warn(msg)


@njit(fastmath=config.STUMPY_FASTMATH_TRUE)
def _sliding_dot_product(Q, T):
Expand Down
15 changes: 10 additions & 5 deletions stumpy/stump.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,20 +711,25 @@ def stump(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)

n_A = T_A.shape[0]
n_B = T_B.shape[0]
l = n_A - m + 1

excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
if ignore_trivial:
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
else:
excl_zone = None
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)

core.check_window_size(
m,
max_size=min(T_A.shape[0], T_B.shape[0]),
excl_zone=excl_zone,
last_start_index=l - 1,
)

P, PL, PR, I, IL, IR = _stump(
T_A,
T_B,
Expand Down
30 changes: 30 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,36 @@ def test_check_max_window_size():
core.check_window_size(m, max_size=3)


def test_check_window_size_excl_zone_case1():
    """
    Ensure a `UserWarning` is emitted when the window is so large that no
    subsequence has a non-trivial neighbor.
    """
    T = np.random.rand(64)
    m = 60
    n = len(T)

    # Exclusion zone derived from `m` via the configured denominator
    excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

    # The last subsequence starts at `n - m`
    kwargs = {
        "max_size": n,
        "excl_zone": excl_zone,
        "last_start_index": n - m,
    }

    with pytest.warns(UserWarning):
        core.check_window_size(m, **kwargs)


def test_check_window_size_excl_zone_case2():
    """
    Ensure a `UserWarning` is emitted when at least one subsequence has no
    non-trivial neighbor.
    """
    T = np.random.rand(64)
    m = 48
    n = len(T)

    # Exclusion zone derived from `m` via the configured denominator
    excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

    # The last subsequence starts at `n - m`
    kwargs = {
        "max_size": n,
        "excl_zone": excl_zone,
        "last_start_index": n - m,
    }

    with pytest.warns(UserWarning):
        core.check_window_size(m, **kwargs)


@pytest.mark.parametrize("Q, T", test_data)
def test_njit_sliding_dot_product(Q, T):
ref_mp = naive_rolling_window_dot_product(Q, T)
Expand Down