Skip to content

Fixed #1077 Add extra check for window size #1078

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 39 commits into from
Apr 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
87f3873
Add extra check for window size
NimaSarajpoor Mar 27, 2025
f8d6df5
update module to include extra check for self join
NimaSarajpoor Mar 27, 2025
f174ed6
add tests for warning
NimaSarajpoor Mar 28, 2025
f611339
revise comment
NimaSarajpoor Mar 28, 2025
c2073ef
ignore coverage
NimaSarajpoor Mar 28, 2025
6c69155
minor improvement in docstring
NimaSarajpoor Mar 28, 2025
e63860c
fix flake8
NimaSarajpoor Mar 28, 2025
91f767e
Revised test function using expected signature
NimaSarajpoor Mar 29, 2025
ab411aa
fixed format
NimaSarajpoor Mar 29, 2025
b7494d9
Revise function to pass test
NimaSarajpoor Mar 30, 2025
3c87e0d
Update stumpy/core.py
NimaSarajpoor Mar 30, 2025
252d52b
improve comments
NimaSarajpoor Mar 30, 2025
8e5d9af
improve readability of function
NimaSarajpoor Mar 30, 2025
113b5c5
minor improvement in the description of param
NimaSarajpoor Mar 30, 2025
82caebb
remove redundant test function
NimaSarajpoor Mar 30, 2025
f29732f
Revise logic and the comment
NimaSarajpoor Mar 31, 2025
6f308a3
improving comments
NimaSarajpoor Mar 31, 2025
77b878b
minor change
NimaSarajpoor Mar 31, 2025
2c68716
minor change in comment
NimaSarajpoor Apr 1, 2025
643b4b0
minor change in comment
NimaSarajpoor Apr 1, 2025
a15b757
update aamp for checking window size
NimaSarajpoor Apr 1, 2025
316bf07
improve docstring and comments
NimaSarajpoor Apr 2, 2025
9f71816
improve docstring
NimaSarajpoor Apr 2, 2025
445a6cb
use smaller input to make test function more understandable
NimaSarajpoor Apr 2, 2025
7751792
updated stumped and aamped
NimaSarajpoor Apr 2, 2025
8bff40b
updated maamp and maamped modules
NimaSarajpoor Apr 2, 2025
f0cbfae
update different modules to consider the change in core.check_window_…
NimaSarajpoor Apr 2, 2025
aa61b24
minor fix
NimaSarajpoor Apr 2, 2025
9349e2a
improve comments
NimaSarajpoor Apr 2, 2025
54cd2fa
improve comments
NimaSarajpoor Apr 2, 2025
917fcc4
improved the explanations
NimaSarajpoor Apr 2, 2025
be4d6bb
minor change in the description of function
NimaSarajpoor Apr 2, 2025
97e6f2b
improve the clarity of the logic
NimaSarajpoor Apr 5, 2025
32a15f3
improve comment
NimaSarajpoor Apr 5, 2025
90d3901
improve description of function
NimaSarajpoor Apr 5, 2025
17e2db9
minor change
NimaSarajpoor Apr 5, 2025
0a25af1
improve readability and consistency
NimaSarajpoor Apr 5, 2025
e29cdca
minor change
NimaSarajpoor Apr 5, 2025
949db7e
minor changes
NimaSarajpoor Apr 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions stumpy/aamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,17 +407,17 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
if T_B.ndim != 1: # pragma: no cover
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)

n_A = T_A.shape[0]
n_B = T_B.shape[0]
l = n_A - m + 1

ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
if ignore_trivial:
if ignore_trivial: # self-join
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
else:
else: # AB-join
core.check_window_size(m, max_size=min(n_A, n_B))
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)

P, PL, PR, I, IL, IR = _aamp(
Expand Down
6 changes: 3 additions & 3 deletions stumpy/aamped.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,17 +386,17 @@ def aamped(client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
if T_B.ndim != 1: # pragma: no cover
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)

n_A = T_A.shape[0]
n_B = T_B.shape[0]

ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

if ignore_trivial:
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
else:
core.check_window_size(m, max_size=min(n_A, n_B))
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)

_aamped = core._client_to_func(client)
Expand Down
2 changes: 1 addition & 1 deletion stumpy/aampi.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def __init__(self, T, m, egress=True, p=2.0, k=1, mp=None):
computed internally using `stumpy.aamp`.
"""
self._T = core._preprocess(T)
core.check_window_size(m, max_size=self._T.shape[-1])
core.check_window_size(m, max_size=self._T.shape[0])
self._m = m
self._n = self._T.shape[0]
self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))
Expand Down
71 changes: 65 additions & 6 deletions stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,11 +554,12 @@ def get_max_window_size(n):
return max_m


def check_window_size(m, max_size=None):
def check_window_size(m, max_size=None, n=None):
"""
Check the window size and ensure that it is greater than or equal to 3 and, if
`max_size` is provided, ensure that the window size is less than or equal to the
`max_size`
``max_size`` is provided, ensure that the window size is less than or equal to
the ``max_size``. Furthermore, if ``n`` is provided, then a self-join is assumed
and a warning is issued when some subsequence has no non-trivial neighbor.

Parameters
----------
Expand All @@ -568,6 +569,10 @@ def check_window_size(m, max_size=None):
max_size : int, default None
The maximum window size allowed

n : int, default None
The length of the time series in the case of a self-join.
For an AB-join, ``n`` must be omitted (or explicitly left as ``None``).

Returns
-------
None
Expand All @@ -589,6 +594,60 @@ def check_window_size(m, max_size=None):
if max_size is not None and m > max_size:
raise ValueError(f"The window size must be less than or equal to {max_size}")

if n is not None:
# Raise warning if there is at least one subsequence with no eligible
# (non-trivial) neighbor in the case of a self-join.

# For any time series `T`, an "eligible nearest neighbor" subsequence for
# the central-most subsequence must be located outside the `excl_zone`,
# and the central-most subsequence will ALWAYS have the smallest relative
# (index-wise) distance to its farthest neighbor amongst all subsequences in `T`.
# Therefore, we only need to check whether the `excl_zone` eliminates all
# "neighbors" for the central-most subsequence in `T`. In fact, we just need to
# verify whether the `excl_zone` eliminates the "neighbor" that is farthest
# away (index-wise) from the central-most subsequence. If it does not, this
# implies that all subsequences in `T` will have at least one "eligible nearest
# neighbor" that is located outside of their respective excl_zone.

excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

l = n - m + 1
# The start indices of the subsequences are: 0, 1, ..., l-1

# If `l` is odd
# Suppose `l == 5`. So, the start indices of the subsequences
# are: 0, 1, 2, 3, 4
# The central subsequence is located at index position c=2, with two
# farthest neighbors, one located at index 0, and the other is located
# at index 4. In both cases, the relative (index-wise) distance is 2,
# which is simply `5 // 2`. In general, it can be shown that the
# (index-wise) distance from the central subsequence to its farthest
# neighbor is `l // 2`.

# If `l` is even
# Suppose `l == 6`. So, the start indices of the subsequences
# are: 0, 1, 2, 3, 4, 5
# There are two central-most subsequences, located at the index
# positions c=2 and c=3. For the central-most subsequence at index
# position c=2, its farthest neighbor will be located at index 5 (to the
# right of c=2) and, for the central-most subsequence at index position
# c=3, its farthest neighbor will be located at index 0 (to the left of
# c=3). In both cases, the relative (index-wise) distance is 3,
# which is simply `6 // 2`. In general, it can be shown that the
# (index-wise) distance from the central-most subsequence to its
# farthest neighbor is `l // 2`.

# Therefore, regardless of whether `l` is even or odd, the farthest neighbor
# of the central-most subsequence of any time series is always `l // 2`
# index positions away.
diff_to_farthest_idx = l // 2
if diff_to_farthest_idx <= excl_zone:
msg = (
f"The window size, 'm = {m}', may be too large and could lead to "
+ "meaningless results. Consider reducing 'm' where necessary"
)
warnings.warn(msg)


@njit(fastmath=config.STUMPY_FASTMATH_TRUE)
def _sliding_dot_product(Q, T):
Expand Down Expand Up @@ -1354,7 +1413,7 @@ def mass_absolute(Q, T, T_subseq_isfinite=None, p=2.0, query_idx=None):
raise ValueError(f"`Q` is {Q.ndim}-dimensional and must be 1-dimensional. ")
Q_isfinite = np.isfinite(Q)

check_window_size(m, max_size=Q.shape[-1])
check_window_size(m, max_size=Q.shape[0])

if query_idx is not None: # pragma: no cover
query_idx = int(query_idx)
Expand Down Expand Up @@ -1701,7 +1760,7 @@ def mass(
raise ValueError(f"Q is {Q.ndim}-dimensional and must be 1-dimensional. ")
Q_isfinite = np.isfinite(Q)

check_window_size(m, max_size=Q.shape[-1])
check_window_size(m, max_size=Q.shape[0])

if query_idx is not None:
query_idx = int(query_idx)
Expand Down Expand Up @@ -1926,7 +1985,7 @@ def mass_distance_matrix(
T_subseq_isconstant=T_subseq_isconstant,
)

check_window_size(m, max_size=min(Q.shape[-1], T.shape[-1]))
check_window_size(m, max_size=min(Q.shape[0], T.shape[0]))

return _mass_distance_matrix(
Q,
Expand Down
7 changes: 6 additions & 1 deletion stumpy/gpu_aamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,8 +536,13 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1):
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
if ignore_trivial: # self-join
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else: # AB-join
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

n = T_B.shape[0]
w = T_A.shape[0] - m + 1
Expand Down
7 changes: 6 additions & 1 deletion stumpy/gpu_stump.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,8 +666,13 @@ def gpu_stump(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
if ignore_trivial: # self-join
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else: # AB-join
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

n = T_B.shape[0]
w = T_A.shape[0] - m + 1
Expand Down
8 changes: 4 additions & 4 deletions stumpy/maamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def maamp_subspace(
returned.
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])

subseqs, _ = core.preprocess_non_normalized(T[:, subseq_idx : subseq_idx + m], m)
neighbors, _ = core.preprocess_non_normalized(T[:, nn_idx : nn_idx + m], m)
Expand Down Expand Up @@ -269,7 +269,7 @@ def maamp_mdl(
A list of numpy.ndarrays that contains the `k`th-dimensional subspaces
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])

if discretize_func is None:
T_isfinite = np.isfinite(T)
Expand Down Expand Up @@ -441,7 +441,7 @@ def maamp_multi_distance_profile(query_idx, T, m, include=None, discords=False,
err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=T.shape[1])
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])

if include is not None: # pragma: no cover
include = core._preprocess_include(include)
Expand Down Expand Up @@ -933,7 +933,7 @@ def maamp(T, m, include=None, discords=False, p=2.0):
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])

if include is not None:
include = core._preprocess_include(include)
Expand Down
2 changes: 1 addition & 1 deletion stumpy/maamped.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ def maamped(client, T, m, include=None, discords=False, p=2.0):
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])

if include is not None:
include = core._preprocess_include(include)
Expand Down
8 changes: 5 additions & 3 deletions stumpy/mstump.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def subspace(
array([0, 1])
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])
T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant)

if discretize_func is None:
Expand Down Expand Up @@ -409,7 +409,7 @@ def mdl(
(array([ 80. , 111.509775]), [array([1]), array([0, 1])])
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])
T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant)

if discretize_func is None:
Expand Down Expand Up @@ -1228,7 +1228,9 @@ def mstump(
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
# mstump currently only supports self-join. Therefore, the argument `n=T_A.shape[1]`
# must be passed to the function `core.check_window_size`.
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])

if include is not None:
include = core._preprocess_include(include)
Expand Down
4 changes: 3 additions & 1 deletion stumpy/mstumped.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,9 @@ def mstumped(
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
# mstumped currently only supports self-join. Therefore, the argument `n=T_A.shape[1]`
# must be passed to the function `core.check_window_size`.
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])

if include is not None:
include = core._preprocess_include(include)
Expand Down
7 changes: 6 additions & 1 deletion stumpy/scraamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,10 +646,15 @@ def __init__(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
self._ignore_trivial = core.check_ignore_trivial(
self._T_A, self._T_B, self._ignore_trivial
)
if self._ignore_trivial: # self-join
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else: # AB-join
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

self._n_A = self._T_A.shape[0]
self._n_B = self._T_B.shape[0]
Expand Down
7 changes: 6 additions & 1 deletion stumpy/scrump.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,10 +905,15 @@ def __init__(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
self._ignore_trivial = core.check_ignore_trivial(
self._T_A, self._T_B, self._ignore_trivial
)
if self._ignore_trivial:
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else:
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

self._n_A = self._T_A.shape[0]
self._n_B = self._T_B.shape[0]
Expand Down
6 changes: 4 additions & 2 deletions stumpy/stamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,14 @@ def stamp(
if T_B.ndim != 1: # pragma: no cover
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

subseq_T_A = core.rolling_window(T_A, m)
excl_zone = int(np.ceil(m / 2))

# Add exclusionary zone
if ignore_trivial:
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
out = [
_mass_PI(
subseq,
Expand All @@ -229,6 +230,7 @@ def stamp(
for i, subseq in enumerate(subseq_T_A)
]
else:
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
out = [
_mass_PI(
subseq,
Expand Down
7 changes: 6 additions & 1 deletion stumpy/stomp.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,13 @@ def _stomp(T_A, m, T_B=None, ignore_trivial=True):
if T_B.ndim != 1: # pragma: no cover
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
if ignore_trivial: # self-join
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else: # AB-join
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

n = T_A.shape[0]
l = n - m + 1
Expand Down
11 changes: 5 additions & 6 deletions stumpy/stump.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,18 +711,17 @@ def stump(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)

n_A = T_A.shape[0]
n_B = T_B.shape[0]
l = n_A - m + 1

ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

if ignore_trivial:
if ignore_trivial: # self-join
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
else:
else: # AB-join
core.check_window_size(m, max_size=min(n_A, n_B))
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)

P, PL, PR, I, IL, IR = _stump(
Expand Down
Loading