Skip to content

Commit 10b2672

Browse files
committed
Added STIMP prototype, config.STUMPY_EXCL_ZONE_DENOM
1 parent 8105b9b commit 10b2672

28 files changed

+963
-193
lines changed

docs/Tutorial_Pan_Matrix_Profile.ipynb

Lines changed: 407 additions & 0 deletions
Large diffs are not rendered by default.

stumpy/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from .aamp_motifs import aamp_motifs, aamp_match # noqa: F401
2424
from .snippets import snippets # noqa: F401
2525
from .aampdist_snippets import aampdist_snippets # noqa: F401
26+
from .stimp import stimp # noqa: F401
2627
from numba import cuda
2728

2829
if cuda.is_available():

stumpy/aamp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from numba import njit, prange, config
99

1010
from . import core
11-
from stumpy.config import STUMPY_D_SQUARED_THRESHOLD
11+
from stumpy.config import STUMPY_D_SQUARED_THRESHOLD, STUMPY_EXCL_ZONE_DENOM
1212

1313
logger = logging.getLogger(__name__)
1414

@@ -286,7 +286,7 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True):
286286
n_B = T_B.shape[0]
287287
l = n_A - m + 1
288288

289-
excl_zone = int(np.ceil(m / 4))
289+
excl_zone = int(np.ceil(m / STUMPY_EXCL_ZONE_DENOM))
290290
out = np.empty((l, 4), dtype=object)
291291

292292
if ignore_trivial:

stumpy/aamp_motifs.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import numpy as np
88

99
from . import core
10+
from .config import STUMPY_EXCL_ZONE_DENOM
1011

1112
logger = logging.getLogger(__name__)
1213

@@ -215,7 +216,7 @@ def aamp_motifs(
215216

216217
m = T.shape[-1] - P.shape[-1] + 1
217218
if excl_zone is None: # pragma: no cover
218-
excl_zone = int(np.ceil(m / 4))
219+
excl_zone = int(np.ceil(m / STUMPY_EXCL_ZONE_DENOM))
219220
if max_matches is None: # pragma: no cover
220221
max_matches = np.inf
221222
if cutoff is None: # pragma: no cover
@@ -298,16 +299,13 @@ def aamp_match(
298299
m = Q.shape[1]
299300

300301
if excl_zone is None: # pragma: no cover
301-
excl_zone = int(np.ceil(m / 4))
302+
excl_zone = int(np.ceil(m / STUMPY_EXCL_ZONE_DENOM))
302303
if max_matches is None: # pragma: no cover
303304
max_matches = np.inf
304305

305306
if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
306307
raise ValueError("Q contains illegal values (NaN or inf)")
307308

308-
if excl_zone is None: # pragma: no cover
309-
excl_zone = int(np.ceil(m / 4))
310-
311309
if max_distance is None: # pragma: no cover
312310

313311
def max_distance(D):

stumpy/aamped.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import numpy as np
88

99
from . import core
10+
from .config import STUMPY_EXCL_ZONE_DENOM
1011
from .aamp import _aamp
1112

1213
logger = logging.getLogger(__name__)
@@ -85,7 +86,7 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True):
8586
n_B = T_B.shape[0]
8687
l = n_A - m + 1
8788

88-
excl_zone = int(np.ceil(m / 4))
89+
excl_zone = int(np.ceil(m / STUMPY_EXCL_ZONE_DENOM))
8990
out = np.empty((l, 4), dtype=object)
9091

9192
hosts = list(dask_client.ncores().keys())

stumpy/aampi.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
# STUMPY is a trademark of TD Ameritrade IP Company, Inc. All rights reserved.
44

55
import numpy as np
6-
from stumpy import core
7-
import stumpy
6+
from . import core
7+
from .aamp import aamp
8+
from .config import STUMPY_EXCL_ZONE_DENOM
89

910

1011
class aampi:
@@ -92,10 +93,10 @@ def __init__(self, T, m, excl_zone=None, egress=True):
9293
if excl_zone is not None: # pragma: no cover
9394
self._excl_zone = excl_zone
9495
else:
95-
self._excl_zone = int(np.ceil(self._m / 4))
96+
self._excl_zone = int(np.ceil(self._m / STUMPY_EXCL_ZONE_DENOM))
9697
self._egress = egress
9798

98-
mp = stumpy.aamp(self._T, self._m)
99+
mp = aamp(self._T, self._m)
99100
self._P = mp[:, 0]
100101
self._I = mp[:, 1]
101102
self._left_I = mp[:, 2]

stumpy/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@
1313
STUMPY_TEST_PRECISION = 5
1414
STUMPY_MAX_SQUARED_DISTANCE = np.finfo(np.float64).max
1515
STUMPY_MAX_DISTANCE = np.sqrt(STUMPY_MAX_SQUARED_DISTANCE)
16+
STUMPY_EXCL_ZONE_DENOM = 4

stumpy/core.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,34 @@ def are_distances_too_small(a, threshold=10e-6): # pragma: no cover
373373
return False
374374

375375

376+
def get_max_window_size(n):
    """
    Get the maximum window size for a self-join

    For a positive integer `config.STUMPY_EXCL_ZONE_DENOM`, this is the largest
    `m` that satisfies `m + ceil(m / config.STUMPY_EXCL_ZONE_DENOM) < n`, i.e.,
    the largest window for which at least one subsequence still lies outside
    of the exclusion zone.

    Parameters
    ----------
    n : int
        The length of the time series

    Returns
    -------
    max_m : int
        The maximum window size allowed given `config.STUMPY_EXCL_ZONE_DENOM`
    """
    denom = config.STUMPY_EXCL_ZONE_DENOM

    # `//` already rounds towards negative infinity, so no additional flooring
    # (and no detour through float64) is needed for integer inputs.
    n_excluded = (n + (denom - 1)) // (denom + 1)

    # `int` is retained so that a non-integer denominator still produces a
    # Python `int` (or raises if the intermediate result is not finite).
    max_m = int(n - n_excluded) - 1

    return max_m
402+
403+
376404
def check_window_size(m, max_size=None):
377405
"""
378406
Check the window size and ensure that it is greater than or equal to 3 and, if

stumpy/floss.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import scipy.stats
99

1010
from . import core
11+
from .config import STUMPY_EXCL_ZONE_DENOM
1112

1213

1314
def _nnmark(I):
@@ -519,7 +520,7 @@ def update(self, t):
519520
if not np.isfinite(t):
520521
self._finite_T[-1] = 0.0
521522
self._finite_Q[-1] = self._finite_T[-1]
522-
excl_zone = int(np.ceil(self._m / 4))
523+
excl_zone = int(np.ceil(self._m / STUMPY_EXCL_ZONE_DENOM))
523524
# Note that the start of the exclusion zone is relative to
524525
# the unchanging length of the matrix profile index
525526
zone_start = max(0, self._k - excl_zone)

stumpy/gpu_aamp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,9 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0):
444444
n = T_B.shape[0]
445445
k = T_A.shape[0] - m + 1
446446
l = n - m + 1
447-
excl_zone = int(np.ceil(m / 4)) # See Definition 3 and Figure 3
447+
excl_zone = int(
448+
np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)
449+
) # See Definition 3 and Figure 3
448450

449451
T_A_fname = core.array_to_temp_file(T_A)
450452
T_B_fname = core.array_to_temp_file(T_B)

stumpy/gpu_stump.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,9 @@ def gpu_stump(T_A, m, T_B=None, ignore_trivial=True, device_id=0, normalize=True
476476
n = T_B.shape[0]
477477
k = T_A.shape[0] - m + 1
478478
l = n - m + 1
479-
excl_zone = int(np.ceil(m / 4)) # See Definition 3 and Figure 3
479+
excl_zone = int(
480+
np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)
481+
) # See Definition 3 and Figure 3
480482

481483
T_A_fname = core.array_to_temp_file(T_A)
482484
T_B_fname = core.array_to_temp_file(T_B)

stumpy/maamp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,9 @@ def maamp(T, m, include=None, discords=False):
619619

620620
d, n = T_B.shape
621621
k = n - m + 1
622-
excl_zone = int(np.ceil(m / 4)) # See Definition 3 and Figure 3
622+
excl_zone = int(
623+
np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)
624+
) # See Definition 3 and Figure 3
623625

624626
P = np.empty((d, k), dtype="float64")
625627
I = np.empty((d, k), dtype="int64")

stumpy/maamped.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from .maamp import _maamp, _get_first_maamp_profile
1010
from .mstump import _get_multi_QT, _preprocess_include
1111
from . import core
12+
from .config import STUMPY_EXCL_ZONE_DENOM
1213

1314
logger = logging.getLogger(__name__)
1415

@@ -90,7 +91,9 @@ def maamped(dask_client, T, m, include=None, discords=False):
9091

9192
d, n = T_B.shape
9293
k = n - m + 1
93-
excl_zone = int(np.ceil(m / 4)) # See Definition 3 and Figure 3
94+
excl_zone = int(
95+
np.ceil(m / STUMPY_EXCL_ZONE_DENOM)
96+
) # See Definition 3 and Figure 3
9497

9598
P = np.empty((d, k), dtype="float64")
9699
I = np.empty((d, k), dtype="int64")

stumpy/motifs.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .aamp_motifs import aamp_motifs, aamp_match
99

1010
from . import core
11+
from .config import STUMPY_EXCL_ZONE_DENOM
1112

1213
logger = logging.getLogger(__name__)
1314

@@ -225,7 +226,7 @@ def motifs(
225226

226227
m = T.shape[-1] - P.shape[-1] + 1
227228
if excl_zone is None: # pragma: no cover
228-
excl_zone = int(np.ceil(m / 4))
229+
excl_zone = int(np.ceil(m / STUMPY_EXCL_ZONE_DENOM))
229230
if max_matches is None: # pragma: no cover
230231
max_matches = np.inf
231232
if cutoff is None: # pragma: no cover
@@ -324,16 +325,13 @@ def match(
324325
m = Q.shape[1]
325326

326327
if excl_zone is None: # pragma: no cover
327-
excl_zone = int(np.ceil(m / 4))
328+
excl_zone = int(np.ceil(m / STUMPY_EXCL_ZONE_DENOM))
328329
if max_matches is None: # pragma: no cover
329330
max_matches = np.inf
330331

331332
if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
332333
raise ValueError("Q contains illegal values (NaN or inf)")
333334

334-
if excl_zone is None: # pragma: no cover
335-
excl_zone = int(np.ceil(m / 4))
336-
337335
if max_distance is None: # pragma: no cover
338336

339337
def max_distance(D):

stumpy/mstump.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from . import core
1313
from .maamp import maamp, maamp_subspace
14+
from .config import STUMPY_EXCL_ZONE_DENOM
1415

1516
logger = logging.getLogger(__name__)
1617

@@ -876,7 +877,9 @@ def mstump(T, m, include=None, discords=False, normalize=True):
876877

877878
d, n = T_B.shape
878879
k = n - m + 1
879-
excl_zone = int(np.ceil(m / 4)) # See Definition 3 and Figure 3
880+
excl_zone = int(
881+
np.ceil(m / STUMPY_EXCL_ZONE_DENOM)
882+
) # See Definition 3 and Figure 3
880883

881884
P = np.empty((d, k), dtype="float64")
882885
I = np.empty((d, k), dtype="int64")

stumpy/mstumped.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
)
1515
from . import core
1616
from .maamped import maamped
17+
from .config import STUMPY_EXCL_ZONE_DENOM
1718

1819
logger = logging.getLogger(__name__)
1920

@@ -99,7 +100,9 @@ def mstumped(dask_client, T, m, include=None, discords=False, normalize=True):
99100

100101
d, n = T_B.shape
101102
k = n - m + 1
102-
excl_zone = int(np.ceil(m / 4)) # See Definition 3 and Figure 3
103+
excl_zone = int(
104+
np.ceil(m / STUMPY_EXCL_ZONE_DENOM)
105+
) # See Definition 3 and Figure 3
103106

104107
P = np.empty((d, k), dtype="float64")
105108
I = np.empty((d, k), dtype="int64")

stumpy/scraamp.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from . import core
1111
from .aamp import _aamp
12-
from .config import STUMPY_D_SQUARED_THRESHOLD
12+
from .config import STUMPY_D_SQUARED_THRESHOLD, STUMPY_EXCL_ZONE_DENOM
1313

1414
logger = logging.getLogger(__name__)
1515

@@ -68,7 +68,8 @@ def _prescraamp(
6868
The subsequence index in `T_B` that corresponds to `Q`
6969
7070
s : int
71-
The sampling interval that defaults to `int(np.ceil(m / 4))`
71+
The sampling interval that defaults to
72+
`int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))`
7273
7374
squared_distance_profile : ndarray
7475
A reusable array to store the computed squared distance profile
@@ -175,7 +176,8 @@ def prescraamp(T_A, m, T_B=None, s=None):
175176
subsequence in T_A, its nearest neighbor in T_B will be recorded.
176177
177178
s : int, default None
178-
The sampling interval that defaults to `int(np.ceil(m / 4))`
179+
The sampling interval that defaults to
180+
`int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))`
179181
180182
Returns
181183
-------
@@ -199,7 +201,7 @@ def prescraamp(T_A, m, T_B=None, s=None):
199201

200202
if T_B is None:
201203
T_B = T_A
202-
excl_zone = int(np.ceil(m / 4))
204+
excl_zone = int(np.ceil(m / STUMPY_EXCL_ZONE_DENOM))
203205
else:
204206
excl_zone = None
205207

@@ -283,8 +285,9 @@ class scraamp:
283285
284286
s : int
285287
The size of the PreSCRIMP fixed interval. If `pre_scraamp=True` and `s=None`,
286-
then `s` will automatically be set to `s=int(np.ceil(m/4))`, the size of
287-
the exclusion zone.
288+
then `s` will automatically be set to
289+
`s=int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))`, the size of the exclusion
290+
zone.
288291
289292
Attributes
290293
----------
@@ -348,7 +351,8 @@ def __init__(
348351
349352
s : int, default None
350353
The size of the PreSCRIMP fixed interval. If `pre_scraamp=True` and
351-
`s=None`, then `s` will automatically be set to `s=int(np.ceil(m/4))`, the
354+
`s=None`, then `s` will automatically be set to
355+
`s=int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))`, the
352356
size of the exclusion zone.
353357
"""
354358
self._ignore_trivial = ignore_trivial
@@ -402,7 +406,7 @@ def __init__(
402406
self._P[:, :] = np.inf
403407
self._I[:, :] = -1
404408

405-
self._excl_zone = int(np.ceil(self._m / 4))
409+
self._excl_zone = int(np.ceil(self._m / STUMPY_EXCL_ZONE_DENOM))
406410

407411
if s is None:
408412
s = self._excl_zone
@@ -421,6 +425,12 @@ def __init__(
421425
self._diags = np.random.permutation(
422426
range(self._excl_zone + 1, self._n_A - self._m + 1)
423427
)
428+
if self._diags.shape[0] == 0: # pragma: no cover
429+
max_m = core.get_max_window_size(self._T_A.shape[0])
430+
raise ValueError(
431+
f"The window size, `m = {self._m}`, is too long for a self join. "
432+
f"Please try a value of `m <= {max_m}`"
433+
)
424434
else:
425435
self._diags = np.random.permutation(
426436
range(-(self._n_A - self._m + 1) + 1, self._n_B - self._m + 1)

0 commit comments

Comments
 (0)