Skip to content

Commit a163862

Browse files
[ENH, REF] Refactored time-point based ROCKAD implementation (#2804)
* mockup
* init
* decorator
* correct import and tag
* base docs
* wrappers
* tests
* docs and imports
* test params
* register
* big refactor
* smoothing refactor
* Revert "smoothing refactor"
  This reverts commit c245ccb.
* fixes
* final bits and docs for refactor
* move predict_proba logic into inner_predict for code consistency
* Added reference and example code, updated description
* Adapted tests to be semi-supervised
* Added semi-supervised labeling to description
* moved Attributes above References and Examples, updated path of collection based rockad implementation
* merge 2
* CI errors
* maintainer
* doctest

---------

Co-authored-by: MatthewMiddlehurst <m.middlehurst@uea.ac.uk>
Co-authored-by: MatthewMiddlehurst <pfm15hbu@gmail.com>
1 parent 620979e commit a163862

File tree

5 files changed

+67
-37
lines changed

5 files changed

+67
-37
lines changed

.github/workflows/periodic_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ jobs:
178178
if: runner.os == 'Linux'
179179
uses: pierotofy/set-swap-space@v1.0
180180
with:
181-
swap-size-gb: 8
181+
swap-size-gb: 4
182182

183183
- name: Use numba cache to set env variables but not restore cache
184184
uses: ./.github/actions/numba_cache

.github/workflows/pr_pytest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ jobs:
7676
if: runner.os == 'Linux'
7777
uses: pierotofy/set-swap-space@v1.0
7878
with:
79-
swap-size-gb: 8
79+
swap-size-gb: 4
8080

8181
- if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'no numba cache') }}
8282
name: Restore numba cache

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ jobs:
6969
if: runner.os == 'Linux'
7070
uses: pierotofy/set-swap-space@v1.0
7171
with:
72-
swap-size-gb: 8
72+
swap-size-gb: 4
7373

7474
- uses: actions/download-artifact@v4
7575
with:

aeon/anomaly_detection/series/distance_based/_rockad.py

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""ROCKAD anomaly detector."""
22

3+
__maintainer__ = []
34
__all__ = ["ROCKAD"]
45

56
import warnings
@@ -17,14 +18,16 @@
1718

1819
class ROCKAD(BaseSeriesAnomalyDetector):
1920
"""
20-
ROCKET-based Anomaly Detector (ROCKAD).
21+
ROCKET-based Semi-Supervised Anomaly Detector (ROCKAD).
2122
23+
Adapted version of ROCKAD [1]_ that detects anomalies at individual time points.
2224
ROCKAD leverages the ROCKET transformation for feature extraction from
2325
time series data and applies the scikit-learn k-nearest neighbors (k-NN)
2426
approach with bootstrap aggregation for robust anomaly detection.
2527
After windowing, the data gets transformed into the ROCKET feature space.
2628
Then the windows are compared based on the feature space by
27-
finding the nearest neighbours.
29+
finding the nearest neighbours. Whole-series based ROCKAD as proposed in
30+
[1]_ can be found at aeon/anomaly_detection/collection/_rockad.py
2831
2932
This class supports both univariate and multivariate time series and
3033
provides options for normalizing features, applying power transformations,
@@ -61,6 +64,31 @@ class ROCKAD(BaseSeriesAnomalyDetector):
6164
List containing k-NN estimators used for anomaly scoring, set after fitting.
6265
power_transformer_ : PowerTransformer
6366
Transformer used to apply power transformation to the features.
67+
68+
References
69+
----------
70+
.. [1] Theissler, A., Wengert, M., Gerschner, F. (2023).
71+
ROCKAD: Transferring ROCKET to Whole Time Series Anomaly Detection.
72+
In: Crémilleux, B., Hess, S., Nijssen, S. (eds) Advances in Intelligent
73+
Data Analysis XXI. IDA 2023. Lecture Notes in Computer Science,
74+
vol 13876. Springer, Cham. https://doi.org/10.1007/978-3-031-30047-9_33
75+
76+
Examples
77+
--------
78+
>>> import numpy as np
79+
>>> from aeon.anomaly_detection.series.distance_based import ROCKAD
80+
>>> rng = np.random.default_rng(seed=42)
81+
>>> X_train = rng.normal(loc=0.0, scale=1.0, size=(1000,))
82+
>>> X_test = rng.normal(loc=0.0, scale=1.0, size=(20,))
83+
>>> X_test[15:20] -= 5
84+
>>> detector = ROCKAD(window_size=15,n_estimators=10,n_kernels=10,n_neighbors=3)
85+
>>> detector.fit(X_train)
86+
ROCKAD(...)
87+
>>> detector.predict(X_test)
88+
array([0. , 0.00554713, 0.0699094 , 0.22881059, 0.32382585,
89+
0.43652154, 0.43652154, 0.43652154, 0.43652154, 0.43652154,
90+
0.43652154, 0.43652154, 0.43652154, 0.43652154, 0.43652154,
91+
0.52382585, 0.65200875, 0.80313368, 0.85194345, 1. ])
6492
"""
6593

6694
_tags = {
@@ -86,7 +114,6 @@ def __init__(
86114
n_jobs=1,
87115
random_state=42,
88116
):
89-
90117
self.n_estimators = n_estimators
91118
self.n_kernels = n_kernels
92119
self.normalise = normalise
@@ -136,7 +163,6 @@ def _check_params(self, X: np.ndarray) -> None:
136163
)
137164

138165
def _inner_fit(self, X: np.ndarray) -> None:
139-
140166
self.rocket_transformer_ = Rocket(
141167
n_kernels=self.n_kernels,
142168
normalise=self.normalise,
@@ -189,7 +215,6 @@ def _inner_fit(self, X: np.ndarray) -> None:
189215
self.list_baggers_.append(estimator)
190216

191217
def _predict(self, X) -> np.ndarray:
192-
193218
_X, padding = sliding_windows(
194219
X, window_size=self.window_size, stride=self.stride, axis=0
195220
)
@@ -209,22 +234,8 @@ def _fit_predict(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndar
209234
return point_anomaly_scores
210235

211236
def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray:
212-
213-
anomaly_scores = self._predict_proba(X)
214-
215-
point_anomaly_scores = reverse_windowing(
216-
anomaly_scores, self.window_size, np.nanmean, self.stride, padding
217-
)
218-
219-
point_anomaly_scores = (point_anomaly_scores - point_anomaly_scores.min()) / (
220-
point_anomaly_scores.max() - point_anomaly_scores.min()
221-
)
222-
223-
return point_anomaly_scores
224-
225-
def _predict_proba(self, X):
226237
"""
227-
Predicts the probability of anomalies for the input data.
238+
Predict the anomaly score for each time-point in the input data.
228239
229240
Parameters
230241
----------
@@ -259,6 +270,14 @@ def _predict_proba(self, X):
259270
y_scores[:, idx] = scores
260271

261272
# Average the scores across estimators to get the final score for each window
262-
y_scores = y_scores.mean(axis=1)
273+
anomaly_scores = y_scores.mean(axis=1)
263274

264-
return y_scores
275+
point_anomaly_scores = reverse_windowing(
276+
anomaly_scores, self.window_size, np.nanmean, self.stride, padding
277+
)
278+
279+
point_anomaly_scores = (point_anomaly_scores - point_anomaly_scores.min()) / (
280+
point_anomaly_scores.max() - point_anomaly_scores.min()
281+
)
282+
283+
return point_anomaly_scores

aeon/anomaly_detection/series/distance_based/tests/test_rockad.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
def test_rockad_univariate():
1111
"""Test ROCKAD univariate output."""
1212
rng = check_random_state(seed=2)
13-
series = rng.normal(size=(100,))
14-
series[50:58] -= 5
13+
train_series = rng.normal(size=(100,))
14+
test_series = rng.normal(size=(100,))
15+
test_series[50:58] -= 5
1516

1617
ad = ROCKAD(
1718
n_estimators=100,
@@ -22,7 +23,8 @@ def test_rockad_univariate():
2223
stride=1,
2324
)
2425

25-
pred = ad.fit_predict(series, axis=0)
26+
ad.fit(train_series, axis=0)
27+
pred = ad.predict(test_series, axis=0)
2628

2729
assert pred.shape == (100,)
2830
assert pred.dtype == np.float64
@@ -32,9 +34,10 @@ def test_rockad_univariate():
3234
def test_rockad_multivariate():
3335
"""Test ROCKAD multivariate output."""
3436
rng = check_random_state(seed=2)
35-
series = rng.normal(size=(100, 3))
36-
series[50:58, 0] -= 5
37-
series[87:90, 1] += 0.1
37+
train_series = rng.normal(size=(100, 3))
38+
test_series = rng.normal(size=(100, 3))
39+
test_series[50:58, 0] -= 5
40+
test_series[87:90, 1] += 0.1
3841

3942
ad = ROCKAD(
4043
n_estimators=1000,
@@ -45,7 +48,8 @@ def test_rockad_multivariate():
4548
stride=1,
4649
)
4750

48-
pred = ad.fit_predict(series, axis=0)
51+
ad.fit(train_series, axis=0)
52+
pred = ad.predict(test_series, axis=0)
4953

5054
assert pred.shape == (100,)
5155
assert pred.dtype == np.float64
@@ -55,21 +59,28 @@ def test_rockad_multivariate():
5559
def test_rockad_incorrect_input():
5660
"""Test ROCKAD incorrect input."""
5761
rng = check_random_state(seed=2)
58-
series = rng.normal(size=(100,))
62+
train_series = rng.normal(size=(100,))
63+
test_series = rng.normal(size=(5,))
5964

6065
with pytest.raises(ValueError, match="The window size must be at least 1"):
6166
ad = ROCKAD(window_size=0)
62-
ad.fit_predict(series)
67+
ad.fit(train_series)
6368
with pytest.raises(ValueError, match="The stride must be at least 1"):
6469
ad = ROCKAD(stride=0)
65-
ad.fit_predict(series)
70+
ad.fit(train_series)
6671
with pytest.raises(
6772
ValueError, match=r"Window count .* has to be larger than n_neighbors .*"
6873
):
6974
ad = ROCKAD(stride=1, window_size=100)
70-
ad.fit_predict(series)
75+
ad.fit(train_series)
7176
with pytest.warns(
7277
UserWarning, match=r"Power Transform failed and thus has been disabled."
7378
):
7479
ad = ROCKAD(stride=1, window_size=5)
75-
ad.fit_predict(series)
80+
ad.fit(train_series)
81+
with pytest.raises(
82+
ValueError, match=r"window shape cannot be larger than input array shape"
83+
):
84+
ad = ROCKAD(stride=1, window_size=10)
85+
ad.fit(train_series)
86+
ad.predict(test_series)

0 commit comments

Comments
 (0)