diff --git a/.github/workflows/periodic_tests.yml b/.github/workflows/periodic_tests.yml index ef941711d7..4b9829b44e 100644 --- a/.github/workflows/periodic_tests.yml +++ b/.github/workflows/periodic_tests.yml @@ -178,7 +178,7 @@ jobs: if: runner.os == 'Linux' uses: pierotofy/set-swap-space@v1.0 with: - swap-size-gb: 8 + swap-size-gb: 4 - name: Use numba cache to set env variables but not restore cache uses: ./.github/actions/numba_cache diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml index 77ff1d2f58..6aba9e83c4 100644 --- a/.github/workflows/pr_pytest.yml +++ b/.github/workflows/pr_pytest.yml @@ -76,7 +76,7 @@ jobs: if: runner.os == 'Linux' uses: pierotofy/set-swap-space@v1.0 with: - swap-size-gb: 8 + swap-size-gb: 4 - if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'no numba cache') }} name: Restore numba cache diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 590c4f9342..02ebeea529 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -69,7 +69,7 @@ jobs: if: runner.os == 'Linux' uses: pierotofy/set-swap-space@v1.0 with: - swap-size-gb: 8 + swap-size-gb: 4 - uses: actions/download-artifact@v4 with: diff --git a/aeon/anomaly_detection/series/distance_based/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py index 696392286c..1fd0fa8f30 100644 --- a/aeon/anomaly_detection/series/distance_based/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -1,5 +1,6 @@ """ROCKAD anomaly detector.""" +__maintainer__ = [] __all__ = ["ROCKAD"] import warnings @@ -17,14 +18,16 @@ class ROCKAD(BaseSeriesAnomalyDetector): """ - ROCKET-based Anomaly Detector (ROCKAD). + ROCKET-based Semi-Supervised Anomaly Detector (ROCKAD). + Adaptation of ROCKAD [1]_ that detects anomalies at individual time points.
ROCKAD leverages the ROCKET transformation for feature extraction from time series data and applies the scikit learn k-nearest neighbors (k-NN) approach with bootstrap aggregation for robust anomaly detection. After windowing, the data gets transformed into the ROCKET feature space. Then the windows are compared based on the feature space by - finding the nearest neighbours. + finding the nearest neighbours. Whole-series ROCKAD, as proposed in + [1]_, can be found at aeon/anomaly_detection/collection/_rockad.py. This class supports both univariate and multivariate time series and provides options for normalizing features, applying power transformations, @@ -61,6 +64,31 @@ class ROCKAD(BaseSeriesAnomalyDetector): List containing k-NN estimators used for anomaly scoring, set after fitting. power_transformer_ : PowerTransformer Transformer used to apply power transformation to the features. + + References + ---------- + .. [1] Theissler, A., Wengert, M., Gerschner, F. (2023). + ROCKAD: Transferring ROCKET to Whole Time Series Anomaly Detection. + In: Crémilleux, B., Hess, S., Nijssen, S. (eds) Advances in Intelligent + Data Analysis XXI. IDA 2023. Lecture Notes in Computer Science, + vol 13876. Springer, Cham. https://doi.org/10.1007/978-3-031-30047-9_33 + + Examples + -------- + >>> import numpy as np + >>> from aeon.anomaly_detection.series.distance_based import ROCKAD + >>> rng = np.random.default_rng(seed=42) + >>> X_train = rng.normal(loc=0.0, scale=1.0, size=(1000,)) + >>> X_test = rng.normal(loc=0.0, scale=1.0, size=(20,)) + >>> X_test[15:20] -= 5 + >>> detector = ROCKAD(window_size=15,n_estimators=10,n_kernels=10,n_neighbors=3) + >>> detector.fit(X_train) + ROCKAD(...) + >>> detector.predict(X_test) + array([0. , 0.00554713, 0.0699094 , 0.22881059, 0.32382585, + 0.43652154, 0.43652154, 0.43652154, 0.43652154, 0.43652154, + 0.43652154, 0.43652154, 0.43652154, 0.43652154, 0.43652154, + 0.52382585, 0.65200875, 0.80313368, 0.85194345, 1.
]) """ _tags = { @@ -86,7 +114,6 @@ def __init__( n_jobs=1, random_state=42, ): - self.n_estimators = n_estimators self.n_kernels = n_kernels self.normalise = normalise @@ -136,7 +163,6 @@ def _check_params(self, X: np.ndarray) -> None: ) def _inner_fit(self, X: np.ndarray) -> None: - self.rocket_transformer_ = Rocket( n_kernels=self.n_kernels, normalise=self.normalise, @@ -189,7 +215,6 @@ def _inner_fit(self, X: np.ndarray) -> None: self.list_baggers_.append(estimator) def _predict(self, X) -> np.ndarray: - _X, padding = sliding_windows( X, window_size=self.window_size, stride=self.stride, axis=0 ) @@ -209,22 +234,8 @@ def _fit_predict(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndar return point_anomaly_scores def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray: - - anomaly_scores = self._predict_proba(X) - - point_anomaly_scores = reverse_windowing( - anomaly_scores, self.window_size, np.nanmean, self.stride, padding - ) - - point_anomaly_scores = (point_anomaly_scores - point_anomaly_scores.min()) / ( - point_anomaly_scores.max() - point_anomaly_scores.min() - ) - - return point_anomaly_scores - - def _predict_proba(self, X): """ - Predicts the probability of anomalies for the input data. + Predict the anomaly score for each time-point in the input data. 
Parameters ---------- @@ -259,6 +270,14 @@ def _predict_proba(self, X): y_scores[:, idx] = scores # Average the scores to get the final score for each time series - y_scores = y_scores.mean(axis=1) + anomaly_scores = y_scores.mean(axis=1) - return y_scores + point_anomaly_scores = reverse_windowing( + anomaly_scores, self.window_size, np.nanmean, self.stride, padding + ) + + point_anomaly_scores = (point_anomaly_scores - point_anomaly_scores.min()) / ( + point_anomaly_scores.max() - point_anomaly_scores.min() + ) + + return point_anomaly_scores diff --git a/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py b/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py index 51d2425505..16f60edff7 100644 --- a/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py @@ -10,8 +10,9 @@ def test_rockad_univariate(): """Test ROCKAD univariate output.""" rng = check_random_state(seed=2) - series = rng.normal(size=(100,)) - series[50:58] -= 5 + train_series = rng.normal(size=(100,)) + test_series = rng.normal(size=(100,)) + test_series[50:58] -= 5 ad = ROCKAD( n_estimators=100, @@ -22,7 +23,8 @@ def test_rockad_univariate(): stride=1, ) - pred = ad.fit_predict(series, axis=0) + ad.fit(train_series, axis=0) + pred = ad.predict(test_series, axis=0) assert pred.shape == (100,) assert pred.dtype == np.float64 @@ -32,9 +34,10 @@ def test_rockad_univariate(): def test_rockad_multivariate(): """Test ROCKAD multivariate output.""" rng = check_random_state(seed=2) - series = rng.normal(size=(100, 3)) - series[50:58, 0] -= 5 - series[87:90, 1] += 0.1 + train_series = rng.normal(size=(100, 3)) + test_series = rng.normal(size=(100, 3)) + test_series[50:58, 0] -= 5 + test_series[87:90, 1] += 0.1 ad = ROCKAD( n_estimators=1000, @@ -45,7 +48,8 @@ def test_rockad_multivariate(): stride=1, ) - pred = ad.fit_predict(series, axis=0) + ad.fit(train_series, axis=0) + pred = 
ad.predict(test_series, axis=0) assert pred.shape == (100,) assert pred.dtype == np.float64 @@ -55,21 +59,28 @@ def test_rockad_multivariate(): def test_rockad_incorrect_input(): """Test ROCKAD incorrect input.""" rng = check_random_state(seed=2) - series = rng.normal(size=(100,)) + train_series = rng.normal(size=(100,)) + test_series = rng.normal(size=(5,)) with pytest.raises(ValueError, match="The window size must be at least 1"): ad = ROCKAD(window_size=0) - ad.fit_predict(series) + ad.fit(train_series) with pytest.raises(ValueError, match="The stride must be at least 1"): ad = ROCKAD(stride=0) - ad.fit_predict(series) + ad.fit(train_series) with pytest.raises( ValueError, match=r"Window count .* has to be larger than n_neighbors .*" ): ad = ROCKAD(stride=1, window_size=100) - ad.fit_predict(series) + ad.fit(train_series) with pytest.warns( UserWarning, match=r"Power Transform failed and thus has been disabled." ): ad = ROCKAD(stride=1, window_size=5) - ad.fit_predict(series) + ad.fit(train_series) + with pytest.raises( + ValueError, match=r"window shape cannot be larger than input array shape" + ): + ad = ROCKAD(stride=1, window_size=10) + ad.fit(train_series) + ad.predict(test_series)