From 775e1e88d3158cd248accbf19781aed1fc12dafb Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 8 Nov 2024 16:12:31 +0000 Subject: [PATCH 01/24] mockup --- .../whole_series/__init__.py | 1 + .../whole_series/_outlier_detection.py | 34 +++++++ aeon/anomaly_detection/whole_series/base.py | 92 +++++++++++++++++++ ...eld_collection_anomaly_detection_checks.py | 9 ++ .../_yield_estimator_checks.py | 9 ++ aeon/testing/testing_data.py | 2 + aeon/utils/base/_register.py | 2 + 7 files changed, 149 insertions(+) create mode 100644 aeon/anomaly_detection/whole_series/__init__.py create mode 100644 aeon/anomaly_detection/whole_series/_outlier_detection.py create mode 100644 aeon/anomaly_detection/whole_series/base.py create mode 100644 aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py diff --git a/aeon/anomaly_detection/whole_series/__init__.py b/aeon/anomaly_detection/whole_series/__init__.py new file mode 100644 index 0000000000..68f149dc2e --- /dev/null +++ b/aeon/anomaly_detection/whole_series/__init__.py @@ -0,0 +1 @@ +"""Whole-series anomaly detection methods.""" diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py new file mode 100644 index 0000000000..a228b6b644 --- /dev/null +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -0,0 +1,34 @@ +"""Basic outlier detection classifier.""" + +from aeon.anomaly_detection import IsolationForest +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.base._base import _clone_estimator + + +class OutlierDetectionClassifier(BaseCollectionAnomalyDetector): + """Basic outlier detection classifier.""" + + _tags = { + "X_inner_type": "numpy2D", + } + + def __init__(self, estimator, random_state=None): + self.estimator = estimator + self.random_state = random_state + + super().__init__() + + def _fit(self, X, y=None): + self.estimator_ = _clone_estimator( + 
self.estimator, random_state=self.random_state + ) + self.estimator_.fit(X, y) + return self + + def _predict(self, X): + pred = self.estimator_.predict(X) + pred[pred == -1] = 0 + return pred + + def _get_test_params(cls, parameter_set="default"): + return {"estimator": IsolationForest()} diff --git a/aeon/anomaly_detection/whole_series/base.py b/aeon/anomaly_detection/whole_series/base.py new file mode 100644 index 0000000000..305053795c --- /dev/null +++ b/aeon/anomaly_detection/whole_series/base.py @@ -0,0 +1,92 @@ +"""Abstract base class for whole-series/collection anomaly detectors.""" + +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = ["BaseCollectionAnomalyDetector"] + +from abc import abstractmethod +from typing import final + +import numpy as np +import pandas as pd + +from aeon.base import BaseCollectionEstimator + + +class BaseCollectionAnomalyDetector(BaseCollectionEstimator): + """Collection anomaly detector base class.""" + + _tags = { + "fit_is_empty": False, + "requires_y": False, + } + + def __init__(self): + super().__init__() + + @final + def fit(self, X, y=None): + """Fit.""" + if self.get_tag("fit_is_empty"): + self.is_fitted = True + return self + + if self.get_tag("requires_y"): + if y is None: + raise ValueError("Tag requires_y is true, but fit called with y=None") + + # reset estimator at the start of fit + self.reset() + + X = self._preprocess_collection(X) + if y is not None: + y = self._check_y(y, self.metadata_["n_cases"]) + + self._fit(X, y) + + # this should happen last + self.is_fitted = True + return self + + @final + def predict(self, X): + """Predict.""" + fit_empty = self.get_tag("fit_is_empty") + if not fit_empty: + self._check_is_fitted() + + X = self._preprocess_collection(X, store_metadata=False) + # Check if X has the correct shape seen during fitting + self._check_shape(X) + + return self._predict(X) + + @abstractmethod + def _fit(self, X, y=None): ... + + @abstractmethod + def _predict(self, X): ... 
+ + def _check_y(self, y, n_cases): + if not isinstance(y, (pd.Series, np.ndarray)): + raise TypeError( + f"y must be a np.array or a pd.Series, but found type: {type(y)}" + ) + if isinstance(y, np.ndarray) and y.ndim > 1: + raise TypeError(f"y must be 1-dimensional, found {y.ndim} dimensions") + + if not all([x == 0 or x == 1 for x in y]): + raise ValueError( + "y input must only contain 0 (not anomalous) or 1 (anomalous) values." + ) + + # Check matching number of labels + n_labels = y.shape[0] + if n_cases != n_labels: + raise ValueError( + f"Mismatch in number of cases. Found X = {n_cases} and y = {n_labels}" + ) + + if isinstance(y, pd.Series): + y = pd.Series.to_numpy(y) + + return y diff --git a/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py b/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py new file mode 100644 index 0000000000..40f7fe7aaf --- /dev/null +++ b/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py @@ -0,0 +1,9 @@ +"""Tests for all collection anomaly detectors.""" + + +def _yield_collection_anomaly_detection_checks( + estimator_class, estimator_instances, datatypes +): + """Yield all collection anomaly detection checks for an aeon estimator.""" + # nothing currently! 
+ return [] diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py index 20664bea73..0a1aecd9f8 100644 --- a/aeon/testing/estimator_checking/_yield_estimator_checks.py +++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py @@ -14,6 +14,7 @@ from sklearn.utils.estimator_checks import check_get_params_invariance from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base import BaseAeonEstimator from aeon.base._base import _clone_estimator from aeon.classification import BaseClassifier @@ -34,6 +35,9 @@ from aeon.testing.estimator_checking._yield_clustering_checks import ( _yield_clustering_checks, ) +from aeon.testing.estimator_checking._yield_collection_anomaly_detection_checks import ( + _yield_collection_anomaly_detection_checks, +) from aeon.testing.estimator_checking._yield_collection_transformation_checks import ( _yield_collection_transformation_checks, ) @@ -152,6 +156,11 @@ def _yield_all_aeon_checks( estimator_class, estimator_instances, datatypes ) + if issubclass(estimator_class, BaseCollectionAnomalyDetector): + yield from _yield_collection_anomaly_detection_checks( + estimator_class, estimator_instances, datatypes + ) + if issubclass(estimator_class, BaseSimilaritySearch): yield from _yield_similarity_search_checks( estimator_class, estimator_instances, datatypes diff --git a/aeon/testing/testing_data.py b/aeon/testing/testing_data.py index 55b9092443..70bc12908a 100644 --- a/aeon/testing/testing_data.py +++ b/aeon/testing/testing_data.py @@ -3,6 +3,7 @@ import numpy as np from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base import BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification import BaseClassifier from 
aeon.classification.early_classification import BaseEarlyClassifier @@ -821,6 +822,7 @@ def _get_label_type_for_estimator(estimator): or isinstance(estimator, BaseClusterer) or isinstance(estimator, BaseCollectionTransformer) or isinstance(estimator, BaseSimilaritySearch) + or isinstance(estimator, BaseCollectionAnomalyDetector) ): label_type = "Classification" elif isinstance(estimator, BaseRegressor): diff --git a/aeon/utils/base/_register.py b/aeon/utils/base/_register.py index 1327d626ef..fce96c6ca2 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -17,6 +17,7 @@ from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base import BaseAeonEstimator, BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification.base import BaseClassifier from aeon.classification.early_classification import BaseEarlyClassifier @@ -37,6 +38,7 @@ "transformer": BaseTransformer, # estimator types "anomaly-detector": BaseAnomalyDetector, + "collection-anomaly-detector": BaseCollectionAnomalyDetector, "collection-transformer": BaseCollectionTransformer, "classifier": BaseClassifier, "clusterer": BaseClusterer, From ba1c55e3fc65936aee4b3c84647cf8e100b9dcaf Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 8 Nov 2024 16:19:25 +0000 Subject: [PATCH 02/24] init --- aeon/anomaly_detection/whole_series/__init__.py | 10 ++++++++++ aeon/utils/base/_register.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/whole_series/__init__.py b/aeon/anomaly_detection/whole_series/__init__.py index 68f149dc2e..14d0e5c466 100644 --- a/aeon/anomaly_detection/whole_series/__init__.py +++ b/aeon/anomaly_detection/whole_series/__init__.py @@ -1 +1,11 @@ """Whole-series anomaly detection methods.""" + +__all__ = [ + "BaseCollectionAnomalyDetector", + "OutlierDetectionClassifier", +] + +from 
aeon.anomaly_detection.whole_series._outlier_detection import ( + OutlierDetectionClassifier, +) +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector diff --git a/aeon/utils/base/_register.py b/aeon/utils/base/_register.py index fce96c6ca2..1b986b1bf9 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -38,7 +38,7 @@ "transformer": BaseTransformer, # estimator types "anomaly-detector": BaseAnomalyDetector, - "collection-anomaly-detector": BaseCollectionAnomalyDetector, + "collection_anomaly_detector": BaseCollectionAnomalyDetector, "collection-transformer": BaseCollectionTransformer, "classifier": BaseClassifier, "clusterer": BaseClusterer, From 8eb67c883ebb00f19d23743dee6ec6c6b4aadab7 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 8 Nov 2024 16:33:13 +0000 Subject: [PATCH 03/24] decorator --- aeon/anomaly_detection/whole_series/_outlier_detection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py index a228b6b644..7c415587ff 100644 --- a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -30,5 +30,6 @@ def _predict(self, X): pred[pred == -1] = 0 return pred + @classmethod def _get_test_params(cls, parameter_set="default"): return {"estimator": IsolationForest()} From a6d33abbe25089f6c01f68f7c628f613abe5be30 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 8 Nov 2024 17:10:27 +0000 Subject: [PATCH 04/24] correct import and tag --- aeon/anomaly_detection/whole_series/_outlier_detection.py | 5 +++-- aeon/utils/tags/_tags.py | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py index 7c415587ff..53ac4bea29 100644 --- 
a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -1,6 +1,7 @@ """Basic outlier detection classifier.""" -from aeon.anomaly_detection import IsolationForest +from sklearn.ensemble import IsolationForest + from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base._base import _clone_estimator @@ -32,4 +33,4 @@ def _predict(self, X): @classmethod def _get_test_params(cls, parameter_set="default"): - return {"estimator": IsolationForest()} + return {"estimator": IsolationForest(n_estimators=3)} diff --git a/aeon/utils/tags/_tags.py b/aeon/utils/tags/_tags.py index 650a7c4dc4..75fc77223b 100644 --- a/aeon/utils/tags/_tags.py +++ b/aeon/utils/tags/_tags.py @@ -130,7 +130,12 @@ class : identifier for the base class of objects this tag applies to "point belongs to.", }, "requires_y": { - "class": ["transformer", "anomaly-detector", "segmenter"], + "class": [ + "transformer", + "anomaly-detector", + "collection_anomaly_detector", + "segmenter", + ], "type": "bool", "description": "Does this estimator require y to be passed in its methods?", }, From 2db96f26237c9d311acebb6702e87459ba464687 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Mon, 14 Apr 2025 14:29:22 +0100 Subject: [PATCH 05/24] base docs --- aeon/anomaly_detection/whole_series/base.py | 115 +++++++++++++++++++- aeon/classification/base.py | 2 - 2 files changed, 111 insertions(+), 6 deletions(-) diff --git a/aeon/anomaly_detection/whole_series/base.py b/aeon/anomaly_detection/whole_series/base.py index 305053795c..9194ae557b 100644 --- a/aeon/anomaly_detection/whole_series/base.py +++ b/aeon/anomaly_detection/whole_series/base.py @@ -1,4 +1,27 @@ -"""Abstract base class for whole-series/collection anomaly detectors.""" +""" +Abstract base class for whole-series/collection anomaly detectors. 
+ + class name: BaseCollectionAnomalyDetector + +Defining methods: + fitting - fit(self, X, y) + predicting - predict(self, X) + +Data validation: + data processing - _preprocess_collection(self, X, store_metadata=True) + shape verification - _check_shape(self, X) + +State: + fitted model/strategy - by convention, any attributes ending in "_" + fitted state flag - is_fitted + train input metadata - metadata_ + resetting state - reset(self) + +Tags: + default estimator tags - _tags + tag retrieval - get_tag(self, tag_name) + tag setting - set_tag(self, tag_name, value) +""" __maintainer__ = ["MatthewMiddlehurst"] __all__ = ["BaseCollectionAnomalyDetector"] @@ -13,7 +36,23 @@ class BaseCollectionAnomalyDetector(BaseCollectionEstimator): - """Collection anomaly detector base class.""" + """ + Abstract base class for collection anomaly detectors. + + The base detector specifies the methods and method signatures that all + collection anomaly detectors have to implement. Attributes with an underscore + suffix are set in the method fit. + + Attributes + ---------- + is_fitted : bool + True if the estimator has been fitted, False otherwise. + Unused if ``"fit_is_empty"`` tag is set to True. + metadata_ : dict + Dictionary containing metadata about the `fit` input data. + _tags_dynamic : dict + Dictionary containing dynamic tag values which have been set at runtime. + """ _tags = { "fit_is_empty": False, @@ -25,7 +64,42 @@ def __init__(self): @final def fit(self, X, y=None): - """Fit.""" + """Fit collection anomaly detector to training data. 
+ + Parameters + ---------- + X : np.ndarray or list + Input data, any number of channels, equal length series of shape ``( + n_cases, n_channels, n_timepoints)`` + or 2D np.array (univariate, equal length series) of shape + ``(n_cases, n_timepoints)`` + or list of numpy arrays (any number of channels, unequal length series) + of shape ``[n_cases]``, 2D np.array ``(n_channels, n_timepoints_i)``, + where ``n_timepoints_i`` is length of series ``i``. Other types are + allowed and converted into one of the above. + + Different estimators have different capabilities to handle different + types of input. If ``self.get_tag("capability:multivariate")`` is False, + they cannot handle multivariate series, so either ``n_channels == 1`` is + true or X is 2D of shape ``(n_cases, n_timepoints)``. If ``self.get_tag( + "capability:unequal_length")`` is False, they cannot handle unequal + length input. In both situations, a ``ValueError`` is raised if X has a + characteristic that the estimator does not have the capability for is + passed. + y : np.ndarray + 1D np.array of int, of shape ``(n_cases)`` - anomaly labels + (ground truth) for fitting indices corresponding to instance indices in X. + + Returns + ------- + self : BaseCollectionAnomalyDetector + Reference to self. + + Notes + ----- + Changes state by creating a fitted model that updates attributes + ending in "_" and sets is_fitted flag to True. + """ if self.get_tag("fit_is_empty"): self.is_fitted = True return self @@ -49,7 +123,35 @@ def fit(self, X, y=None): @final def predict(self, X): - """Predict.""" + """Predicts anomalies for time series in X. 
+ + Parameters + ---------- + X : np.ndarray or list + Input data, any number of channels, equal length series of shape ``( + n_cases, n_channels, n_timepoints)`` + or 2D np.array (univariate, equal length series) of shape + ``(n_cases, n_timepoints)`` + or list of numpy arrays (any number of channels, unequal length series) + of shape ``[n_cases]``, 2D np.array ``(n_channels, n_timepoints_i)``, + where ``n_timepoints_i`` is length of series ``i`` + other types are allowed and converted into one of the above. + + Different estimators have different capabilities to handle different + types of input. If ``self.get_tag("capability:multivariate")`` is False, + they cannot handle multivariate series, so either ``n_channels == 1`` is + true or X is 2D of shape ``(n_cases, n_timepoints)``. If ``self.get_tag( + "capability:unequal_length")`` is False, they cannot handle unequal + length input. In both situations, a ``ValueError`` is raised if X has a + characteristic that the estimator does not have the capability for is + passed. + + Returns + ------- + predictions : np.ndarray + 1D np.array of float, of shape (n_cases) - predicted anomalies + indices correspond to instance indices in X + """ fit_empty = self.get_tag("fit_is_empty") if not fit_empty: self._check_is_fitted() @@ -67,6 +169,11 @@ def _fit(self, X, y=None): ... def _predict(self, X): ... def _check_y(self, y, n_cases): + """Check y input is valid. + + Must be 1-dimensional and contain only 0s (no anomaly) and 1s (anomaly). + Must match the number of cases in X. + """ if not isinstance(y, (pd.Series, np.ndarray)): raise TypeError( f"y must be a np.array or a pd.Series, but found type: {type(y)}" diff --git a/aeon/classification/base.py b/aeon/classification/base.py index 92d3b304a8..fa7ed89cb6 100644 --- a/aeon/classification/base.py +++ b/aeon/classification/base.py @@ -52,8 +52,6 @@ class BaseClassifier(ClassifierMixin, BaseCollectionEstimator): Number of classes (length of ``classes_``). 
_class_dictionary : dict Mapping of classes_ onto integers ``0 ... n_classes_-1``. - _estimator_type : string - The type of estimator. Required by some ``sklearn`` tools, set to "classifier". """ _tags = { From 1920ee49131f4bb0a8e5a4b261466cc3e32c137e Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Mon, 14 Apr 2025 15:18:11 +0100 Subject: [PATCH 06/24] wrappers --- .../whole_series/_classification.py | 65 +++++++++++++++++++ .../whole_series/_outlier_detection.py | 42 +++++++++--- 2 files changed, 97 insertions(+), 10 deletions(-) create mode 100644 aeon/anomaly_detection/whole_series/_classification.py diff --git a/aeon/anomaly_detection/whole_series/_classification.py b/aeon/anomaly_detection/whole_series/_classification.py new file mode 100644 index 0000000000..dbe50a127c --- /dev/null +++ b/aeon/anomaly_detection/whole_series/_classification.py @@ -0,0 +1,65 @@ +"""Adapter to use classification algorithms for collection anomaly detection.""" + +__maintainer__ = [] + + +from sklearn.base import ClassifierMixin +from sklearn.ensemble import RandomForestClassifier + +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.base._base import _clone_estimator +from aeon.classification.feature_based import SummaryClassifier + + +class ClassificationAdapter(BaseCollectionAnomalyDetector): + """ + Basic classifier adapter for collection anomaly detection. + + This class wraps a classification algorithm to be used as an anomaly detector. + Anomaly labels are required for training. + + Parameters + ---------- + classifier : aeon classifier or ClassifierMixin + The classification algorithm to be adapted. + random_state : int, RandomState instance or None, default=None + If `int`, random_state is the seed used by the random number generator; + If `RandomState` instance, random_state is the random number generator; + If `None`, the random number generator is the `RandomState` instance used + by `np.random`. 
+ """ + + _tags = { + "X_inner_type": "numpy2D", + "requires_y": True, + } + + def __init__(self, classifier, random_state=None): + self.classifier = classifier + self.random_state = random_state + + super().__init__() + + def _fit(self, X, y=None): + if not isinstance(self.classifier, ClassifierMixin): + raise ValueError( + "The estimator must be an aeon classification algorithm " + "or class that implements the ClassifierMixin interface." + ) + + self.classifier_ = _clone_estimator( + self.classifier, random_state=self.random_state + ) + self.classifier_.fit(X, y) + return self + + def _predict(self, X): + return self.classifier_.predict(X) + + @classmethod + def _get_test_params(cls, parameter_set="default"): + return { + "estimator": SummaryClassifier( + estimator=RandomForestClassifier(n_estimators=5) + ) + } diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py index 53ac4bea29..95a7aade88 100644 --- a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -1,33 +1,55 @@ -"""Basic outlier detection classifier.""" +"""Adapter to use outlier detection algorithms for collection anomaly detection.""" +__maintainer__ = [] + +from sklearn.base import OutlierMixin from sklearn.ensemble import IsolationForest from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base._base import _clone_estimator -class OutlierDetectionClassifier(BaseCollectionAnomalyDetector): - """Basic outlier detection classifier.""" +class OutlierDetectionAdapter(BaseCollectionAnomalyDetector): + """ + Basic outlier detection adapter for collection anomaly detection. + + This class wraps an sklearn outlier detection algorithm to be used as an anomaly + detector. + + Parameters + ---------- + detector : OutlierMixin + The outlier detection algorithm to be adapted. 
+ random_state : int, RandomState instance or None, default=None + If `int`, random_state is the seed used by the random number generator; + If `RandomState` instance, random_state is the random number generator; + If `None`, the random number generator is the `RandomState` instance used + by `np.random`. + """ _tags = { "X_inner_type": "numpy2D", } - def __init__(self, estimator, random_state=None): - self.estimator = estimator + def __init__(self, detector, random_state=None): + self.detector = detector self.random_state = random_state super().__init__() def _fit(self, X, y=None): - self.estimator_ = _clone_estimator( - self.estimator, random_state=self.random_state - ) - self.estimator_.fit(X, y) + if not isinstance(self.detector, OutlierMixin): + raise ValueError( + "The estimator must be an outlier detection algorithm " + "that implements the OutlierMixin interface." + ) + + self.detector_ = _clone_estimator(self.detector, random_state=self.random_state) + self.detector_.fit(X, y) return self def _predict(self, X): - pred = self.estimator_.predict(X) + pred = self.detector_.predict(X) pred[pred == -1] = 0 return pred From debc38b6f78dcb534b2b4eab72f07bf4cb06f275 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 17 Apr 2025 22:02:49 +0100 Subject: [PATCH 07/24] tests --- ...eld_collection_anomaly_detection_checks.py | 64 ++++++++++++++++++- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py b/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py index 40f7fe7aaf..76c91e312e 100644 --- a/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py +++ b/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py @@ -1,9 +1,69 @@ """Tests for all collection anomaly detectors.""" +from functools import partial + +from aeon.base._base import _clone_estimator +from aeon.testing.testing_data import 
FULL_TEST_DATA_DICT +from aeon.testing.utils.estimator_checks import _assert_predict_labels +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES + def _yield_collection_anomaly_detection_checks( estimator_class, estimator_instances, datatypes ): """Yield all collection anomaly detection checks for an aeon estimator.""" - # nothing currently! - return [] + # only class required + yield partial( + check_collection_detector_overrides_and_tags, estimator_class=estimator_class + ) + + # test class instances + for i, estimator in enumerate(estimator_instances): + # test all data types + for datatype in datatypes[i]: + yield partial( + check_collection_detector_output, estimator=estimator, datatype=datatype + ) + + +def check_collection_detector_overrides_and_tags(estimator_class): + """Test compliance with the detector base class contract.""" + # Test they don't override final methods, because Python does not enforce this + final_methods = [ + "fit", + "predict", + ] + for method in final_methods: + if method in estimator_class.__dict__: + raise ValueError( + f"Collection anomaly detector {estimator_class} overrides the " + f"method {method}. Override _{method} instead." 
+ ) + + # Test valid tag for X_inner_type + X_inner_type = estimator_class.get_class_tag(tag_name="X_inner_type") + if isinstance(X_inner_type, str): + assert X_inner_type in COLLECTIONS_DATA_TYPES + else: # must be a list + assert all([t in COLLECTIONS_DATA_TYPES for t in X_inner_type]) + + # one of X_inner_types must be capable of storing unequal length + if estimator_class.get_class_tag("capability:unequal_length"): + valid_unequal_types = ["np-list", "df-list", "pd-multiindex"] + if isinstance(X_inner_type, str): + assert X_inner_type in valid_unequal_types + else: # must be a list + assert any([t in valid_unequal_types for t in X_inner_type]) + + +def check_collection_detector_output(estimator, datatype): + """Test detector outputs the correct data types and values.""" + estimator = _clone_estimator(estimator) + + # run fit and predict + estimator.fit( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) + y_pred = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(y_pred, datatype, unique_labels=[0, 1]) From 7179b2602f4af0da22ee57d6ac6dc47fcb18ed7d Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 18 Apr 2025 13:55:47 +0100 Subject: [PATCH 08/24] docs and imports --- aeon/anomaly_detection/whole_series/__init__.py | 6 ++++-- docs/api_reference/anomaly_detection.rst | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/aeon/anomaly_detection/whole_series/__init__.py b/aeon/anomaly_detection/whole_series/__init__.py index 14d0e5c466..7084ff40a9 100644 --- a/aeon/anomaly_detection/whole_series/__init__.py +++ b/aeon/anomaly_detection/whole_series/__init__.py @@ -2,10 +2,12 @@ __all__ = [ "BaseCollectionAnomalyDetector", - "OutlierDetectionClassifier", + "ClassificationAdapter", + "OutlierDetectionAdapter", ] +from aeon.anomaly_detection.whole_series._classification import ClassificationAdapter from 
aeon.anomaly_detection.whole_series._outlier_detection import ( - OutlierDetectionClassifier, + OutlierDetectionAdapter, ) from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector diff --git a/docs/api_reference/anomaly_detection.rst b/docs/api_reference/anomaly_detection.rst index 082c082fc4..baa8ee0532 100644 --- a/docs/api_reference/anomaly_detection.rst +++ b/docs/api_reference/anomaly_detection.rst @@ -36,6 +36,19 @@ Detectors STOMP STRAY +Whole-series +------------ + +.. currentmodule:: aeon.anomaly_detection.whole_series + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + BaseCollectionAnomalyDetector + ClassificationAdapter + OutlierDetectionAdapter + Base ---- From 3d37a81793b321c35258d6399225dada4bc58b22 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 18 Apr 2025 13:59:37 +0100 Subject: [PATCH 09/24] test params --- aeon/anomaly_detection/whole_series/_classification.py | 2 +- aeon/anomaly_detection/whole_series/_outlier_detection.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/anomaly_detection/whole_series/_classification.py b/aeon/anomaly_detection/whole_series/_classification.py index dbe50a127c..849312a9a2 100644 --- a/aeon/anomaly_detection/whole_series/_classification.py +++ b/aeon/anomaly_detection/whole_series/_classification.py @@ -59,7 +59,7 @@ def _predict(self, X): @classmethod def _get_test_params(cls, parameter_set="default"): return { - "estimator": SummaryClassifier( + "classifier": SummaryClassifier( estimator=RandomForestClassifier(n_estimators=5) ) } diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py index 95a7aade88..ac702c7297 100644 --- a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -55,4 +55,4 @@ def _predict(self, X): @classmethod def _get_test_params(cls, parameter_set="default"): - 
return {"estimator": IsolationForest(n_estimators=3)} + return {"detector": IsolationForest(n_estimators=3)} From 4ba76925647bff4804b288219fcaf3649560dec1 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 18 Apr 2025 14:16:38 +0100 Subject: [PATCH 10/24] register --- aeon/utils/base/_register.py | 2 +- aeon/utils/tags/_tags.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/utils/base/_register.py b/aeon/utils/base/_register.py index 78372b04cb..a4d8b2d303 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -39,7 +39,7 @@ "transformer": BaseTransformer, # estimator types "anomaly-detector": BaseAnomalyDetector, - "collection_anomaly_detector": BaseCollectionAnomalyDetector, + "collection-anomaly-detector": BaseCollectionAnomalyDetector, "collection-transformer": BaseCollectionTransformer, "classifier": BaseClassifier, "clusterer": BaseClusterer, diff --git a/aeon/utils/tags/_tags.py b/aeon/utils/tags/_tags.py index ba7456c617..1e973b5a4f 100644 --- a/aeon/utils/tags/_tags.py +++ b/aeon/utils/tags/_tags.py @@ -141,7 +141,7 @@ class : identifier for the base class of objects this tag applies to "class": [ "transformer", "anomaly-detector", - "collection_anomaly_detector", + "collection-anomaly-detector", "segmenter", ], "type": "bool", From d47b53720acdd48d326affc15b64cc1337c293b9 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Mon, 5 May 2025 21:49:58 +0100 Subject: [PATCH 11/24] big refactor --- aeon/anomaly_detection/__init__.py | 2 - aeon/anomaly_detection/base.py | 209 +------------ aeon/anomaly_detection/collection/__init__.py | 11 + .../_classification.py | 2 +- .../_outlier_detection.py | 2 +- .../{whole_series => collection}/base.py | 0 .../distance_based/__init__.py | 19 -- .../distribution_based/__init__.py | 9 - .../outlier_detection/__init__.py | 11 - aeon/anomaly_detection/series/__init__.py | 9 + .../_pyodadapter.py | 4 +- aeon/anomaly_detection/series/base.py | 293 
++++++++++++++++++ .../series/distance_based/__init__.py | 21 ++ .../{ => series}/distance_based/_cblof.py | 2 +- .../{ => series}/distance_based/_kmeans.py | 4 +- .../distance_based/_left_stampi.py | 4 +- .../{ => series}/distance_based/_lof.py | 2 +- .../{ => series}/distance_based/_merlin.py | 4 +- .../distance_based}/_rockad.py | 4 +- .../{ => series}/distance_based/_stomp.py | 4 +- .../distance_based/tests/__init__.py | 0 .../distance_based/tests/test_cblof.py | 2 +- .../distance_based/tests/test_kmeans.py | 2 +- .../distance_based/tests/test_left_stampi.py | 2 +- .../distance_based/tests/test_lof.py | 2 +- .../distance_based/tests/test_merlin.py | 2 +- .../distance_based}/tests/test_rockad.py | 2 +- .../distance_based/tests/test_stomp.py | 2 +- .../series/distribution_based/__init__.py | 9 + .../{ => series}/distribution_based/_copod.py | 2 +- .../distribution_based/_dwt_mlead.py | 4 +- .../distribution_based/tests/__init__.py | 0 .../distribution_based/tests/test_copod.py | 2 +- .../tests/test_dwt_mlead.py | 2 +- .../series/outlier_detection/__init__.py | 9 + .../outlier_detection/_iforest.py | 2 +- .../outlier_detection}/_one_class_svm.py | 4 +- .../{ => series}/outlier_detection/_stray.py | 4 +- .../outlier_detection/tests/__init__.py | 0 .../outlier_detection/tests/test_iforest.py | 2 +- .../tests/test_one_class_svm.py | 2 +- .../outlier_detection/tests/test_stray.py | 2 +- .../{ => series}/tests/__init__.py | 0 .../{ => series}/tests/test_base.py | 0 .../tests/test_pyod_adapter.py | 2 +- .../whole_series/__init__.py | 13 - .../whole_series/tests/__init__.py | 1 - .../_yield_estimator_checks.py | 4 +- .../_mock_anomaly_detectors.py | 4 +- aeon/testing/testing_data.py | 6 +- aeon/utils/base/_identifier.py | 2 + aeon/utils/base/_register.py | 15 +- aeon/utils/base/tests/test_identifier.py | 2 +- aeon/utils/tags/_tags.py | 1 - aeon/utils/tags/tests/test_discovery.py | 6 +- aeon/utils/tests/test_discovery.py | 4 +- docs/developer_guide/adding_typehints.md | 
12 +- .../anomaly_detection/anomaly_detection.ipynb | 12 +- 58 files changed, 442 insertions(+), 316 deletions(-) create mode 100644 aeon/anomaly_detection/collection/__init__.py rename aeon/anomaly_detection/{whole_series => collection}/_classification.py (96%) rename aeon/anomaly_detection/{whole_series => collection}/_outlier_detection.py (95%) rename aeon/anomaly_detection/{whole_series => collection}/base.py (100%) delete mode 100644 aeon/anomaly_detection/distance_based/__init__.py delete mode 100644 aeon/anomaly_detection/distribution_based/__init__.py delete mode 100644 aeon/anomaly_detection/outlier_detection/__init__.py create mode 100644 aeon/anomaly_detection/series/__init__.py rename aeon/anomaly_detection/{outlier_detection => series}/_pyodadapter.py (98%) create mode 100644 aeon/anomaly_detection/series/base.py create mode 100644 aeon/anomaly_detection/series/distance_based/__init__.py rename aeon/anomaly_detection/{ => series}/distance_based/_cblof.py (98%) rename aeon/anomaly_detection/{ => series}/distance_based/_kmeans.py (98%) rename aeon/anomaly_detection/{ => series}/distance_based/_left_stampi.py (97%) rename aeon/anomaly_detection/{ => series}/distance_based/_lof.py (98%) rename aeon/anomaly_detection/{ => series}/distance_based/_merlin.py (98%) rename aeon/anomaly_detection/{whole_series => series/distance_based}/_rockad.py (98%) rename aeon/anomaly_detection/{ => series}/distance_based/_stomp.py (97%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/__init__.py (100%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_cblof.py (97%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_kmeans.py (95%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_left_stampi.py (99%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_lof.py (99%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_merlin.py (96%) rename 
aeon/anomaly_detection/{whole_series => series/distance_based}/tests/test_rockad.py (96%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_stomp.py (95%) create mode 100644 aeon/anomaly_detection/series/distribution_based/__init__.py rename aeon/anomaly_detection/{ => series}/distribution_based/_copod.py (97%) rename aeon/anomaly_detection/{ => series}/distribution_based/_dwt_mlead.py (98%) rename aeon/anomaly_detection/{ => series}/distribution_based/tests/__init__.py (100%) rename aeon/anomaly_detection/{ => series}/distribution_based/tests/test_copod.py (96%) rename aeon/anomaly_detection/{ => series}/distribution_based/tests/test_dwt_mlead.py (95%) create mode 100644 aeon/anomaly_detection/series/outlier_detection/__init__.py rename aeon/anomaly_detection/{ => series}/outlier_detection/_iforest.py (98%) rename aeon/anomaly_detection/{distance_based => series/outlier_detection}/_one_class_svm.py (98%) rename aeon/anomaly_detection/{ => series}/outlier_detection/_stray.py (98%) rename aeon/anomaly_detection/{ => series}/outlier_detection/tests/__init__.py (100%) rename aeon/anomaly_detection/{ => series}/outlier_detection/tests/test_iforest.py (98%) rename aeon/anomaly_detection/{distance_based => series/outlier_detection}/tests/test_one_class_svm.py (95%) rename aeon/anomaly_detection/{ => series}/outlier_detection/tests/test_stray.py (98%) rename aeon/anomaly_detection/{ => series}/tests/__init__.py (100%) rename aeon/anomaly_detection/{ => series}/tests/test_base.py (100%) rename aeon/anomaly_detection/{outlier_detection => series}/tests/test_pyod_adapter.py (98%) delete mode 100644 aeon/anomaly_detection/whole_series/__init__.py delete mode 100644 aeon/anomaly_detection/whole_series/tests/__init__.py diff --git a/aeon/anomaly_detection/__init__.py b/aeon/anomaly_detection/__init__.py index 65343cd774..878e29fd32 100644 --- a/aeon/anomaly_detection/__init__.py +++ b/aeon/anomaly_detection/__init__.py @@ -3,5 +3,3 @@ __all__ = [ 
"BaseAnomalyDetector", ] - -from aeon.anomaly_detection.base import BaseAnomalyDetector diff --git a/aeon/anomaly_detection/base.py b/aeon/anomaly_detection/base.py index 2e333cf755..60d35f0e2e 100644 --- a/aeon/anomaly_detection/base.py +++ b/aeon/anomaly_detection/base.py @@ -4,87 +4,23 @@ __all__ = ["BaseAnomalyDetector"] from abc import abstractmethod -from typing import final import numpy as np -import pandas as pd -from aeon.base import BaseSeriesEstimator -from aeon.base._base_series import VALID_SERIES_INPUT_TYPES +from aeon.base import BaseAeonEstimator -class BaseAnomalyDetector(BaseSeriesEstimator): - """Base class for anomaly detection algorithms. - - Anomaly detection algorithms are used to identify anomalous subsequences in time - series data. These algorithms take a series of length m and return a boolean, int or - float array of length m, where each element indicates whether the corresponding - subsequence is anomalous or its anomaly score. - - Input and internal data format (where m is the number of time points and d is the - number of channels): - Univariate series (default): - np.ndarray, shape ``(m,)``, ``(m, 1)`` or ``(1, m)`` depending on axis. - This is converted to a 2D np.ndarray internally. - pd.DataFrame, shape ``(m, 1)`` or ``(1, m)`` depending on axis. - pd.Series, shape ``(m,)``. - Multivariate series: - np.ndarray array, shape ``(m, d)`` or ``(d, m)`` depending on axis. - pd.DataFrame ``(m, d)`` or ``(d, m)`` depending on axis. - - Output data format (one of the following): - Anomaly scores (default): - np.ndarray, shape ``(m,)`` of type float. For each point of the input time - series, the anomaly score is a float value indicating the degree of - anomalousness. The higher the score, the more anomalous the point. - Binary classification: - np.ndarray, shape ``(m,)`` of type bool or int. 
For each point of the input - time series, the output is a boolean or integer value indicating whether the - point is anomalous (``True``/``1``) or not (``False``/``0``). - - Detector learning types: - Unsupervised (default): - Unsupervised detectors do not require any training data and can directly be - used on the target time series. Their tags are set to ``fit_is_empty=True`` - and ``requires_y=False``. You would usually call the ``fit_predict`` method - on these detectors. - Semi-supervised: - Semi-supervised detectors require a training step on a time series without - anomalies (normal behaving time series). The target value ``y`` would - consist of only zeros. Thus, these algorithms have logic in the ``fit`` - method, but do not require the target values. Their tags are set to - ``fit_is_empty=False`` and ``requires_y=False``. You would usually first - call the ``fit`` method on the training data and then the ``predict`` - method for your target time series. - Supervised: - Supervised detectors require a training step on a time series with known - anomalies (anomalies should be present and must be annotated). The detector - implements the ``fit`` method, and the target value ``y`` consists of zeros - and ones. Their tags are, thus, set to ``fit_is_empty=False`` and - ``requires_y=True``. You would usually first call the ``fit`` method on the - training data and then the ``predict`` method for your target time series. - - Parameters - ---------- - axis : int - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. - Setting this class variable will convert the input data to the chosen axis. 
- """ +class BaseAnomalyDetector(BaseAeonEstimator): + """todo base class docs.""" _tags = { - "X_inner_type": "np.ndarray", # One of VALID_SERIES_INNER_TYPES - "fit_is_empty": True, - "requires_y": False, + # todo } def __init__(self, axis): - super().__init__(axis=axis) + super().__init__() - @final + @abstractmethod def fit(self, X, y=None, axis=1): """Fit time series anomaly detector to X. @@ -112,31 +48,12 @@ def fit(self, X, y=None, axis=1): Returns ------- - BaseAnomalyDetector + BaseSeriesAnomalyDetector The fitted estimator, reference to self. """ - if self.get_tag("fit_is_empty"): - self.is_fitted = True - return self - - if self.get_tag("requires_y"): - if y is None: - raise ValueError("Tag requires_y is true, but fit called with y=None") - - # reset estimator at the start of fit - self.reset() - - X = self._preprocess_series(X, axis, True) - if y is not None: - y = self._check_y(y) + ... - self._fit(X=X, y=y) - - # this should happen last - self.is_fitted = True - return self - - @final + @abstractmethod def predict(self, X, axis=1) -> np.ndarray: """Find anomalies in X. @@ -159,15 +76,9 @@ def predict(self, X, axis=1) -> np.ndarray: A boolean, int or float array of length len(X), where each element indicates whether the corresponding subsequence is anomalous or its anomaly score. """ - fit_empty = self.get_tag("fit_is_empty") - if not fit_empty: - self._check_is_fitted() - - X = self._preprocess_series(X, axis, False) + ... - return self._predict(X) - - @final + @abstractmethod def fit_predict(self, X, y=None, axis=1) -> np.ndarray: """Fit time series anomaly detector and find anomalies for X. @@ -194,100 +105,4 @@ def fit_predict(self, X, y=None, axis=1) -> np.ndarray: A boolean, int or float array of length len(X), where each element indicates whether the corresponding subsequence is anomalous or its anomaly score. 
""" - if self.get_tag("requires_y"): - if y is None: - raise ValueError("Tag requires_y is true, but fit called with y=None") - - # reset estimator at the start of fit - self.reset() - - X = self._preprocess_series(X, axis, True) - - if self.get_tag("fit_is_empty"): - self.is_fitted = True - return self._predict(X) - - if y is not None: - y = self._check_y(y) - - pred = self._fit_predict(X, y) - - # this should happen last - self.is_fitted = True - return pred - - def _fit(self, X, y): - return self - - @abstractmethod - def _predict(self, X) -> np.ndarray: ... - - def _fit_predict(self, X, y): - self._fit(X, y) - return self._predict(X) - - def _check_y(self, y: VALID_SERIES_INPUT_TYPES) -> np.ndarray: - # Remind user if y is not required for this estimator on failure - req_msg = ( - f"{self.__class__.__name__} does not require a y input." - if self.get_tag("requires_y") - else "" - ) - new_y = y - - # must be a valid input type, see VALID_SERIES_INPUT_TYPES in - # BaseSeriesEstimator - if isinstance(y, np.ndarray): - # check valid shape - if y.ndim > 1: - raise ValueError( - "Error in input type for y: y input as np.ndarray should be 1D." - + req_msg - ) - - # check valid dtype - fail = False - if issubclass(y.dtype.type, np.integer): - new_y = y.astype(bool) - fail = not np.array_equal(y, new_y) - elif not issubclass(y.dtype.type, np.bool_): - fail = True - - if fail: - raise ValueError( - "Error in input type for y: y input type must be an integer array " - "containing 0 and 1 or a boolean array." + req_msg - ) - elif isinstance(y, pd.Series): - # check series is of boolean dtype - if not pd.api.types.is_bool_dtype(y): - raise ValueError( - "Error in input type for y: y input as pd.Series must have a " - "boolean dtype." + req_msg - ) - - new_y = y.values - elif isinstance(y, pd.DataFrame): - # only accept size 1 dataframe - if y.shape[1] > 1: - raise ValueError( - "Error in input type for y: y input as pd.DataFrame should have a " - "single column series." 
- ) - - # check column is of boolean dtype - if not all(pd.api.types.is_bool_dtype(y[col]) for col in y.columns): - raise ValueError( - "Error in input type for y: y input as pd.DataFrame must have a " - "boolean dtype." + req_msg - ) - - new_y = y.squeeze().values - else: - raise ValueError( - f"Error in input type for y: it should be one of " - f"{VALID_SERIES_INPUT_TYPES}, saw {type(y)}" - ) - - new_y = new_y.astype(bool) - return new_y + ... diff --git a/aeon/anomaly_detection/collection/__init__.py b/aeon/anomaly_detection/collection/__init__.py new file mode 100644 index 0000000000..4fc14ffd1f --- /dev/null +++ b/aeon/anomaly_detection/collection/__init__.py @@ -0,0 +1,11 @@ +"""Whole-series anomaly detection methods.""" + +__all__ = [ + "BaseCollectionAnomalyDetector", + "ClassificationAdapter", + "OutlierDetectionAdapter", +] + +from aeon.anomaly_detection.collection._classification import ClassificationAdapter +from aeon.anomaly_detection.collection._outlier_detection import OutlierDetectionAdapter +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector diff --git a/aeon/anomaly_detection/whole_series/_classification.py b/aeon/anomaly_detection/collection/_classification.py similarity index 96% rename from aeon/anomaly_detection/whole_series/_classification.py rename to aeon/anomaly_detection/collection/_classification.py index 849312a9a2..d8aa34ebea 100644 --- a/aeon/anomaly_detection/whole_series/_classification.py +++ b/aeon/anomaly_detection/collection/_classification.py @@ -6,7 +6,7 @@ from sklearn.base import ClassifierMixin from sklearn.ensemble import RandomForestClassifier -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector from aeon.base._base import _clone_estimator from aeon.classification.feature_based import SummaryClassifier diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py 
b/aeon/anomaly_detection/collection/_outlier_detection.py similarity index 95% rename from aeon/anomaly_detection/whole_series/_outlier_detection.py rename to aeon/anomaly_detection/collection/_outlier_detection.py index ac702c7297..db40a8e749 100644 --- a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/collection/_outlier_detection.py @@ -5,7 +5,7 @@ from sklearn.base import OutlierMixin from sklearn.ensemble import IsolationForest -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector from aeon.base._base import _clone_estimator diff --git a/aeon/anomaly_detection/whole_series/base.py b/aeon/anomaly_detection/collection/base.py similarity index 100% rename from aeon/anomaly_detection/whole_series/base.py rename to aeon/anomaly_detection/collection/base.py diff --git a/aeon/anomaly_detection/distance_based/__init__.py b/aeon/anomaly_detection/distance_based/__init__.py deleted file mode 100644 index 5eb342b780..0000000000 --- a/aeon/anomaly_detection/distance_based/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Distance basedTime Series Anomaly Detection.""" - -__all__ = [ - "CBLOF", - "KMeansAD", - "LeftSTAMPi", - "LOF", - "MERLIN", - "OneClassSVM", - "STOMP", -] - -from aeon.anomaly_detection.distance_based._cblof import CBLOF -from aeon.anomaly_detection.distance_based._kmeans import KMeansAD -from aeon.anomaly_detection.distance_based._left_stampi import LeftSTAMPi -from aeon.anomaly_detection.distance_based._lof import LOF -from aeon.anomaly_detection.distance_based._merlin import MERLIN -from aeon.anomaly_detection.distance_based._one_class_svm import OneClassSVM -from aeon.anomaly_detection.distance_based._stomp import STOMP diff --git a/aeon/anomaly_detection/distribution_based/__init__.py b/aeon/anomaly_detection/distribution_based/__init__.py deleted file mode 100644 index e52a7512ba..0000000000 --- 
a/aeon/anomaly_detection/distribution_based/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Distribution based Time Series Anomaly Detection.""" - -__all__ = [ - "COPOD", - "DWT_MLEAD", -] - -from aeon.anomaly_detection.distribution_based._copod import COPOD -from aeon.anomaly_detection.distribution_based._dwt_mlead import DWT_MLEAD diff --git a/aeon/anomaly_detection/outlier_detection/__init__.py b/aeon/anomaly_detection/outlier_detection/__init__.py deleted file mode 100644 index ad9b7868e5..0000000000 --- a/aeon/anomaly_detection/outlier_detection/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Time Series Outlier Detection.""" - -__all__ = [ - "IsolationForest", - "PyODAdapter", - "STRAY", -] - -from aeon.anomaly_detection.outlier_detection._iforest import IsolationForest -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter -from aeon.anomaly_detection.outlier_detection._stray import STRAY diff --git a/aeon/anomaly_detection/series/__init__.py b/aeon/anomaly_detection/series/__init__.py new file mode 100644 index 0000000000..a4d2052d1c --- /dev/null +++ b/aeon/anomaly_detection/series/__init__.py @@ -0,0 +1,9 @@ +"""Single series Time Series Anomaly Detection.""" + +__all__ = [ + "BaseSeriesAnomalyDetector", + "PyODAdapter", +] + +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector diff --git a/aeon/anomaly_detection/outlier_detection/_pyodadapter.py b/aeon/anomaly_detection/series/_pyodadapter.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/_pyodadapter.py rename to aeon/anomaly_detection/series/_pyodadapter.py index 5a068857c6..aa4e1f9779 100644 --- a/aeon/anomaly_detection/outlier_detection/_pyodadapter.py +++ b/aeon/anomaly_detection/series/_pyodadapter.py @@ -10,7 +10,7 @@ import numpy as np from sklearn import clone -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base 
import BaseSeriesAnomalyDetector from aeon.utils.validation._dependencies import _check_soft_dependencies from aeon.utils.windowing import reverse_windowing, sliding_windows @@ -18,7 +18,7 @@ from pyod.models.base import BaseDetector -class PyODAdapter(BaseAnomalyDetector): +class PyODAdapter(BaseSeriesAnomalyDetector): """Adapter for PyOD anomaly detection models to be used in the Aeon framework. This adapter allows the use of PyOD models in the Aeon framework. The adapter diff --git a/aeon/anomaly_detection/series/base.py b/aeon/anomaly_detection/series/base.py new file mode 100644 index 0000000000..57863d9f3d --- /dev/null +++ b/aeon/anomaly_detection/series/base.py @@ -0,0 +1,293 @@ +"""Abstract base class for series time series anomaly detectors.""" + +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = ["BaseSeriesAnomalyDetector"] + +from abc import abstractmethod +from typing import final + +import numpy as np +import pandas as pd + +from aeon.base import BaseSeriesEstimator +from aeon.base._base_series import VALID_SERIES_INPUT_TYPES + + +class BaseSeriesAnomalyDetector(BaseSeriesEstimator): + """Base class for series anomaly detection algorithms. + + Anomaly detection algorithms are used to identify anomalous subsequences in time + series data. These algorithms take a series of length m and return a boolean, int or + float array of length m, where each element indicates whether the corresponding + subsequence is anomalous or its anomaly score. + + Input and internal data format (where m is the number of time points and d is the + number of channels): + Univariate series (default): + np.ndarray, shape ``(m,)``, ``(m, 1)`` or ``(1, m)`` depending on axis. + This is converted to a 2D np.ndarray internally. + pd.DataFrame, shape ``(m, 1)`` or ``(1, m)`` depending on axis. + pd.Series, shape ``(m,)``. + Multivariate series: + np.ndarray array, shape ``(m, d)`` or ``(d, m)`` depending on axis. + pd.DataFrame ``(m, d)`` or ``(d, m)`` depending on axis. 
+ + Output data format (one of the following): + Anomaly scores (default): + np.ndarray, shape ``(m,)`` of type float. For each point of the input time + series, the anomaly score is a float value indicating the degree of + anomalousness. The higher the score, the more anomalous the point. + Binary classification: + np.ndarray, shape ``(m,)`` of type bool or int. For each point of the input + time series, the output is a boolean or integer value indicating whether the + point is anomalous (``True``/``1``) or not (``False``/``0``). + + Detector learning types: + Unsupervised (default): + Unsupervised detectors do not require any training data and can directly be + used on the target time series. Their tags are set to ``fit_is_empty=True`` + and ``requires_y=False``. You would usually call the ``fit_predict`` method + on these detectors. + Semi-supervised: + Semi-supervised detectors require a training step on a time series without + anomalies (normal behaving time series). The target value ``y`` would + consist of only zeros. Thus, these algorithms have logic in the ``fit`` + method, but do not require the target values. Their tags are set to + ``fit_is_empty=False`` and ``requires_y=False``. You would usually first + call the ``fit`` method on the training data and then the ``predict`` + method for your target time series. + Supervised: + Supervised detectors require a training step on a time series with known + anomalies (anomalies should be present and must be annotated). The detector + implements the ``fit`` method, and the target value ``y`` consists of zeros + and ones. Their tags are, thus, set to ``fit_is_empty=False`` and + ``requires_y=True``. You would usually first call the ``fit`` method on the + training data and then the ``predict`` method for your target time series. + + Parameters + ---------- + axis : int + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. 
i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + Setting this class variable will convert the input data to the chosen axis. + """ + + _tags = { + "X_inner_type": "np.ndarray", # One of VALID_SERIES_INNER_TYPES + "fit_is_empty": True, + "requires_y": False, + } + + def __init__(self, axis): + super().__init__(axis=axis) + + @final + def fit(self, X, y=None, axis=1): + """Fit time series anomaly detector to X. + + If the tag ``fit_is_empty`` is true, this just sets the ``is_fitted`` tag to + true. Otherwise, it checks ``self`` can handle ``X``, formats ``X`` into + the structure required by ``self`` then passes ``X`` (and possibly ``y``) to + ``_fit``. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + The time series to fit the model to. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None + The target values for the time series. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + axis : int + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + + Returns + ------- + BaseSeriesAnomalyDetector + The fitted estimator, reference to self. 
+ """ + if self.get_tag("fit_is_empty"): + self.is_fitted = True + return self + + if self.get_tag("requires_y"): + if y is None: + raise ValueError("Tag requires_y is true, but fit called with y=None") + + # reset estimator at the start of fit + self.reset() + + X = self._preprocess_series(X, axis, True) + if y is not None: + y = self._check_y(y) + + self._fit(X=X, y=y) + + # this should happen last + self.is_fitted = True + return self + + @final + def predict(self, X, axis=1) -> np.ndarray: + """Find anomalies in X. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + The time series to find anomalies in. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + axis : int, default=1 + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + + Returns + ------- + np.ndarray + A boolean, int or float array of length len(X), where each element indicates + whether the corresponding subsequence is anomalous or its anomaly score. + """ + fit_empty = self.get_tag("fit_is_empty") + if not fit_empty: + self._check_is_fitted() + + X = self._preprocess_series(X, axis, False) + + return self._predict(X) + + @final + def fit_predict(self, X, y=None, axis=1) -> np.ndarray: + """Fit time series anomaly detector and find anomalies for X. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + The time series to fit the model to. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None + The target values for the time series.
+ A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + axis : int, default=1 + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + + Returns + ------- + np.ndarray + A boolean, int or float array of length len(X), where each element indicates + whether the corresponding subsequence is anomalous or its anomaly score. + """ + if self.get_tag("requires_y"): + if y is None: + raise ValueError("Tag requires_y is true, but fit called with y=None") + + # reset estimator at the start of fit + self.reset() + + X = self._preprocess_series(X, axis, True) + + if self.get_tag("fit_is_empty"): + self.is_fitted = True + return self._predict(X) + + if y is not None: + y = self._check_y(y) + + pred = self._fit_predict(X, y) + + # this should happen last + self.is_fitted = True + return pred + + def _fit(self, X, y): + return self + + @abstractmethod + def _predict(self, X) -> np.ndarray: ... + + def _fit_predict(self, X, y): + self._fit(X, y) + return self._predict(X) + + def _check_y(self, y: VALID_SERIES_INPUT_TYPES) -> np.ndarray: + # Remind user if y is not required for this estimator on failure + req_msg = ( + f"{self.__class__.__name__} does not require a y input." + if self.get_tag("requires_y") + else "" + ) + new_y = y + + # must be a valid input type, see VALID_SERIES_INPUT_TYPES in + # BaseSeriesEstimator + if isinstance(y, np.ndarray): + # check valid shape + if y.ndim > 1: + raise ValueError( + "Error in input type for y: y input as np.ndarray should be 1D." 
+ + req_msg + ) + + # check valid dtype + fail = False + if issubclass(y.dtype.type, np.integer): + new_y = y.astype(bool) + fail = not np.array_equal(y, new_y) + elif not issubclass(y.dtype.type, np.bool_): + fail = True + + if fail: + raise ValueError( + "Error in input type for y: y input type must be an integer array " + "containing 0 and 1 or a boolean array." + req_msg + ) + elif isinstance(y, pd.Series): + # check series is of boolean dtype + if not pd.api.types.is_bool_dtype(y): + raise ValueError( + "Error in input type for y: y input as pd.Series must have a " + "boolean dtype." + req_msg + ) + + new_y = y.values + elif isinstance(y, pd.DataFrame): + # only accept size 1 dataframe + if y.shape[1] > 1: + raise ValueError( + "Error in input type for y: y input as pd.DataFrame should have a " + "single column series." + ) + + # check column is of boolean dtype + if not all(pd.api.types.is_bool_dtype(y[col]) for col in y.columns): + raise ValueError( + "Error in input type for y: y input as pd.DataFrame must have a " + "boolean dtype." 
+ req_msg + ) + + new_y = y.squeeze().values + else: + raise ValueError( + f"Error in input type for y: it should be one of " + f"{VALID_SERIES_INPUT_TYPES}, saw {type(y)}" + ) + + new_y = new_y.astype(bool) + return new_y diff --git a/aeon/anomaly_detection/series/distance_based/__init__.py b/aeon/anomaly_detection/series/distance_based/__init__.py new file mode 100644 index 0000000000..df02c8cd92 --- /dev/null +++ b/aeon/anomaly_detection/series/distance_based/__init__.py @@ -0,0 +1,21 @@ +"""Distance-based Time Series Anomaly Detection.""" + +__all__ = [ + "CBLOF", + "KMeansAD", + "LeftSTAMPi", + "LOF", + "MERLIN", + "OneClassSVM", + "STOMP", + "ROCKAD", +] + +from aeon.anomaly_detection.series.distance_based._cblof import CBLOF +from aeon.anomaly_detection.series.distance_based._kmeans import KMeansAD +from aeon.anomaly_detection.series.distance_based._left_stampi import LeftSTAMPi +from aeon.anomaly_detection.series.distance_based._lof import LOF +from aeon.anomaly_detection.series.distance_based._merlin import MERLIN +from aeon.anomaly_detection.series.distance_based._rockad import ROCKAD +from aeon.anomaly_detection.series.distance_based._stomp import STOMP +from aeon.anomaly_detection.series.outlier_detection._one_class_svm import OneClassSVM diff --git a/aeon/anomaly_detection/distance_based/_cblof.py b/aeon/anomaly_detection/series/distance_based/_cblof.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_cblof.py rename to aeon/anomaly_detection/series/distance_based/_cblof.py index 18bb044c14..016a9c18ba 100644 --- a/aeon/anomaly_detection/distance_based/_cblof.py +++ b/aeon/anomaly_detection/series/distance_based/_cblof.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git 
a/aeon/anomaly_detection/distance_based/_kmeans.py b/aeon/anomaly_detection/series/distance_based/_kmeans.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_kmeans.py rename to aeon/anomaly_detection/series/distance_based/_kmeans.py index bb8f188a1d..aea82ee21a 100644 --- a/aeon/anomaly_detection/distance_based/_kmeans.py +++ b/aeon/anomaly_detection/series/distance_based/_kmeans.py @@ -8,11 +8,11 @@ import numpy as np from sklearn.cluster import KMeans -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.windowing import reverse_windowing, sliding_windows -class KMeansAD(BaseAnomalyDetector): +class KMeansAD(BaseSeriesAnomalyDetector): """KMeans anomaly detector. The k-Means anomaly detector uses k-Means clustering to detect anomalies in time diff --git a/aeon/anomaly_detection/distance_based/_left_stampi.py b/aeon/anomaly_detection/series/distance_based/_left_stampi.py similarity index 97% rename from aeon/anomaly_detection/distance_based/_left_stampi.py rename to aeon/anomaly_detection/series/distance_based/_left_stampi.py index 43078ce021..cbeba2c5a5 100644 --- a/aeon/anomaly_detection/distance_based/_left_stampi.py +++ b/aeon/anomaly_detection/series/distance_based/_left_stampi.py @@ -6,11 +6,11 @@ import numpy as np -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.windowing import reverse_windowing -class LeftSTAMPi(BaseAnomalyDetector): +class LeftSTAMPi(BaseSeriesAnomalyDetector): """LeftSTAMPi anomaly detector. 
LeftSTAMPi [1]_ calculates the left matrix profile of a time series, diff --git a/aeon/anomaly_detection/distance_based/_lof.py b/aeon/anomaly_detection/series/distance_based/_lof.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_lof.py rename to aeon/anomaly_detection/series/distance_based/_lof.py index 2c3615d906..1a914583a9 100644 --- a/aeon/anomaly_detection/distance_based/_lof.py +++ b/aeon/anomaly_detection/series/distance_based/_lof.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/_merlin.py b/aeon/anomaly_detection/series/distance_based/_merlin.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_merlin.py rename to aeon/anomaly_detection/series/distance_based/_merlin.py index b63224acd5..be0d2a9ead 100644 --- a/aeon/anomaly_detection/distance_based/_merlin.py +++ b/aeon/anomaly_detection/series/distance_based/_merlin.py @@ -8,13 +8,13 @@ import numpy as np from numba import njit -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.distances import squared_distance from aeon.utils.numba.general import AEON_NUMBA_STD_THRESHOLD from aeon.utils.numba.stats import mean, std -class MERLIN(BaseAnomalyDetector): +class MERLIN(BaseSeriesAnomalyDetector): """MERLIN anomaly detector. 
MERLIN is a discord discovery algorithm that uses a sliding window to find the diff --git a/aeon/anomaly_detection/whole_series/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py similarity index 98% rename from aeon/anomaly_detection/whole_series/_rockad.py rename to aeon/anomaly_detection/series/distance_based/_rockad.py index 603a8732a3..15e2016e18 100644 --- a/aeon/anomaly_detection/whole_series/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -10,12 +10,12 @@ from sklearn.preprocessing import PowerTransformer from sklearn.utils import resample -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.transformations.collection.convolution_based import Rocket from aeon.utils.windowing import reverse_windowing, sliding_windows -class ROCKAD(BaseAnomalyDetector): +class ROCKAD(BaseSeriesAnomalyDetector): """ ROCKET-based Anomaly Detector (ROCKAD). diff --git a/aeon/anomaly_detection/distance_based/_stomp.py b/aeon/anomaly_detection/series/distance_based/_stomp.py similarity index 97% rename from aeon/anomaly_detection/distance_based/_stomp.py rename to aeon/anomaly_detection/series/distance_based/_stomp.py index 3f8be36432..7298b438c9 100644 --- a/aeon/anomaly_detection/distance_based/_stomp.py +++ b/aeon/anomaly_detection/series/distance_based/_stomp.py @@ -7,11 +7,11 @@ import numpy as np -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.windowing import reverse_windowing -class STOMP(BaseAnomalyDetector): +class STOMP(BaseSeriesAnomalyDetector): """STOMP anomaly detector. 
STOMP calculates the matrix profile of a time series which is the distance to the diff --git a/aeon/anomaly_detection/distance_based/tests/__init__.py b/aeon/anomaly_detection/series/distance_based/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/distance_based/tests/__init__.py rename to aeon/anomaly_detection/series/distance_based/tests/__init__.py diff --git a/aeon/anomaly_detection/distance_based/tests/test_cblof.py b/aeon/anomaly_detection/series/distance_based/tests/test_cblof.py similarity index 97% rename from aeon/anomaly_detection/distance_based/tests/test_cblof.py rename to aeon/anomaly_detection/series/distance_based/tests/test_cblof.py index d1472af6a2..ce7579734f 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_cblof.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_cblof.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.distance_based import CBLOF +from aeon.anomaly_detection.series.distance_based import CBLOF from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/tests/test_kmeans.py b/aeon/anomaly_detection/series/distance_based/tests/test_kmeans.py similarity index 95% rename from aeon/anomaly_detection/distance_based/tests/test_kmeans.py rename to aeon/anomaly_detection/series/distance_based/tests/test_kmeans.py index 2647411b88..bc966929b0 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_kmeans.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_kmeans.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.distance_based import KMeansAD +from aeon.anomaly_detection.series.distance_based import KMeansAD def test_kmeansad_univariate(): diff --git a/aeon/anomaly_detection/distance_based/tests/test_left_stampi.py 
b/aeon/anomaly_detection/series/distance_based/tests/test_left_stampi.py similarity index 99% rename from aeon/anomaly_detection/distance_based/tests/test_left_stampi.py rename to aeon/anomaly_detection/series/distance_based/tests/test_left_stampi.py index 6444bccdfe..2e14928625 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_left_stampi.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_left_stampi.py @@ -8,7 +8,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.distance_based._left_stampi import LeftSTAMPi +from aeon.anomaly_detection.series.distance_based._left_stampi import LeftSTAMPi from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/tests/test_lof.py b/aeon/anomaly_detection/series/distance_based/tests/test_lof.py similarity index 99% rename from aeon/anomaly_detection/distance_based/tests/test_lof.py rename to aeon/anomaly_detection/series/distance_based/tests/test_lof.py index 033d11295b..a9107705fa 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_lof.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_lof.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.distance_based import LOF +from aeon.anomaly_detection.series.distance_based import LOF from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/tests/test_merlin.py b/aeon/anomaly_detection/series/distance_based/tests/test_merlin.py similarity index 96% rename from aeon/anomaly_detection/distance_based/tests/test_merlin.py rename to aeon/anomaly_detection/series/distance_based/tests/test_merlin.py index ccf7e3300d..0ef5aafb8a 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_merlin.py +++ 
b/aeon/anomaly_detection/series/distance_based/tests/test_merlin.py @@ -4,7 +4,7 @@ import numpy as np -from aeon.anomaly_detection.distance_based import MERLIN +from aeon.anomaly_detection.series.distance_based import MERLIN TEST_DATA = np.array( [ diff --git a/aeon/anomaly_detection/whole_series/tests/test_rockad.py b/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py similarity index 96% rename from aeon/anomaly_detection/whole_series/tests/test_rockad.py rename to aeon/anomaly_detection/series/distance_based/tests/test_rockad.py index 7d3694b2c8..51d2425505 100644 --- a/aeon/anomaly_detection/whole_series/tests/test_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py @@ -4,7 +4,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.whole_series import ROCKAD +from aeon.anomaly_detection.series.distance_based import ROCKAD def test_rockad_univariate(): diff --git a/aeon/anomaly_detection/distance_based/tests/test_stomp.py b/aeon/anomaly_detection/series/distance_based/tests/test_stomp.py similarity index 95% rename from aeon/anomaly_detection/distance_based/tests/test_stomp.py rename to aeon/anomaly_detection/series/distance_based/tests/test_stomp.py index b506c89ea0..f8225d3c7b 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_stomp.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_stomp.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.distance_based import STOMP +from aeon.anomaly_detection.series.distance_based import STOMP from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/distribution_based/__init__.py b/aeon/anomaly_detection/series/distribution_based/__init__.py new file mode 100644 index 0000000000..7338cb740d --- /dev/null +++ b/aeon/anomaly_detection/series/distribution_based/__init__.py @@ -0,0 +1,9 @@ 
+"""Distribution-based Time Series Anomaly Detection.""" + +__all__ = [ + "COPOD", + "DWT_MLEAD", +] + +from aeon.anomaly_detection.series.distribution_based._copod import COPOD +from aeon.anomaly_detection.series.distribution_based._dwt_mlead import DWT_MLEAD diff --git a/aeon/anomaly_detection/distribution_based/_copod.py b/aeon/anomaly_detection/series/distribution_based/_copod.py similarity index 97% rename from aeon/anomaly_detection/distribution_based/_copod.py rename to aeon/anomaly_detection/series/distribution_based/_copod.py index bd2af0e084..3e72614f17 100644 --- a/aeon/anomaly_detection/distribution_based/_copod.py +++ b/aeon/anomaly_detection/series/distribution_based/_copod.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distribution_based/_dwt_mlead.py b/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py similarity index 98% rename from aeon/anomaly_detection/distribution_based/_dwt_mlead.py rename to aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py index cb0de0c015..2154abee22 100644 --- a/aeon/anomaly_detection/distribution_based/_dwt_mlead.py +++ b/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py @@ -11,7 +11,7 @@ from numpy.lib.stride_tricks import sliding_window_view from sklearn.covariance import EmpiricalCovariance -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.numba.wavelets import multilevel_haar_transform @@ -30,7 +30,7 @@ def _combine_alternating(xs: list[Any], ys: list[Any]) -> Iterable[Any]: yield y -class DWT_MLEAD(BaseAnomalyDetector): +class DWT_MLEAD(BaseSeriesAnomalyDetector): """DWT-MLEAD anomaly detector. 
DWT-MLEAD is an anomaly detection algorithm that uses the Discrete Wavelet Transform diff --git a/aeon/anomaly_detection/distribution_based/tests/__init__.py b/aeon/anomaly_detection/series/distribution_based/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/distribution_based/tests/__init__.py rename to aeon/anomaly_detection/series/distribution_based/tests/__init__.py diff --git a/aeon/anomaly_detection/distribution_based/tests/test_copod.py b/aeon/anomaly_detection/series/distribution_based/tests/test_copod.py similarity index 96% rename from aeon/anomaly_detection/distribution_based/tests/test_copod.py rename to aeon/anomaly_detection/series/distribution_based/tests/test_copod.py index 40969da0e7..8732b33c96 100644 --- a/aeon/anomaly_detection/distribution_based/tests/test_copod.py +++ b/aeon/anomaly_detection/series/distribution_based/tests/test_copod.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.distribution_based import COPOD +from aeon.anomaly_detection.series.distribution_based import COPOD from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distribution_based/tests/test_dwt_mlead.py b/aeon/anomaly_detection/series/distribution_based/tests/test_dwt_mlead.py similarity index 95% rename from aeon/anomaly_detection/distribution_based/tests/test_dwt_mlead.py rename to aeon/anomaly_detection/series/distribution_based/tests/test_dwt_mlead.py index 664d715122..3a714570b3 100644 --- a/aeon/anomaly_detection/distribution_based/tests/test_dwt_mlead.py +++ b/aeon/anomaly_detection/series/distribution_based/tests/test_dwt_mlead.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.distribution_based import DWT_MLEAD +from aeon.anomaly_detection.series.distribution_based import DWT_MLEAD def test_dwt_mlead_output(): diff --git 
a/aeon/anomaly_detection/series/outlier_detection/__init__.py b/aeon/anomaly_detection/series/outlier_detection/__init__.py new file mode 100644 index 0000000000..33a41b84c7 --- /dev/null +++ b/aeon/anomaly_detection/series/outlier_detection/__init__.py @@ -0,0 +1,9 @@ +"""Time Series Outlier Detection.""" + +__all__ = [ + "IsolationForest", + "STRAY", +] + +from aeon.anomaly_detection.series.outlier_detection._iforest import IsolationForest +from aeon.anomaly_detection.series.outlier_detection._stray import STRAY diff --git a/aeon/anomaly_detection/outlier_detection/_iforest.py b/aeon/anomaly_detection/series/outlier_detection/_iforest.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/_iforest.py rename to aeon/anomaly_detection/series/outlier_detection/_iforest.py index f13152d0e7..a0d62261e6 100644 --- a/aeon/anomaly_detection/outlier_detection/_iforest.py +++ b/aeon/anomaly_detection/series/outlier_detection/_iforest.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/_one_class_svm.py b/aeon/anomaly_detection/series/outlier_detection/_one_class_svm.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_one_class_svm.py rename to aeon/anomaly_detection/series/outlier_detection/_one_class_svm.py index 9e654ee326..667ec32f9a 100644 --- a/aeon/anomaly_detection/distance_based/_one_class_svm.py +++ b/aeon/anomaly_detection/series/outlier_detection/_one_class_svm.py @@ -7,11 +7,11 @@ import numpy as np from sklearn.svm import OneClassSVM as OCSVM -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.windowing import reverse_windowing, sliding_windows -class 
OneClassSVM(BaseAnomalyDetector): +class OneClassSVM(BaseSeriesAnomalyDetector): """OneClassSVM for anomaly detection. This class implements the OneClassSVM algorithm for anomaly detection diff --git a/aeon/anomaly_detection/outlier_detection/_stray.py b/aeon/anomaly_detection/series/outlier_detection/_stray.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/_stray.py rename to aeon/anomaly_detection/series/outlier_detection/_stray.py index e7512e2d24..3d78be8643 100644 --- a/aeon/anomaly_detection/outlier_detection/_stray.py +++ b/aeon/anomaly_detection/series/outlier_detection/_stray.py @@ -8,10 +8,10 @@ import numpy.typing as npt from sklearn.neighbors import NearestNeighbors -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector -class STRAY(BaseAnomalyDetector): +class STRAY(BaseSeriesAnomalyDetector): """STRAY: robust anomaly detection in data streams with concept drift. This is based on STRAY (Search TRace AnomalY) [1]_, which is a modification diff --git a/aeon/anomaly_detection/outlier_detection/tests/__init__.py b/aeon/anomaly_detection/series/outlier_detection/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/outlier_detection/tests/__init__.py rename to aeon/anomaly_detection/series/outlier_detection/tests/__init__.py diff --git a/aeon/anomaly_detection/outlier_detection/tests/test_iforest.py b/aeon/anomaly_detection/series/outlier_detection/tests/test_iforest.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/tests/test_iforest.py rename to aeon/anomaly_detection/series/outlier_detection/tests/test_iforest.py index a66d1003fb..07e0a085c3 100644 --- a/aeon/anomaly_detection/outlier_detection/tests/test_iforest.py +++ b/aeon/anomaly_detection/series/outlier_detection/tests/test_iforest.py @@ -4,7 +4,7 @@ import pytest from sklearn.utils import check_random_state -from 
aeon.anomaly_detection.outlier_detection import IsolationForest +from aeon.anomaly_detection.series.outlier_detection import IsolationForest from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/tests/test_one_class_svm.py b/aeon/anomaly_detection/series/outlier_detection/tests/test_one_class_svm.py similarity index 95% rename from aeon/anomaly_detection/distance_based/tests/test_one_class_svm.py rename to aeon/anomaly_detection/series/outlier_detection/tests/test_one_class_svm.py index 7a3aca2042..6395291617 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_one_class_svm.py +++ b/aeon/anomaly_detection/series/outlier_detection/tests/test_one_class_svm.py @@ -4,7 +4,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.distance_based import OneClassSVM +from aeon.anomaly_detection.series.distance_based import OneClassSVM def test_one_class_svm_univariate(): diff --git a/aeon/anomaly_detection/outlier_detection/tests/test_stray.py b/aeon/anomaly_detection/series/outlier_detection/tests/test_stray.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/tests/test_stray.py rename to aeon/anomaly_detection/series/outlier_detection/tests/test_stray.py index 8429a8a3c5..76ef9ef915 100644 --- a/aeon/anomaly_detection/outlier_detection/tests/test_stray.py +++ b/aeon/anomaly_detection/series/outlier_detection/tests/test_stray.py @@ -5,7 +5,7 @@ import numpy as np from sklearn.preprocessing import MinMaxScaler -from aeon.anomaly_detection.outlier_detection import STRAY +from aeon.anomaly_detection.series.outlier_detection import STRAY def test_default_1D(): diff --git a/aeon/anomaly_detection/tests/__init__.py b/aeon/anomaly_detection/series/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/tests/__init__.py rename to aeon/anomaly_detection/series/tests/__init__.py diff --git 
a/aeon/anomaly_detection/tests/test_base.py b/aeon/anomaly_detection/series/tests/test_base.py similarity index 100% rename from aeon/anomaly_detection/tests/test_base.py rename to aeon/anomaly_detection/series/tests/test_base.py diff --git a/aeon/anomaly_detection/outlier_detection/tests/test_pyod_adapter.py b/aeon/anomaly_detection/series/tests/test_pyod_adapter.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/tests/test_pyod_adapter.py rename to aeon/anomaly_detection/series/tests/test_pyod_adapter.py index ee75078133..84906c245d 100644 --- a/aeon/anomaly_detection/outlier_detection/tests/test_pyod_adapter.py +++ b/aeon/anomaly_detection/series/tests/test_pyod_adapter.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.outlier_detection import PyODAdapter +from aeon.anomaly_detection.series.outlier_detection import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/whole_series/__init__.py b/aeon/anomaly_detection/whole_series/__init__.py deleted file mode 100644 index 7084ff40a9..0000000000 --- a/aeon/anomaly_detection/whole_series/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Whole-series anomaly detection methods.""" - -__all__ = [ - "BaseCollectionAnomalyDetector", - "ClassificationAdapter", - "OutlierDetectionAdapter", -] - -from aeon.anomaly_detection.whole_series._classification import ClassificationAdapter -from aeon.anomaly_detection.whole_series._outlier_detection import ( - OutlierDetectionAdapter, -) -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector diff --git a/aeon/anomaly_detection/whole_series/tests/__init__.py b/aeon/anomaly_detection/whole_series/tests/__init__.py deleted file mode 100644 index 9292e8d9bd..0000000000 --- a/aeon/anomaly_detection/whole_series/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Whole series anomaly detection tests.""" diff --git 
a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py index 363a0ce24d..643118793d 100644 --- a/aeon/testing/estimator_checking/_yield_estimator_checks.py +++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py @@ -11,7 +11,7 @@ import numpy as np from sklearn.exceptions import NotFittedError -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base import BaseAeonEstimator from aeon.base._base import _clone_estimator @@ -138,7 +138,7 @@ def _yield_all_aeon_checks( estimator_class, estimator_instances, datatypes ) - if issubclass(estimator_class, BaseAnomalyDetector): + if issubclass(estimator_class, BaseSeriesAnomalyDetector): yield from _yield_anomaly_detection_checks( estimator_class, estimator_instances, datatypes ) diff --git a/aeon/testing/mock_estimators/_mock_anomaly_detectors.py b/aeon/testing/mock_estimators/_mock_anomaly_detectors.py index 4ec14d35fa..d48ebfcfb5 100644 --- a/aeon/testing/mock_estimators/_mock_anomaly_detectors.py +++ b/aeon/testing/mock_estimators/_mock_anomaly_detectors.py @@ -10,10 +10,10 @@ import numpy as np -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector -class MockAnomalyDetector(BaseAnomalyDetector): +class MockAnomalyDetector(BaseSeriesAnomalyDetector): """Mock anomaly detector.""" _tags = { diff --git a/aeon/testing/testing_data.py b/aeon/testing/testing_data.py index bdb5cea37d..1458d4a0d9 100644 --- a/aeon/testing/testing_data.py +++ b/aeon/testing/testing_data.py @@ -2,8 +2,8 @@ import numpy as np -from aeon.anomaly_detection.base import BaseAnomalyDetector -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.collection.base import 
BaseCollectionAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.base import BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification import BaseClassifier from aeon.classification.early_classification import BaseEarlyClassifier @@ -1023,7 +1023,7 @@ def _get_task_for_estimator(estimator): data_label = "SimilaritySearch" # series data with no secondary input elif ( - isinstance(estimator, BaseAnomalyDetector) + isinstance(estimator, BaseSeriesAnomalyDetector) or isinstance(estimator, BaseSegmenter) or isinstance(estimator, BaseSeriesTransformer) or isinstance(estimator, BaseForecaster) diff --git a/aeon/utils/base/_identifier.py b/aeon/utils/base/_identifier.py index cf2722cfcb..2857b45bd1 100644 --- a/aeon/utils/base/_identifier.py +++ b/aeon/utils/base/_identifier.py @@ -47,6 +47,8 @@ def get_identifier(estimator): if len(identifiers) == 0: raise TypeError("Error, no identifiers could be determined for estimator") + if len(identifiers) > 1 and "anomaly-detector" in identifiers: + identifiers.remove("anomaly-detector") if len(identifiers) > 1 and "estimator" in identifiers: identifiers.remove("estimator") if len(identifiers) > 1 and "series-estimator" in identifiers: diff --git a/aeon/utils/base/_register.py b/aeon/utils/base/_register.py index a4d8b2d303..749c005e5f 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -15,9 +15,9 @@ "VALID_ESTIMATOR_BASES", ] - from aeon.anomaly_detection.base import BaseAnomalyDetector -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.base import BaseAeonEstimator, BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification.base import BaseClassifier from aeon.classification.early_classification import BaseEarlyClassifier @@ -33,12 
+33,12 @@ # all base classes BASE_CLASS_REGISTER = { # abstract - no estimator directly inherits from these + "anomaly-detector": BaseAnomalyDetector, "collection-estimator": BaseCollectionEstimator, "estimator": BaseAeonEstimator, "series-estimator": BaseSeriesEstimator, "transformer": BaseTransformer, # estimator types - "anomaly-detector": BaseAnomalyDetector, "collection-anomaly-detector": BaseCollectionAnomalyDetector, "collection-transformer": BaseCollectionTransformer, "classifier": BaseClassifier, @@ -47,6 +47,7 @@ "regressor": BaseRegressor, "segmenter": BaseSegmenter, "similarity_searcher": BaseSimilaritySearch, + "series-anomaly-detector": BaseSeriesAnomalyDetector, "series-transformer": BaseSeriesTransformer, "forecaster": BaseForecaster, } @@ -55,5 +56,11 @@ VALID_ESTIMATOR_BASES = { k: BASE_CLASS_REGISTER[k] for k in BASE_CLASS_REGISTER.keys() - - {"estimator", "collection-estimator", "series-estimator", "transformer"} + - { + "anomaly-detector", + "estimator", + "collection-estimator", + "series-estimator", + "transformer", + } } diff --git a/aeon/utils/base/tests/test_identifier.py b/aeon/utils/base/tests/test_identifier.py index 8084492599..99eed77dad 100644 --- a/aeon/utils/base/tests/test_identifier.py +++ b/aeon/utils/base/tests/test_identifier.py @@ -34,7 +34,7 @@ def test_get_identifier(): assert ( get_identifier(MockAnomalyDetector) == get_identifier(MockAnomalyDetector()) - == "anomaly-detector" + == "series-anomaly-detector" ) assert ( get_identifier(MockSeriesTransformer) diff --git a/aeon/utils/tags/_tags.py b/aeon/utils/tags/_tags.py index 1e973b5a4f..7edb50f132 100644 --- a/aeon/utils/tags/_tags.py +++ b/aeon/utils/tags/_tags.py @@ -141,7 +141,6 @@ class : identifier for the base class of objects this tag applies to "class": [ "transformer", "anomaly-detector", - "collection-anomaly-detector", "segmenter", ], "type": "bool", diff --git a/aeon/utils/tags/tests/test_discovery.py b/aeon/utils/tags/tests/test_discovery.py index 
fd3d57fa16..cc9dca0e36 100644 --- a/aeon/utils/tags/tests/test_discovery.py +++ b/aeon/utils/tags/tests/test_discovery.py @@ -2,7 +2,7 @@ import pytest -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.classification import BaseClassifier from aeon.testing.mock_estimators import MockClassifier from aeon.testing.mock_estimators._mock_anomaly_detectors import MockAnomalyDetector @@ -42,8 +42,8 @@ def test_all_tags_for_estimator_anomaly_detection(): assert "capability:contractable" not in tags assert tags == all_tags_for_estimator(MockAnomalyDetector) - assert tags == all_tags_for_estimator(BaseAnomalyDetector) - assert tags == all_tags_for_estimator("anomaly-detector") + assert tags == all_tags_for_estimator(BaseSeriesAnomalyDetector) + assert tags == all_tags_for_estimator("series-anomaly-detector") tag_names = all_tags_for_estimator(MockAnomalyDetector(), names_only=True) assert isinstance(tag_names, list) diff --git a/aeon/utils/tests/test_discovery.py b/aeon/utils/tests/test_discovery.py index 9a0fcd6ee7..f71727320c 100644 --- a/aeon/utils/tests/test_discovery.py +++ b/aeon/utils/tests/test_discovery.py @@ -3,7 +3,7 @@ import pytest from sklearn.base import BaseEstimator -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.base import BaseAeonEstimator from aeon.classification import BaseClassifier, DummyClassifier from aeon.clustering import BaseClusterer @@ -62,7 +62,7 @@ def test_all_estimators_by_type(item): [ [BaseTransformer, BaseClassifier], [BaseClassifier, "segmenter"], - [BaseClassifier, BaseAnomalyDetector, BaseClusterer], + [BaseClassifier, BaseSeriesAnomalyDetector, BaseClusterer], ], ) def test_all_estimators_by_multiple_types(input): diff --git a/docs/developer_guide/adding_typehints.md b/docs/developer_guide/adding_typehints.md index 5f77ce119b..ad0a9d5fa9 100644 
--- a/docs/developer_guide/adding_typehints.md +++ b/docs/developer_guide/adding_typehints.md @@ -34,17 +34,23 @@ information. The `pyod` `BaseDetector` class can now be used in type hints with these additions. ```python +"""Adapter for PyOD models""" + from __future__ import annotations -from aeon.anomaly_detection.base import BaseAnomalyDetector +__maintainer__ = [] +__all__ = ["PyODAdapter"] + +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from typing import TYPE_CHECKING if TYPE_CHECKING: from pyod.models.base import BaseDetector -class PyODAdapter(BaseAnomalyDetector): + +class PyODAdapter(BaseSeriesAnomalyDetector): def __init__( - self, pyod_model: BaseDetector, window_size: int = 10, stride: int = 1 + self, pyod_model: BaseDetector, window_size: int = 10, stride: int = 1 ): self.pyod_model = pyod_model self.window_size = window_size diff --git a/examples/anomaly_detection/anomaly_detection.ipynb b/examples/anomaly_detection/anomaly_detection.ipynb index 7afd00aff8..c9011d3871 100644 --- a/examples/anomaly_detection/anomaly_detection.ipynb +++ b/examples/anomaly_detection/anomaly_detection.ipynb @@ -185,7 +185,7 @@ "metadata": {}, "outputs": [], "source": [ - "from aeon.anomaly_detection.distance_based import STOMP\n", + "from aeon.anomaly_detection.series.distance_based import STOMP\n", "from aeon.benchmarking.metrics.anomaly_detection import range_roc_auc_score\n", "\n", "detector = STOMP(window_size=200)\n", @@ -203,21 +203,21 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "743fbbaa-a7d0-4f56-993a-07453f6a9442", "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ "from pyod.models.ocsvm import OCSVM\n", "\n", - "from aeon.anomaly_detection.outlier_detection import PyODAdapter\n", + "from aeon.anomaly_detection.series import PyODAdapter\n", "from aeon.benchmarking.metrics.anomaly_detection import range_roc_auc_score\n", "\n", "detector = PyODAdapter(OCSVM(), 
window_size=3)\n", "y_scores = detector.fit_predict(X, axis=0)\n", "range_roc_auc_score(y, y_scores)" - ] + ], + "id": "8c89d43f2e5476e6" }, { "cell_type": "markdown", From c245ccbd0c1481b188a9094ccd7ff493613a89cd Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 8 May 2025 10:04:27 +0100 Subject: [PATCH 12/24] smoothing refactor --- aeon/transformations/series/_dft.py | 59 +++---------- aeon/transformations/series/_exp_smoothing.py | 55 +++--------- aeon/transformations/series/_gauss.py | 43 +++------ .../transformations/series/_moving_average.py | 51 +++-------- aeon/transformations/series/_sg.py | 43 +++------ aeon/transformations/series/_siv.py | 53 +++-------- .../series/smoothing/__init__.py | 17 ++++ aeon/transformations/series/smoothing/_dfa.py | 84 ++++++++++++++++++ .../series/smoothing/_exp_smoothing.py | 84 ++++++++++++++++++ .../series/smoothing/_gauss.py | 67 ++++++++++++++ .../series/smoothing/_moving_average.py | 67 ++++++++++++++ aeon/transformations/series/smoothing/_rms.py | 87 +++++++++++++++++++ aeon/transformations/series/smoothing/_sg.py | 68 +++++++++++++++ .../series/smoothing/tests/__init__.py | 1 + .../series/{ => smoothing}/tests/test_dft.py | 12 +-- .../tests/test_exp_smoothing.py | 20 ++--- .../{ => smoothing}/tests/test_gauss.py | 22 +---- .../tests/test_moving_average.py | 21 ++--- .../tests/test_rms.py} | 20 +---- .../series/{ => smoothing}/tests/test_sg.py | 20 +---- 20 files changed, 582 insertions(+), 312 deletions(-) create mode 100644 aeon/transformations/series/smoothing/__init__.py create mode 100644 aeon/transformations/series/smoothing/_dfa.py create mode 100644 aeon/transformations/series/smoothing/_exp_smoothing.py create mode 100644 aeon/transformations/series/smoothing/_gauss.py create mode 100644 aeon/transformations/series/smoothing/_moving_average.py create mode 100644 aeon/transformations/series/smoothing/_rms.py create mode 100644 aeon/transformations/series/smoothing/_sg.py create mode 100644 
aeon/transformations/series/smoothing/tests/__init__.py rename aeon/transformations/series/{ => smoothing}/tests/test_dft.py (73%) rename aeon/transformations/series/{ => smoothing}/tests/test_exp_smoothing.py (72%) rename aeon/transformations/series/{ => smoothing}/tests/test_gauss.py (60%) rename aeon/transformations/series/{ => smoothing}/tests/test_moving_average.py (54%) rename aeon/transformations/series/{tests/test_siv.py => smoothing/tests/test_rms.py} (64%) rename aeon/transformations/series/{ => smoothing}/tests/test_sg.py (64%) diff --git a/aeon/transformations/series/_dft.py b/aeon/transformations/series/_dft.py index 1f603c39af..24e0b27ab7 100644 --- a/aeon/transformations/series/_dft.py +++ b/aeon/transformations/series/_dft.py @@ -4,12 +4,20 @@ __all__ = ["DFTSeriesTransformer"] -import numpy as np +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import DiscreteFourierApproximation -class DFTSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="DFTSeriesTransformer is deprecated and will be removed in v1.3.0. " + "Please use DiscreteFourierApproximation from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class DFTSeriesTransformer(DiscreteFourierApproximation): """Filter a times series using Discrete Fourier Approximation (DFT). Parameters @@ -42,47 +50,4 @@ class DFTSeriesTransformer(BaseSeriesTransformer): (2, 100) """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, r=0.5, sort=False): - self.r = r - self.sort = sort - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. 
- - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - # Compute DFT - dft = np.fft.fft(X) - - # Mask array of terms to keep and number of terms to keep - mask = np.zeros_like(dft, dtype=bool) - keep = max(int(self.r * dft.shape[1]), 1) - - # If sort is set, sort the indices by the decreasing dft amplitude - if self.sort: - sorted_indices = np.argsort(np.abs(dft))[:, ::-1] - for i in range(dft.shape[0]): - mask[i, sorted_indices[i, 0:keep]] = True - # Else, keep the first terms - else: - mask[:, 0:keep] = True - - # Invert DFT with masked terms - X_ = np.fft.ifft(dft * mask).real - - return X_ + ... diff --git a/aeon/transformations/series/_exp_smoothing.py b/aeon/transformations/series/_exp_smoothing.py index 5566769463..08217a9d05 100644 --- a/aeon/transformations/series/_exp_smoothing.py +++ b/aeon/transformations/series/_exp_smoothing.py @@ -3,14 +3,21 @@ __maintainer__ = ["Datadote"] __all__ = ["ExpSmoothingSeriesTransformer"] -from typing import Union -import numpy as np +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import ExponentialSmoothing -class ExpSmoothingSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="ExpSmoothingSeriesTransformer is deprecated and will be removed in v1.3.0. " + "Please use ExponentialSmoothing from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class ExpSmoothingSeriesTransformer(ExponentialSmoothing): """Filter a time series using exponential smoothing. - Exponential smoothing (EXP) is a generalisaton of moving average smoothing that @@ -54,42 +61,4 @@ class ExpSmoothingSeriesTransformer(BaseSeriesTransformer): [10. 
9.5 8.75 7.875]] """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__( - self, alpha: float = 0.2, window_size: Union[int, float, None] = None - ) -> None: - if not 0 <= alpha <= 1: - raise ValueError(f"alpha must be in range [0, 1], got {alpha}") - if window_size is not None and window_size <= 0: - raise ValueError(f"window_size must be > 0, got {window_size}") - super().__init__(axis=1) - self.alpha = alpha if window_size is None else 2.0 / (window_size + 1) - self.window_size = window_size - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - private _transform containing core logic, called from transform - - Parameters - ---------- - X : np.ndarray - Data to be transformed - y : ignored argument for interface compatibility - Additional data, e.g., labels for transformation - - Returns - ------- - Xt: 2D np.ndarray - transformed version of X - """ - Xt = np.zeros_like(X, dtype="float") - Xt[:, 0] = X[:, 0] - for i in range(1, Xt.shape[1]): - Xt[:, i] = self.alpha * X[:, i] + (1 - self.alpha) * Xt[:, i - 1] - return Xt + ... diff --git a/aeon/transformations/series/_gauss.py b/aeon/transformations/series/_gauss.py index 863d8cf6b9..959cddac6e 100644 --- a/aeon/transformations/series/_gauss.py +++ b/aeon/transformations/series/_gauss.py @@ -4,12 +4,20 @@ __all__ = ["GaussSeriesTransformer"] -from scipy.ndimage import gaussian_filter1d +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import GaussianFilter -class GaussSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="GaussSeriesTransformer is deprecated and will be removed in v1.3.0. 
" + "Please use GaussianFilter from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class GaussSeriesTransformer(GaussianFilter): """Filter a times series using Gaussian filter. Parameters @@ -45,31 +53,4 @@ class GaussSeriesTransformer(BaseSeriesTransformer): (2, 100) """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, sigma=1, order=0): - self.sigma = sigma - self.order = order - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - # Compute Gaussian filter - X_ = gaussian_filter1d(X, self.sigma, self.axis, self.order) - - return X_ + ... diff --git a/aeon/transformations/series/_moving_average.py b/aeon/transformations/series/_moving_average.py index 7e9993946e..61fb15e834 100644 --- a/aeon/transformations/series/_moving_average.py +++ b/aeon/transformations/series/_moving_average.py @@ -3,12 +3,21 @@ __maintainer__ = ["Datadote"] __all__ = ["MovingAverageSeriesTransformer"] -import numpy as np -from aeon.transformations.series.base import BaseSeriesTransformer +from deprecated.sphinx import deprecated +from aeon.transformations.series.smoothing import MovingAverage -class MovingAverageSeriesTransformer(BaseSeriesTransformer): + +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="MovingAverageSeriesTransformer is deprecated and will be removed in " + "v1.3.0. Please use MovingAverage from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class MovingAverageSeriesTransformer(MovingAverage): """Calculate the moving average of an array of numbers. Slides a window across the input array, and returns the averages for each window. 
@@ -41,38 +50,4 @@ class MovingAverageSeriesTransformer(BaseSeriesTransformer): [[-2.5 -1.5 -0.5 0.5 1.5 2.5]] """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, window_size: int = 5) -> None: - super().__init__(axis=0) - if window_size <= 0: - raise ValueError(f"window_size must be > 0, got {window_size}") - self.window_size = window_size - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - private _transform containing core logic, called from transform - - Parameters - ---------- - X : np.ndarray - Data to be transformed - y : ignored argument for interface compatibility - Additional data, e.g., labels for transformation - - Returns - ------- - Xt: 2D np.ndarray - transformed version of X - """ - csum = np.cumsum(X, axis=0) - csum[self.window_size :, :] = ( - csum[self.window_size :, :] - csum[: -self.window_size, :] - ) - Xt = csum[self.window_size - 1 :, :] / self.window_size - return Xt + ... diff --git a/aeon/transformations/series/_sg.py b/aeon/transformations/series/_sg.py index 19000cc0e8..0fc1de3984 100644 --- a/aeon/transformations/series/_sg.py +++ b/aeon/transformations/series/_sg.py @@ -4,12 +4,20 @@ __all__ = ["SGSeriesTransformer"] -from scipy.signal import savgol_filter +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import SavitzkyGolayFilter -class SGSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="SGSeriesTransformer is deprecated and will be removed in v1.3.0. " + "Please use SavitzkyGolayFilter from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class SGSeriesTransformer(SavitzkyGolayFilter): """Filter a times series using Savitzky-Golay (SG). 
Parameters @@ -45,31 +53,4 @@ class SGSeriesTransformer(BaseSeriesTransformer): (2, 100) """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, window_length=5, polyorder=2): - self.window_length = window_length - self.polyorder = polyorder - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - # Compute SG - X_ = savgol_filter(X, self.window_length, self.polyorder) - - return X_ + ... diff --git a/aeon/transformations/series/_siv.py b/aeon/transformations/series/_siv.py index 1bb2ad3e0a..b6eeb5e591 100644 --- a/aeon/transformations/series/_siv.py +++ b/aeon/transformations/series/_siv.py @@ -4,13 +4,20 @@ __all__ = ["SIVSeriesTransformer"] -import numpy as np -from scipy.ndimage import median_filter +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import RecursiveMedianSieve -class SIVSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="SIVSeriesTransformer is deprecated and will be removed in v1.3.0. " + "Please use RecursiveMedianSieve from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class SIVSeriesTransformer(RecursiveMedianSieve): """Filter a times series using Recursive Median Sieve (SIV). 
Parameters @@ -48,40 +55,4 @@ class SIVSeriesTransformer(BaseSeriesTransformer): (2, 100) """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, window_length=None): - self.window_length = window_length - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - window_length = self.window_length - if window_length is None: - window_length = [3, 5, 7] - if not isinstance(window_length, list): - window_length = [window_length] - - # Compute SIV - X_ = X - - for w in window_length: - footprint = np.ones((1, w)) - X_ = median_filter(X_, footprint=footprint) - - return X_ + ... diff --git a/aeon/transformations/series/smoothing/__init__.py b/aeon/transformations/series/smoothing/__init__.py new file mode 100644 index 0000000000..52ebcc3c8e --- /dev/null +++ b/aeon/transformations/series/smoothing/__init__.py @@ -0,0 +1,17 @@ +"""Series smoothing transformers.""" + +__all__ = [ + "DiscreteFourierApproximation", + "ExponentialSmoothing", + "GaussianFilter", + "MovingAverage", + "SavitzkyGolayFilter", + "RecursiveMedianSieve", +] + +from aeon.transformations.series.smoothing._dfa import DiscreteFourierApproximation +from aeon.transformations.series.smoothing._exp_smoothing import ExponentialSmoothing +from aeon.transformations.series.smoothing._gauss import GaussianFilter +from aeon.transformations.series.smoothing._moving_average import MovingAverage +from aeon.transformations.series.smoothing._rms import RecursiveMedianSieve +from aeon.transformations.series.smoothing._sg import SavitzkyGolayFilter diff --git a/aeon/transformations/series/smoothing/_dfa.py b/aeon/transformations/series/smoothing/_dfa.py new file mode 100644 index 
0000000000..a5e017ac66 --- /dev/null +++ b/aeon/transformations/series/smoothing/_dfa.py @@ -0,0 +1,84 @@ +"""Discrete Fourier Approximation filter transformation for smoothing.""" + +__maintainer__ = ["Cyril-Meyer"] +__all__ = ["DiscreteFourierApproximation"] + + +import numpy as np + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class DiscreteFourierApproximation(BaseSeriesTransformer): + """Filter a time series using a Discrete Fourier Approximation. + + Smooths the series by first transforming into the frequency domain, discarding + the high frequency terms, then transforming back to the time domain. + + Parameters + ---------- + r : float, default=0.5 + Proportion of Fourier terms to retain [0, 1] + sort : bool, default=False + Sort the Fourier terms by amplitude to keep most important terms + + References + ---------- + .. [1] Cooley, J., Lewis, P., Welch, P.: The fast fourier transform and its + applications. IEEE Trans. Educ. 12(1), 27–34 (1969) + + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import DiscreteFourierApproximation + >>> X = np.random.random((2, 100)) # Random series length 100 + >>> dft = DiscreteFourierApproximation() + >>> X_ = dft.fit_transform(X) + >>> X_.shape + (2, 100) + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, r=0.5, sort=False): + self.r = r + self.sort = sort + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version.
+ + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + # Compute DFT + dft = np.fft.fft(X) + + # Mask array of terms to keep and number of terms to keep + mask = np.zeros_like(dft, dtype=bool) + keep = max(int(self.r * dft.shape[1]), 1) + + # If sort is set, sort the indices by the decreasing dft amplitude + if self.sort: + sorted_indices = np.argsort(np.abs(dft))[:, ::-1] + for i in range(dft.shape[0]): + mask[i, sorted_indices[i, 0:keep]] = True + # Else, keep the first terms + else: + mask[:, 0:keep] = True + + # Invert DFT with masked terms + X_ = np.fft.ifft(dft * mask).real + + return X_ diff --git a/aeon/transformations/series/smoothing/_exp_smoothing.py b/aeon/transformations/series/smoothing/_exp_smoothing.py new file mode 100644 index 0000000000..cd70138fa8 --- /dev/null +++ b/aeon/transformations/series/smoothing/_exp_smoothing.py @@ -0,0 +1,84 @@ +"""Exponential smoothing transformation.""" + +__maintainer__ = ["Datadote"] +__all__ = ["ExponentialSmoothing"] + +from typing import Union + +import numpy as np + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class ExponentialSmoothing(BaseSeriesTransformer): + """Filter a time series using exponential smoothing. + + - Exponential smoothing (EXP) is a generalisation of moving average smoothing that + assigns a decaying weight to each element rather than averaging over a window. + - Assume time series T = [t_0, ..., t_j], and smoothed values S = [s_0, ..., s_j] + - Then, s_0 = t_0 and s_j = alpha * t_j + (1 - alpha) * s_j-1 + where 0 ≤ alpha ≤ 1. If window_size is given, alpha is overwritten, and set as + alpha = 2. / (window_size + 1) + + Parameters + ---------- + alpha: float, default=0.2 + decaying weight. Range [0, 1].
Overwritten by window_size if window_size exists + window_size: int or float or None, default=None + If window_size is specified, alpha is set to 2. / (window_size + 1) + + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import ExponentialSmoothing + >>> X = np.array([-2, -1, 0, 1, 2]) + >>> transformer = ExponentialSmoothing(0.5) + >>> transformer.fit_transform(X) + [[-2. -1.5 -0.75 0.125 1.0625]] + >>> X = np.array([[1, 2, 3, 4], [10, 9, 8, 7]]) + >>> transformer.fit_transform(X) + [[ 1. 1.5 2.25 3.125] + [10. 9.5 8.75 7.875]] + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__( + self, alpha: float = 0.2, window_size: Union[int, float, None] = None + ) -> None: + self.alpha = alpha if window_size is None else 2.0 / (window_size + 1) + self.window_size = window_size + + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. 
+ + private _transform containing core logic, called from transform + + Parameters + ---------- + X : np.ndarray + Data to be transformed + y : ignored argument for interface compatibility + Additional data, e.g., labels for transformation + + Returns + ------- + Xt: 2D np.ndarray + transformed version of X + """ + if not 0 <= self.alpha <= 1: + raise ValueError(f"alpha must be in range [0, 1], got {self.alpha}") + if self.window_size is not None and self.window_size <= 0: + raise ValueError(f"window_size must be > 0, got {self.window_size}") + + Xt = np.zeros_like(X, dtype="float") + Xt[:, 0] = X[:, 0] + for i in range(1, Xt.shape[1]): + Xt[:, i] = self.alpha * X[:, i] + (1 - self.alpha) * Xt[:, i - 1] + return Xt diff --git a/aeon/transformations/series/smoothing/_gauss.py b/aeon/transformations/series/smoothing/_gauss.py new file mode 100644 index 0000000000..7946f8d634 --- /dev/null +++ b/aeon/transformations/series/smoothing/_gauss.py @@ -0,0 +1,67 @@ +"""Gaussian filter transformation.""" + +__maintainer__ = ["Cyril-Meyer"] +__all__ = ["GaussianFilter"] + + +from scipy.ndimage import gaussian_filter1d + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class GaussianFilter(BaseSeriesTransformer): + """Filter a time series using Gaussian filter. + + Wrapper for the SciPy ``gaussian_filter1d`` function. + + Parameters + ---------- + sigma : float, default=1 + Standard deviation for the Gaussian kernel. + order : int, default=0 + An order of 0 corresponds to convolution with a Gaussian kernel. + A positive order corresponds to convolution with that derivative of a + Gaussian. + + References + ---------- + .. [1] Chou, Y. L. "Statistical Analysis, Section 17.9." New York: Holt + International (1975). 
+ + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import GaussianFilter + >>> X = np.random.random((2, 100)) # Random series length 100 + >>> gauss = GaussianFilter(sigma=5) + >>> X_ = gauss.fit_transform(X) + >>> X_.shape + (2, 100) + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, sigma=1, order=0): + self.sigma = sigma + self.order = order + + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + return gaussian_filter1d(X, self.sigma, axis=self.axis, order=self.order) diff --git a/aeon/transformations/series/smoothing/_moving_average.py b/aeon/transformations/series/smoothing/_moving_average.py new file mode 100644 index 0000000000..c4b806e6c7 --- /dev/null +++ b/aeon/transformations/series/smoothing/_moving_average.py @@ -0,0 +1,67 @@ +"""Moving average transformation.""" + +__maintainer__ = ["Datadote"] +__all__ = ["MovingAverage"] + +import numpy as np + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class MovingAverage(BaseSeriesTransformer): + """Calculate the moving average for a time series. + + Slides a window across the input array, and returns the averages for each window. + + Parameters + ---------- + window_size: int, default=5 + Number of values to average for each window. 
+ + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import MovingAverage + >>> X = np.array([-3, -2, -1, 0, 1, 2, 3]) + >>> transformer = MovingAverage(2) + >>> transformer.fit_transform(X) + [[-2.5 -1.5 -0.5 0.5 1.5 2.5]] + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, window_size: int = 5) -> None: + self.window_size = window_size + + super().__init__(axis=0) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + private _transform containing core logic, called from transform + + Parameters + ---------- + X : np.ndarray + Data to be transformed + y : ignored argument for interface compatibility + Additional data, e.g., labels for transformation + + Returns + ------- + Xt: 2D np.ndarray + transformed version of X + """ + if self.window_size <= 0: + raise ValueError(f"window_size must be > 0, got {self.window_size}") + + csum = np.cumsum(X, axis=0) + csum[self.window_size :, :] = ( + csum[self.window_size :, :] - csum[: -self.window_size, :] + ) + Xt = csum[self.window_size - 1 :, :] / self.window_size + return Xt diff --git a/aeon/transformations/series/smoothing/_rms.py b/aeon/transformations/series/smoothing/_rms.py new file mode 100644 index 0000000000..9b9b45becb --- /dev/null +++ b/aeon/transformations/series/smoothing/_rms.py @@ -0,0 +1,87 @@ +"""Recursive Median Sieve filter transformation.""" + +__maintainer__ = ["Cyril-Meyer"] +__all__ = ["RecursiveMedianSieve"] + + +import numpy as np +from scipy.ndimage import median_filter + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class RecursiveMedianSieve(BaseSeriesTransformer): + """Filter a time series using a Recursive Median Sieve. + + Parameters + ---------- + window_length : list of int or int, default=None + The filter window lengths (recommended increasing value). + If None, defaults to [3, 5, 7].
+ + Notes + ----- + Use scipy.ndimage.median_filter instead of scipy.signal.medfilt : + The more general function scipy.ndimage.median_filter has a more efficient + implementation of a median filter and therefore runs much faster. + https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.medfilt.html + + References + ---------- + .. [1] Bangham J. A. (1988). + Data-sieving hydrophobicity plots. + Analytical biochemistry, 174(1), 142–145. + https://doi.org/10.1016/0003-2697(88)90528-3 + .. [2] Yli-Harja, O., Koivisto, P., Bangham, J. A., Cawley, G., + Harvey, R., & Shmulevich, I. (2001). + Simplified implementation of the recursive median sieve. + Signal Process., 81(7), 1565–1570. + https://doi.org/10.1016/S0165-1684(01)00054-8 + + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import RecursiveMedianSieve + >>> X = np.random.random((2, 100)) # Random series length 100 + >>> siv = RecursiveMedianSieve() + >>> X_ = siv.fit_transform(X) + >>> X_.shape + (2, 100) + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, window_length=None): + self.window_length = window_length + + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. 
+ + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + window_length = self.window_length + if window_length is None: + window_length = [3, 5, 7] + if not isinstance(window_length, list): + window_length = [window_length] + + X_ = X + for w in window_length: + footprint = np.ones((1, w)) + X_ = median_filter(X_, footprint=footprint) + + return X_ diff --git a/aeon/transformations/series/smoothing/_sg.py b/aeon/transformations/series/smoothing/_sg.py new file mode 100644 index 0000000000..750f5c5701 --- /dev/null +++ b/aeon/transformations/series/smoothing/_sg.py @@ -0,0 +1,68 @@ +"""Savitzky-Golay filter transformation.""" + +__maintainer__ = ["Cyril-Meyer"] +__all__ = ["SavitzkyGolayFilter"] + + +from scipy.signal import savgol_filter + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class SavitzkyGolayFilter(BaseSeriesTransformer): + """Filter a time series using Savitzky-Golay (SG). + + Wrapper for the SciPy ``savgol_filter`` function. + + Parameters + ---------- + window_length : int, default=5 + The length of the filter window (i.e., the number of coefficients). + window_length must be less than or equal to the size of the input. + polyorder : int, default=2 + The order of the polynomial used to fit the samples. + polyorder must be less than window_length. + + References + ---------- + .. [1] Savitzky, A., & Golay, M. J. (1964). + Smoothing and differentiation of data by simplified least squares procedures. + Analytical chemistry, 36(8), 1627-1639.
+ + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import SavitzkyGolayFilter + >>> X = np.random.random((2, 100)) # Random series length 100 + >>> sg = SavitzkyGolayFilter() + >>> X_ = sg.fit_transform(X) + >>> X_.shape + (2, 100) + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, window_length=5, polyorder=2): + self.window_length = window_length + self.polyorder = polyorder + + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + return savgol_filter(X, self.window_length, self.polyorder) diff --git a/aeon/transformations/series/smoothing/tests/__init__.py b/aeon/transformations/series/smoothing/tests/__init__.py new file mode 100644 index 0000000000..5882e7a73b --- /dev/null +++ b/aeon/transformations/series/smoothing/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for series smoothing transformations.""" diff --git a/aeon/transformations/series/tests/test_dft.py b/aeon/transformations/series/smoothing/tests/test_dft.py similarity index 73% rename from aeon/transformations/series/tests/test_dft.py rename to aeon/transformations/series/smoothing/tests/test_dft.py index 6ffe04b78c..62cd7147a4 100644 --- a/aeon/transformations/series/tests/test_dft.py +++ b/aeon/transformations/series/smoothing/tests/test_dft.py @@ -1,12 +1,12 @@ """Tests for DFT transformation.""" -__maintainer__ = [] - import numpy as np import pytest +from aeon.transformations.series.smoothing._dfa import DiscreteFourierApproximation + -@pytest.mark.parametrize("r", [0.00, 0.25, 0.50, 0.75, 1.00]) +@pytest.mark.parametrize("r", [0.00, 0.50, 1.00]) @pytest.mark.parametrize("sort", [True, False]) def test_dft(r, 
sort): """Test the functionality of DFT transformation.""" @@ -23,15 +23,11 @@ def test_dft(r, sort): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) - x12r = x12 + np.random.random((2, n_samples)) * 0.25 - - from aeon.transformations.series._dft import DFTSeriesTransformer - dft = DFTSeriesTransformer(r=r, sort=sort) + dft = DiscreteFourierApproximation(r=r, sort=sort) x_1 = dft.fit_transform(x1) x_2 = dft.fit_transform(x2) x_12 = dft.fit_transform(x12) - dft.fit_transform(x12r) np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) diff --git a/aeon/transformations/series/tests/test_exp_smoothing.py b/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py similarity index 72% rename from aeon/transformations/series/tests/test_exp_smoothing.py rename to aeon/transformations/series/smoothing/tests/test_exp_smoothing.py index cb83a4e7da..c441104e8c 100644 --- a/aeon/transformations/series/tests/test_exp_smoothing.py +++ b/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py @@ -1,11 +1,9 @@ -"""Tests for ExpSmoothingSeriesTransformer.""" - -__maintainer__ = ["Datadote"] +"""Tests for ExponentialSmoothing.""" import numpy as np import pytest -from aeon.transformations.series._exp_smoothing import ExpSmoothingSeriesTransformer +from aeon.transformations.series.smoothing import ExponentialSmoothing TEST_DATA = [np.array([-2, -1, 0, 1, 2]), np.array([[1, 2, 3, 4], [10, 9, 8, 7]])] EXPECTED_RESULTS = [ @@ -16,7 +14,7 @@ def test_input_1d_array(): """Test inputs of dimension 1.""" - transformer = ExpSmoothingSeriesTransformer(0.5) + transformer = ExponentialSmoothing(0.5) idx_data = 0 Xt = transformer.fit_transform(TEST_DATA[idx_data]) np.testing.assert_almost_equal(Xt, EXPECTED_RESULTS[idx_data], decimal=5) @@ -24,7 +22,7 @@ def test_input_1d_array(): def test_input_2d_array(): """Test inputs of dimension 2.""" - transformer = ExpSmoothingSeriesTransformer(0.5) + 
transformer = ExponentialSmoothing(0.5) idx_data = 1 Xt = transformer.fit_transform(TEST_DATA[idx_data]) np.testing.assert_almost_equal(Xt, EXPECTED_RESULTS[idx_data], decimal=5) @@ -34,8 +32,8 @@ def test_input_2d_array(): def test_window_size_matches_alpha(alpha_window): """Check same output results using equivalent alpha and window_size.""" alpha, window_size = alpha_window - transformer1 = ExpSmoothingSeriesTransformer(alpha=alpha) - transformer2 = ExpSmoothingSeriesTransformer(window_size=window_size) + transformer1 = ExponentialSmoothing(alpha=alpha) + transformer2 = ExponentialSmoothing(window_size=window_size) for i in range(len(TEST_DATA)): Xt1 = transformer1.fit_transform(TEST_DATA[i]) Xt2 = transformer2.fit_transform(TEST_DATA[i]) @@ -45,16 +43,16 @@ def test_window_size_matches_alpha(alpha_window): def test_alpha_less_than_zero(): """Test alpha less than zero.""" with pytest.raises(ValueError): - ExpSmoothingSeriesTransformer(-0.5) + ExponentialSmoothing(-0.5) def test_alpha_greater_than_one(): """Test alpha greater than one.""" with pytest.raises(ValueError): - ExpSmoothingSeriesTransformer(2.0) + ExponentialSmoothing(2.0) def test_window_size_than_one(): """Test window_size < 0.""" with pytest.raises(ValueError): - ExpSmoothingSeriesTransformer(window_size=0) + ExponentialSmoothing(window_size=0) diff --git a/aeon/transformations/series/tests/test_gauss.py b/aeon/transformations/series/smoothing/tests/test_gauss.py similarity index 60% rename from aeon/transformations/series/tests/test_gauss.py rename to aeon/transformations/series/smoothing/tests/test_gauss.py index 6ab65ac107..52a8ae3ee8 100644 --- a/aeon/transformations/series/tests/test_gauss.py +++ b/aeon/transformations/series/smoothing/tests/test_gauss.py @@ -1,12 +1,12 @@ """Tests for Gauss transformation.""" -__maintainer__ = [] - import numpy as np import pytest +from aeon.transformations.series.smoothing import GaussianFilter + -@pytest.mark.parametrize("sigma", [0.1, 0.5, 1, 2, 5, 10]) 
+@pytest.mark.parametrize("sigma", [0.1, 1, 10]) @pytest.mark.parametrize("order", [0, 1, 2]) def test_gauss(sigma, order): """Test the functionality of Gauss transformation.""" @@ -23,25 +23,11 @@ def test_gauss(sigma, order): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) - x12r = x12 + np.random.random((2, n_samples)) * 0.25 - from aeon.transformations.series._gauss import GaussSeriesTransformer - - sg = GaussSeriesTransformer(sigma=sigma, order=order) + sg = GaussianFilter(sigma=sigma, order=order) x_1 = sg.fit_transform(x1) x_2 = sg.fit_transform(x2) x_12 = sg.fit_transform(x12) - x_12_r = sg.fit_transform(x12r) - - """ - # Visualize smoothing - import matplotlib.pyplot as plt - plt.plot(x12r[0]) - plt.plot(x_12_r[0]) - plt.savefig(fname=f'Gauss_{sigma}_{order}.png') - plt.clf() - """ np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) - assert x_12.shape == x_12_r.shape diff --git a/aeon/transformations/series/tests/test_moving_average.py b/aeon/transformations/series/smoothing/tests/test_moving_average.py similarity index 54% rename from aeon/transformations/series/tests/test_moving_average.py rename to aeon/transformations/series/smoothing/tests/test_moving_average.py index bfbf3a71bd..6fa1e55a74 100644 --- a/aeon/transformations/series/tests/test_moving_average.py +++ b/aeon/transformations/series/smoothing/tests/test_moving_average.py @@ -1,26 +1,27 @@ """Tests for MovingAverageTransformer.""" -__maintainer__ = ["Datadote"] - import numpy as np import pytest -from aeon.transformations.series._moving_average import MovingAverageSeriesTransformer +from aeon.transformations.series.smoothing import MovingAverage -TEST_DATA = [np.array([-3, -2, -1, 0, 1, 2, 3]), np.array([[-3, -2, -1, 0, 1, 2, 3]])] +TEST_DATA = [ + np.array([-3, -2, -1, 0, 1, 2, 3]), + np.array([[-3, -2, -1, 0, 1, 2, 3], [3, 2, 1, 0, -1, -2, -3]]), +] EXPECTED_RESULTS = [ np.array([[-2.5, -1.5, -0.5, 0.5, 1.5, 
2.5]]), - np.array([[-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]]), + np.array([[-2.5, -1.5, -0.5, 0.5, 1.5, 2.5], [2.5, 1.5, 0.5, -0.5, -1.5, -2.5]]), ] def test_window_size_greater_than_zero(): """Test window sizes > 0.""" - ma = MovingAverageSeriesTransformer(window_size=1) + ma = MovingAverage(window_size=1) xt = ma.fit_transform(TEST_DATA[0]) - np.testing.assert_array_almost_equal(xt, xt, decimal=2) + np.testing.assert_array_almost_equal(xt, TEST_DATA[0], decimal=2) - ma = MovingAverageSeriesTransformer(window_size=2) + ma = MovingAverage(window_size=2) for i in range(len(TEST_DATA)): xt = ma.fit_transform(TEST_DATA[i]) np.testing.assert_array_almost_equal(xt, EXPECTED_RESULTS[i], decimal=2) @@ -29,10 +30,10 @@ def test_window_size_greater_than_zero(): def test_window_size_equal_zero(): """Test window size == 0.""" with pytest.raises(ValueError): - MovingAverageSeriesTransformer(window_size=0) + MovingAverage(window_size=0) def test_window_size_less_than_zero(): """Test window sizes < 0.""" with pytest.raises(ValueError): - MovingAverageSeriesTransformer(window_size=-1) + MovingAverage(window_size=-1) diff --git a/aeon/transformations/series/tests/test_siv.py b/aeon/transformations/series/smoothing/tests/test_rms.py similarity index 64% rename from aeon/transformations/series/tests/test_siv.py rename to aeon/transformations/series/smoothing/tests/test_rms.py index c8042e9c5b..a99a02981e 100644 --- a/aeon/transformations/series/tests/test_siv.py +++ b/aeon/transformations/series/smoothing/tests/test_rms.py @@ -1,10 +1,10 @@ """Tests for SIV transformation.""" -__maintainer__ = [] - import numpy as np import pytest +from aeon.transformations.series.smoothing import RecursiveMedianSieve + @pytest.mark.parametrize( "window_length", [1, 2, 3, 5, 7, 10, 11, [2, 3], [3, 5], [3, 5, 7], [3, 5, 7, 11]] @@ -24,25 +24,11 @@ def test_siv(window_length): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) - x12r = x12 + np.random.random((2, n_samples)) * 0.25 - from 
aeon.transformations.series._siv import SIVSeriesTransformer - - siv = SIVSeriesTransformer(window_length=window_length) + siv = RecursiveMedianSieve(window_length=window_length) x_1 = siv.fit_transform(x1) x_2 = siv.fit_transform(x2) x_12 = siv.fit_transform(x12) - x_12_r = siv.fit_transform(x12r) - - """ - # Visualize smoothing - import matplotlib.pyplot as plt - plt.plot(x12r[0]) - plt.plot(x_12_r[0]) - plt.savefig(fname=f'SIV_{window_length}.png') - plt.clf() - """ np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) - assert x_12.shape == x_12_r.shape diff --git a/aeon/transformations/series/tests/test_sg.py b/aeon/transformations/series/smoothing/tests/test_sg.py similarity index 64% rename from aeon/transformations/series/tests/test_sg.py rename to aeon/transformations/series/smoothing/tests/test_sg.py index 7df2970086..75604fb196 100644 --- a/aeon/transformations/series/tests/test_sg.py +++ b/aeon/transformations/series/smoothing/tests/test_sg.py @@ -1,10 +1,10 @@ """Tests for SG transformation.""" -__maintainer__ = [] - import numpy as np import pytest +from aeon.transformations.series.smoothing import SavitzkyGolayFilter + @pytest.mark.parametrize("window_length", [5, 9, 17]) @pytest.mark.parametrize("polyorder", [2, 3, 4]) @@ -23,25 +23,11 @@ def test_sg(window_length, polyorder): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) - x12r = x12 + np.random.random((2, n_samples)) * 0.25 - from aeon.transformations.series._sg import SGSeriesTransformer - - sg = SGSeriesTransformer(window_length=window_length, polyorder=polyorder) + sg = SavitzkyGolayFilter(window_length=window_length, polyorder=polyorder) x_1 = sg.fit_transform(x1) x_2 = sg.fit_transform(x2) x_12 = sg.fit_transform(x12) - x_12_r = sg.fit_transform(x12r) - - """ - # Visualize smoothing - import matplotlib.pyplot as plt - plt.plot(x12r[0]) - plt.plot(x_12_r[0]) - plt.savefig(fname=f'SG_{window_length}_{polyorder}.png') 
- plt.clf() - """ np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) - assert x_12.shape == x_12_r.shape From f6122d7891c05f1a7a8c25f8703350f4535dcd9f Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 8 May 2025 10:07:49 +0100 Subject: [PATCH 13/24] Revert "smoothing refactor" This reverts commit c245ccbd0c1481b188a9094ccd7ff493613a89cd. --- aeon/transformations/series/_dft.py | 59 ++++++++++--- aeon/transformations/series/_exp_smoothing.py | 55 +++++++++--- aeon/transformations/series/_gauss.py | 43 ++++++--- .../transformations/series/_moving_average.py | 51 ++++++++--- aeon/transformations/series/_sg.py | 43 ++++++--- aeon/transformations/series/_siv.py | 53 ++++++++--- .../series/smoothing/__init__.py | 17 ---- aeon/transformations/series/smoothing/_dfa.py | 84 ------------------ .../series/smoothing/_exp_smoothing.py | 84 ------------------ .../series/smoothing/_gauss.py | 67 -------------- .../series/smoothing/_moving_average.py | 67 -------------- aeon/transformations/series/smoothing/_rms.py | 87 ------------------- aeon/transformations/series/smoothing/_sg.py | 68 --------------- .../series/smoothing/tests/__init__.py | 1 - .../series/{smoothing => }/tests/test_dft.py | 12 ++- .../tests/test_exp_smoothing.py | 20 +++-- .../{smoothing => }/tests/test_gauss.py | 22 ++++- .../tests/test_moving_average.py | 21 +++-- .../series/{smoothing => }/tests/test_sg.py | 20 ++++- .../tests/test_rms.py => tests/test_siv.py} | 20 ++++- 20 files changed, 312 insertions(+), 582 deletions(-) delete mode 100644 aeon/transformations/series/smoothing/__init__.py delete mode 100644 aeon/transformations/series/smoothing/_dfa.py delete mode 100644 aeon/transformations/series/smoothing/_exp_smoothing.py delete mode 100644 aeon/transformations/series/smoothing/_gauss.py delete mode 100644 aeon/transformations/series/smoothing/_moving_average.py delete mode 100644 
aeon/transformations/series/smoothing/_rms.py delete mode 100644 aeon/transformations/series/smoothing/_sg.py delete mode 100644 aeon/transformations/series/smoothing/tests/__init__.py rename aeon/transformations/series/{smoothing => }/tests/test_dft.py (73%) rename aeon/transformations/series/{smoothing => }/tests/test_exp_smoothing.py (72%) rename aeon/transformations/series/{smoothing => }/tests/test_gauss.py (60%) rename aeon/transformations/series/{smoothing => }/tests/test_moving_average.py (54%) rename aeon/transformations/series/{smoothing => }/tests/test_sg.py (64%) rename aeon/transformations/series/{smoothing/tests/test_rms.py => tests/test_siv.py} (64%) diff --git a/aeon/transformations/series/_dft.py b/aeon/transformations/series/_dft.py index 24e0b27ab7..1f603c39af 100644 --- a/aeon/transformations/series/_dft.py +++ b/aeon/transformations/series/_dft.py @@ -4,20 +4,12 @@ __all__ = ["DFTSeriesTransformer"] -from deprecated.sphinx import deprecated +import numpy as np -from aeon.transformations.series.smoothing import DiscreteFourierApproximation +from aeon.transformations.series.base import BaseSeriesTransformer -# TODO: Remove in v1.3.0 -@deprecated( - version="1.2.0", - reason="DFTSeriesTransformer is deprecated and will be removed in v1.3.0. " - "Please use DiscreteFourierApproximation from " - "transformations.series.smoothing instead.", - category=FutureWarning, -) -class DFTSeriesTransformer(DiscreteFourierApproximation): +class DFTSeriesTransformer(BaseSeriesTransformer): """Filter a times series using Discrete Fourier Approximation (DFT). Parameters @@ -50,4 +42,47 @@ class DFTSeriesTransformer(DiscreteFourierApproximation): (2, 100) """ - ... + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, r=0.5, sort=False): + self.r = r + self.sort = sort + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. 
+ + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + # Compute DFT + dft = np.fft.fft(X) + + # Mask array of terms to keep and number of terms to keep + mask = np.zeros_like(dft, dtype=bool) + keep = max(int(self.r * dft.shape[1]), 1) + + # If sort is set, sort the indices by the decreasing dft amplitude + if self.sort: + sorted_indices = np.argsort(np.abs(dft))[:, ::-1] + for i in range(dft.shape[0]): + mask[i, sorted_indices[i, 0:keep]] = True + # Else, keep the first terms + else: + mask[:, 0:keep] = True + + # Invert DFT with masked terms + X_ = np.fft.ifft(dft * mask).real + + return X_ diff --git a/aeon/transformations/series/_exp_smoothing.py b/aeon/transformations/series/_exp_smoothing.py index 08217a9d05..5566769463 100644 --- a/aeon/transformations/series/_exp_smoothing.py +++ b/aeon/transformations/series/_exp_smoothing.py @@ -3,21 +3,14 @@ __maintainer__ = ["Datadote"] __all__ = ["ExpSmoothingSeriesTransformer"] +from typing import Union -from deprecated.sphinx import deprecated +import numpy as np -from aeon.transformations.series.smoothing import ExponentialSmoothing +from aeon.transformations.series.base import BaseSeriesTransformer -# TODO: Remove in v1.3.0 -@deprecated( - version="1.2.0", - reason="ExpSmoothingSeriesTransformer is deprecated and will be removed in v1.3.0. " - "Please use ExponentialSmoothing from " - "transformations.series.smoothing instead.", - category=FutureWarning, -) -class ExpSmoothingSeriesTransformer(ExponentialSmoothing): +class ExpSmoothingSeriesTransformer(BaseSeriesTransformer): """Filter a time series using exponential smoothing. - Exponential smoothing (EXP) is a generalisaton of moving average smoothing that @@ -61,4 +54,42 @@ class ExpSmoothingSeriesTransformer(ExponentialSmoothing): [10. 9.5 8.75 7.875]] """ - ... 
+ _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__( + self, alpha: float = 0.2, window_size: Union[int, float, None] = None + ) -> None: + if not 0 <= alpha <= 1: + raise ValueError(f"alpha must be in range [0, 1], got {alpha}") + if window_size is not None and window_size <= 0: + raise ValueError(f"window_size must be > 0, got {window_size}") + super().__init__(axis=1) + self.alpha = alpha if window_size is None else 2.0 / (window_size + 1) + self.window_size = window_size + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + private _transform containing core logic, called from transform + + Parameters + ---------- + X : np.ndarray + Data to be transformed + y : ignored argument for interface compatibility + Additional data, e.g., labels for transformation + + Returns + ------- + Xt: 2D np.ndarray + transformed version of X + """ + Xt = np.zeros_like(X, dtype="float") + Xt[:, 0] = X[:, 0] + for i in range(1, Xt.shape[1]): + Xt[:, i] = self.alpha * X[:, i] + (1 - self.alpha) * Xt[:, i - 1] + return Xt diff --git a/aeon/transformations/series/_gauss.py b/aeon/transformations/series/_gauss.py index 959cddac6e..863d8cf6b9 100644 --- a/aeon/transformations/series/_gauss.py +++ b/aeon/transformations/series/_gauss.py @@ -4,20 +4,12 @@ __all__ = ["GaussSeriesTransformer"] -from deprecated.sphinx import deprecated +from scipy.ndimage import gaussian_filter1d -from aeon.transformations.series.smoothing import GaussianFilter +from aeon.transformations.series.base import BaseSeriesTransformer -# TODO: Remove in v1.3.0 -@deprecated( - version="1.2.0", - reason="GaussSeriesTransformer is deprecated and will be removed in v1.3.0. 
" - "Please use GaussianFilter from " - "transformations.series.smoothing instead.", - category=FutureWarning, -) -class GaussSeriesTransformer(GaussianFilter): +class GaussSeriesTransformer(BaseSeriesTransformer): """Filter a times series using Gaussian filter. Parameters @@ -53,4 +45,31 @@ class GaussSeriesTransformer(GaussianFilter): (2, 100) """ - ... + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, sigma=1, order=0): + self.sigma = sigma + self.order = order + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + # Compute Gaussian filter + X_ = gaussian_filter1d(X, self.sigma, self.axis, self.order) + + return X_ diff --git a/aeon/transformations/series/_moving_average.py b/aeon/transformations/series/_moving_average.py index 61fb15e834..7e9993946e 100644 --- a/aeon/transformations/series/_moving_average.py +++ b/aeon/transformations/series/_moving_average.py @@ -3,21 +3,12 @@ __maintainer__ = ["Datadote"] __all__ = ["MovingAverageSeriesTransformer"] +import numpy as np -from deprecated.sphinx import deprecated +from aeon.transformations.series.base import BaseSeriesTransformer -from aeon.transformations.series.smoothing import MovingAverage - -# TODO: Remove in v1.3.0 -@deprecated( - version="1.2.0", - reason="MovingAverageSeriesTransformer is deprecated and will be removed in " - "v1.3.0. Please use MovingAverage from " - "transformations.series.smoothing instead.", - category=FutureWarning, -) -class MovingAverageSeriesTransformer(MovingAverage): +class MovingAverageSeriesTransformer(BaseSeriesTransformer): """Calculate the moving average of an array of numbers. 
Slides a window across the input array, and returns the averages for each window. @@ -50,4 +41,38 @@ class MovingAverageSeriesTransformer(MovingAverage): [[-2.5 -1.5 -0.5 0.5 1.5 2.5]] """ - ... + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, window_size: int = 5) -> None: + super().__init__(axis=0) + if window_size <= 0: + raise ValueError(f"window_size must be > 0, got {window_size}") + self.window_size = window_size + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + private _transform containing core logic, called from transform + + Parameters + ---------- + X : np.ndarray + Data to be transformed + y : ignored argument for interface compatibility + Additional data, e.g., labels for transformation + + Returns + ------- + Xt: 2D np.ndarray + transformed version of X + """ + csum = np.cumsum(X, axis=0) + csum[self.window_size :, :] = ( + csum[self.window_size :, :] - csum[: -self.window_size, :] + ) + Xt = csum[self.window_size - 1 :, :] / self.window_size + return Xt diff --git a/aeon/transformations/series/_sg.py b/aeon/transformations/series/_sg.py index 0fc1de3984..19000cc0e8 100644 --- a/aeon/transformations/series/_sg.py +++ b/aeon/transformations/series/_sg.py @@ -4,20 +4,12 @@ __all__ = ["SGSeriesTransformer"] -from deprecated.sphinx import deprecated +from scipy.signal import savgol_filter -from aeon.transformations.series.smoothing import SavitzkyGolayFilter +from aeon.transformations.series.base import BaseSeriesTransformer -# TODO: Remove in v1.3.0 -@deprecated( - version="1.2.0", - reason="SGSeriesTransformer is deprecated and will be removed in v1.3.0. " - "Please use SavitzkyGolayFilter from " - "transformations.series.smoothing instead.", - category=FutureWarning, -) -class SGSeriesTransformer(SavitzkyGolayFilter): +class SGSeriesTransformer(BaseSeriesTransformer): """Filter a times series using Savitzky-Golay (SG). 
Parameters @@ -53,4 +45,31 @@ class SGSeriesTransformer(SavitzkyGolayFilter): (2, 100) """ - ... + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, window_length=5, polyorder=2): + self.window_length = window_length + self.polyorder = polyorder + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + # Compute SG + X_ = savgol_filter(X, self.window_length, self.polyorder) + + return X_ diff --git a/aeon/transformations/series/_siv.py b/aeon/transformations/series/_siv.py index b6eeb5e591..1bb2ad3e0a 100644 --- a/aeon/transformations/series/_siv.py +++ b/aeon/transformations/series/_siv.py @@ -4,20 +4,13 @@ __all__ = ["SIVSeriesTransformer"] -from deprecated.sphinx import deprecated +import numpy as np +from scipy.ndimage import median_filter -from aeon.transformations.series.smoothing import RecursiveMedianSieve +from aeon.transformations.series.base import BaseSeriesTransformer -# TODO: Remove in v1.3.0 -@deprecated( - version="1.2.0", - reason="SIVSeriesTransformer is deprecated and will be removed in v1.3.0. " - "Please use RecursiveMedianSieve from " - "transformations.series.smoothing instead.", - category=FutureWarning, -) -class SIVSeriesTransformer(RecursiveMedianSieve): +class SIVSeriesTransformer(BaseSeriesTransformer): """Filter a times series using Recursive Median Sieve (SIV). Parameters @@ -55,4 +48,40 @@ class SIVSeriesTransformer(RecursiveMedianSieve): (2, 100) """ - ... 
+ _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, window_length=None): + self.window_length = window_length + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + window_length = self.window_length + if window_length is None: + window_length = [3, 5, 7] + if not isinstance(window_length, list): + window_length = [window_length] + + # Compute SIV + X_ = X + + for w in window_length: + footprint = np.ones((1, w)) + X_ = median_filter(X_, footprint=footprint) + + return X_ diff --git a/aeon/transformations/series/smoothing/__init__.py b/aeon/transformations/series/smoothing/__init__.py deleted file mode 100644 index 52ebcc3c8e..0000000000 --- a/aeon/transformations/series/smoothing/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Series smoothing transformers.""" - -__all__ = [ - "DiscreteFourierApproximation", - "ExponentialSmoothing", - "GaussianFilter", - "MovingAverage", - "SavitzkyGolayFilter", - "RecursiveMedianSieve", -] - -from aeon.transformations.series.smoothing._dfa import DiscreteFourierApproximation -from aeon.transformations.series.smoothing._exp_smoothing import ExponentialSmoothing -from aeon.transformations.series.smoothing._gauss import GaussianFilter -from aeon.transformations.series.smoothing._moving_average import MovingAverage -from aeon.transformations.series.smoothing._rms import RecursiveMedianSieve -from aeon.transformations.series.smoothing._sg import SavitzkyGolayFilter diff --git a/aeon/transformations/series/smoothing/_dfa.py b/aeon/transformations/series/smoothing/_dfa.py deleted file mode 100644 index a5e017ac66..0000000000 --- a/aeon/transformations/series/smoothing/_dfa.py +++ /dev/null @@ -1,84 
+0,0 @@ -"""Discrete Fourier Approximation filter transformation for smoothing.""" - -__maintainer__ = ["Cyril-Meyer"] -__all__ = ["DiscreteFourierApproximation"] - - -import numpy as np - -from aeon.transformations.series.base import BaseSeriesTransformer - - -class DiscreteFourierApproximation(BaseSeriesTransformer): - """Filter a times series using a Discrete Fourier Approximation. - - Smooths the series by first transforming into the frequency domain, discarding - the high frequency terms, then transforming back to the time domain. - - Parameters - ---------- - r : float, default=0.5 - Proportion of Fourier terms to retain [0, 1] - sort : bool, default=False - Sort the Fourier terms by amplitude to keep most important terms - - References - ---------- - .. [1] Cooley, J., Lewis, P., Welch, P.: The fast fourier transform and its - applications. IEEE Trans. Educ. 12(1), 27–34 (1969) - - Examples - -------- - >>> import numpy as np - >>> from aeon.transformations.series.smoothing import DiscreteFourierApproximation - >>> X = np.random.random((2, 100)) # Random series length 100 - >>> dft = DiscreteFourierApproximation() - >>> X_ = dft.fit_transform(X) - >>> X_.shape - (2, 100) - """ - - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, r=0.5, sort=False): - self.r = r - self.sort = sort - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. 
- - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - # Compute DFT - dft = np.fft.fft(X) - - # Mask array of terms to keep and number of terms to keep - mask = np.zeros_like(dft, dtype=bool) - keep = max(int(self.r * dft.shape[1]), 1) - - # If sort is set, sort the indices by the decreasing dft amplitude - if self.sort: - sorted_indices = np.argsort(np.abs(dft))[:, ::-1] - for i in range(dft.shape[0]): - mask[i, sorted_indices[i, 0:keep]] = True - # Else, keep the first terms - else: - mask[:, 0:keep] = True - - # Invert DFT with masked terms - X_ = np.fft.ifft(dft * mask).real - - return X_ diff --git a/aeon/transformations/series/smoothing/_exp_smoothing.py b/aeon/transformations/series/smoothing/_exp_smoothing.py deleted file mode 100644 index cd70138fa8..0000000000 --- a/aeon/transformations/series/smoothing/_exp_smoothing.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Exponential smoothing transformation.""" - -__maintainer__ = ["Datadote"] -__all__ = ["ExponentialSmoothing"] - -from typing import Union - -import numpy as np - -from aeon.transformations.series.base import BaseSeriesTransformer - - -class ExponentialSmoothing(BaseSeriesTransformer): - """Filter a time series using exponential smoothing. - - - Exponential smoothing (EXP) is a generalisaton of moving average smoothing that - assigns a decaying weight to each element rather than averaging over a window. - - Assume time series T = [t_0, ..., t_j], and smoothed values S = [s_0, ..., s_j] - - Then, s_0 = t_0 and s_j = alpha * t_j + (1 - alpha) * s_j-1 - where 0 ≤ alpha ≤ 1. If window_size is given, alpha is overwritten, and set as - alpha = 2. / (window_size + 1) - - Parameters - ---------- - alpha: float, default=0.2 - decaying weight. Range [0, 1]. 
Overwritten by window_size if window_size exists - window_size: int or float or None, default=None - If window_size is specified, alpha is set to 2. / (window_size + 1) - - Examples - -------- - >>> import numpy as np - >>> from aeon.transformations.series.smoothing import ExponentialSmoothing - >>> X = np.array([-2, -1, 0, 1, 2]) - >>> transformer = ExponentialSmoothing(0.5) - >>> transformer.fit_transform(X) - [[-2. -1.5 -0.75 0.125 1.0625]] - >>> X = np.array([[1, 2, 3, 4], [10, 9, 8, 7]]) - >>> transformer.fit_transform(X) - [[ 1. 1.5 2.25 3.125] - [10. 9.5 8.75 7.875]] - """ - - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__( - self, alpha: float = 0.2, window_size: Union[int, float, None] = None - ) -> None: - self.alpha = alpha if window_size is None else 2.0 / (window_size + 1) - self.window_size = window_size - - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. 
- - private _transform containing core logic, called from transform - - Parameters - ---------- - X : np.ndarray - Data to be transformed - y : ignored argument for interface compatibility - Additional data, e.g., labels for transformation - - Returns - ------- - Xt: 2D np.ndarray - transformed version of X - """ - if not 0 <= self.alpha <= 1: - raise ValueError(f"alpha must be in range [0, 1], got {self.alpha}") - if self.window_size is not None and self.window_size <= 0: - raise ValueError(f"window_size must be > 0, got {self.window_size}") - - Xt = np.zeros_like(X, dtype="float") - Xt[:, 0] = X[:, 0] - for i in range(1, Xt.shape[1]): - Xt[:, i] = self.alpha * X[:, i] + (1 - self.alpha) * Xt[:, i - 1] - return Xt diff --git a/aeon/transformations/series/smoothing/_gauss.py b/aeon/transformations/series/smoothing/_gauss.py deleted file mode 100644 index 7946f8d634..0000000000 --- a/aeon/transformations/series/smoothing/_gauss.py +++ /dev/null @@ -1,67 +0,0 @@ -"""Gaussian filter transformation.""" - -__maintainer__ = ["Cyril-Meyer"] -__all__ = ["GaussianFilter"] - - -from scipy.ndimage import gaussian_filter1d - -from aeon.transformations.series.base import BaseSeriesTransformer - - -class GaussianFilter(BaseSeriesTransformer): - """Filter a time series using Gaussian filter. - - Wrapper for the SciPy ``gaussian_filter1d`` function. - - Parameters - ---------- - sigma : float, default=1 - Standard deviation for the Gaussian kernel. - order : int, default=0 - An order of 0 corresponds to convolution with a Gaussian kernel. - A positive order corresponds to convolution with that derivative of a - Gaussian. - - References - ---------- - .. [1] Chou, Y. L. "Statistical Analysis, Section 17.9." New York: Holt - International (1975). 
- - Examples - -------- - >>> import numpy as np - >>> from aeon.transformations.series.smoothing import GaussianFilter - >>> X = np.random.random((2, 100)) # Random series length 100 - >>> gauss = GaussianFilter(sigma=5) - >>> X_ = gauss.fit_transform(X) - >>> X_.shape - (2, 100) - """ - - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, sigma=1, order=0): - self.sigma = sigma - self.order = order - - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - return gaussian_filter1d(X, self.sigma, axis=self.axis, order=self.order) diff --git a/aeon/transformations/series/smoothing/_moving_average.py b/aeon/transformations/series/smoothing/_moving_average.py deleted file mode 100644 index c4b806e6c7..0000000000 --- a/aeon/transformations/series/smoothing/_moving_average.py +++ /dev/null @@ -1,67 +0,0 @@ -"""Moving average transformation.""" - -__maintainer__ = ["Datadote"] -__all__ = ["MovingAverage"] - -import numpy as np - -from aeon.transformations.series.base import BaseSeriesTransformer - - -class MovingAverage(BaseSeriesTransformer): - """Calculate the moving average for a time series. - - Slides a window across the input array, and returns the averages for each window. - - Parameters - ---------- - window_size: int, default=5 - Number of values to average for each window. 
- - Examples - -------- - >>> import numpy as np - >>> from aeon.transformations.series.smoothing import MovingAverage - >>> X = np.array([-3, -2, -1, 0, 1, 2, 3]) - >>> transformer = MovingAverage(2) - >>> transformer.fit_transform(X) - [[-2.5 -1.5 -0.5 0.5 1.5 2.5]] - """ - - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, window_size: int = 5) -> None: - self.window_size = window_size - - super().__init__(axis=0) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - private _transform containing core logic, called from transform - - Parameters - ---------- - X : np.ndarray - Data to be transformed - y : ignored argument for interface compatibility - Additional data, e.g., labels for transformation - - Returns - ------- - Xt: 2D np.ndarray - transformed version of X - """ - if self.window_size <= 0: - raise ValueError(f"window_size must be > 0, got {self.window_size}") - - csum = np.cumsum(X, axis=0) - csum[self.window_size :, :] = ( - csum[self.window_size :, :] - csum[: -self.window_size, :] - ) - Xt = csum[self.window_size - 1 :, :] / self.window_size - return Xt diff --git a/aeon/transformations/series/smoothing/_rms.py b/aeon/transformations/series/smoothing/_rms.py deleted file mode 100644 index 9b9b45becb..0000000000 --- a/aeon/transformations/series/smoothing/_rms.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Recursive Median Sieve filter transformation.""" - -__maintainer__ = ["Cyril-Meyer"] -__all__ = ["RecursiveMedianSieve"] - - -import numpy as np -from scipy.ndimage import median_filter - -from aeon.transformations.series.base import BaseSeriesTransformer - - -class RecursiveMedianSieve(BaseSeriesTransformer): - """Filter a times series using a Recursive Median Sieve. - - Parameters - ---------- - window_length : list of int or int, default=None - The filter windows lengths (recommended increasing value). - If None, defaults to [3, 5, 7]. 
- - Notes - ----- - Use scipy.ndimage.median_filter instead of scipy.signal.medfilt : - The more general function scipy.ndimage.median_filter has a more efficient - implementation of a median filter and therefore runs much faster. - https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.medfilt.html - - References - ---------- - .. [1] Bangham J. A. (1988). - Data-sieving hydrophobicity plots. - Analytical biochemistry, 174(1), 142–145. - https://doi.org/10.1016/0003-2697(88)90528-3 - .. [2] Yli-Harja, O., Koivisto, P., Bangham, J. A., Cawley, G., - Harvey, R., & Shmulevich, I. (2001). - Simplified implementation of the recursive median sieve. - Signal Process., 81(7), 1565–1570. - https://doi.org/10.1016/S0165-1684(01)00054-8 - - Examples - -------- - >>> import numpy as np - >>> from aeon.transformations.series.smoothing import RecursiveMedianSieve - >>> X = np.random.random((2, 100)) # Random series length 100 - >>> siv = RecursiveMedianSieve() - >>> X_ = siv.fit_transform(X) - >>> X_.shape - (2, 100) - """ - - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, window_length=None): - self.window_length = window_length - - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. 
- - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - window_length = self.window_length - if window_length is None: - window_length = [3, 5, 7] - if not isinstance(window_length, list): - window_length = [window_length] - - X_ = X - for w in window_length: - footprint = np.ones((1, w)) - X_ = median_filter(X_, footprint=footprint) - - return X_ diff --git a/aeon/transformations/series/smoothing/_sg.py b/aeon/transformations/series/smoothing/_sg.py deleted file mode 100644 index 750f5c5701..0000000000 --- a/aeon/transformations/series/smoothing/_sg.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Savitzky-Golay filter transformation.""" - -__maintainer__ = ["Cyril-Meyer"] -__all__ = ["SavitzkyGolayFilter"] - - -from scipy.signal import savgol_filter - -from aeon.transformations.series.base import BaseSeriesTransformer - - -class SavitzkyGolayFilter(BaseSeriesTransformer): - """Filter a times series using Savitzky-Golay (SG). - - Wrapper for the SciPy ``savgol_filter`` function. - - Parameters - ---------- - window_length : int, default=5 - The length of the filter window (i.e., the number of coefficients). - window_length must be less than or equal to the size of the input. - polyorder : int, default=2 - The order of the polynomial used to fit the samples. - polyorder must be less than window_length. - - References - ---------- - .. [1] Savitzky, A., & Golay, M. J. (1964). - Smoothing and differentiation of data by simplified least squares procedures. - Analytical chemistry, 36(8), 1627-1639. 
- - Examples - -------- - >>> import numpy as np - >>> from aeon.transformations.series.smoothing import SavitzkyGolayFilter - >>> X = np.random.random((2, 100)) # Random series length 100 - >>> sg = SavitzkyGolayFilter() - >>> X_ = sg.fit_transform(X) - >>> X_.shape - (2, 100) - """ - - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, window_length=5, polyorder=2): - self.window_length = window_length - self.polyorder = polyorder - - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - return savgol_filter(X, self.window_length, self.polyorder) diff --git a/aeon/transformations/series/smoothing/tests/__init__.py b/aeon/transformations/series/smoothing/tests/__init__.py deleted file mode 100644 index 5882e7a73b..0000000000 --- a/aeon/transformations/series/smoothing/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for series smoothing transformations.""" diff --git a/aeon/transformations/series/smoothing/tests/test_dft.py b/aeon/transformations/series/tests/test_dft.py similarity index 73% rename from aeon/transformations/series/smoothing/tests/test_dft.py rename to aeon/transformations/series/tests/test_dft.py index 62cd7147a4..6ffe04b78c 100644 --- a/aeon/transformations/series/smoothing/tests/test_dft.py +++ b/aeon/transformations/series/tests/test_dft.py @@ -1,12 +1,12 @@ """Tests for DFT transformation.""" +__maintainer__ = [] + import numpy as np import pytest -from aeon.transformations.series.smoothing._dfa import DiscreteFourierApproximation - -@pytest.mark.parametrize("r", [0.00, 0.50, 1.00]) +@pytest.mark.parametrize("r", [0.00, 0.25, 0.50, 0.75, 1.00]) @pytest.mark.parametrize("sort", [True, False]) def 
test_dft(r, sort): """Test the functionality of DFT transformation.""" @@ -23,11 +23,15 @@ def test_dft(r, sort): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) + x12r = x12 + np.random.random((2, n_samples)) * 0.25 + + from aeon.transformations.series._dft import DFTSeriesTransformer - dft = DiscreteFourierApproximation(r=r, sort=sort) + dft = DFTSeriesTransformer(r=r, sort=sort) x_1 = dft.fit_transform(x1) x_2 = dft.fit_transform(x2) x_12 = dft.fit_transform(x12) + dft.fit_transform(x12r) np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) diff --git a/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py b/aeon/transformations/series/tests/test_exp_smoothing.py similarity index 72% rename from aeon/transformations/series/smoothing/tests/test_exp_smoothing.py rename to aeon/transformations/series/tests/test_exp_smoothing.py index c441104e8c..cb83a4e7da 100644 --- a/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py +++ b/aeon/transformations/series/tests/test_exp_smoothing.py @@ -1,9 +1,11 @@ -"""Tests for ExponentialSmoothing.""" +"""Tests for ExpSmoothingSeriesTransformer.""" + +__maintainer__ = ["Datadote"] import numpy as np import pytest -from aeon.transformations.series.smoothing import ExponentialSmoothing +from aeon.transformations.series._exp_smoothing import ExpSmoothingSeriesTransformer TEST_DATA = [np.array([-2, -1, 0, 1, 2]), np.array([[1, 2, 3, 4], [10, 9, 8, 7]])] EXPECTED_RESULTS = [ @@ -14,7 +16,7 @@ def test_input_1d_array(): """Test inputs of dimension 1.""" - transformer = ExponentialSmoothing(0.5) + transformer = ExpSmoothingSeriesTransformer(0.5) idx_data = 0 Xt = transformer.fit_transform(TEST_DATA[idx_data]) np.testing.assert_almost_equal(Xt, EXPECTED_RESULTS[idx_data], decimal=5) @@ -22,7 +24,7 @@ def test_input_1d_array(): def test_input_2d_array(): """Test inputs of dimension 2.""" - transformer = ExponentialSmoothing(0.5) + 
transformer = ExpSmoothingSeriesTransformer(0.5) idx_data = 1 Xt = transformer.fit_transform(TEST_DATA[idx_data]) np.testing.assert_almost_equal(Xt, EXPECTED_RESULTS[idx_data], decimal=5) @@ -32,8 +34,8 @@ def test_input_2d_array(): def test_window_size_matches_alpha(alpha_window): """Check same output results using equivalent alpha and window_size.""" alpha, window_size = alpha_window - transformer1 = ExponentialSmoothing(alpha=alpha) - transformer2 = ExponentialSmoothing(window_size=window_size) + transformer1 = ExpSmoothingSeriesTransformer(alpha=alpha) + transformer2 = ExpSmoothingSeriesTransformer(window_size=window_size) for i in range(len(TEST_DATA)): Xt1 = transformer1.fit_transform(TEST_DATA[i]) Xt2 = transformer2.fit_transform(TEST_DATA[i]) @@ -43,16 +45,16 @@ def test_window_size_matches_alpha(alpha_window): def test_alpha_less_than_zero(): """Test alpha less than zero.""" with pytest.raises(ValueError): - ExponentialSmoothing(-0.5) + ExpSmoothingSeriesTransformer(-0.5) def test_alpha_greater_than_one(): """Test alpha greater than one.""" with pytest.raises(ValueError): - ExponentialSmoothing(2.0) + ExpSmoothingSeriesTransformer(2.0) def test_window_size_than_one(): """Test window_size < 0.""" with pytest.raises(ValueError): - ExponentialSmoothing(window_size=0) + ExpSmoothingSeriesTransformer(window_size=0) diff --git a/aeon/transformations/series/smoothing/tests/test_gauss.py b/aeon/transformations/series/tests/test_gauss.py similarity index 60% rename from aeon/transformations/series/smoothing/tests/test_gauss.py rename to aeon/transformations/series/tests/test_gauss.py index 52a8ae3ee8..6ab65ac107 100644 --- a/aeon/transformations/series/smoothing/tests/test_gauss.py +++ b/aeon/transformations/series/tests/test_gauss.py @@ -1,12 +1,12 @@ """Tests for Gauss transformation.""" +__maintainer__ = [] + import numpy as np import pytest -from aeon.transformations.series.smoothing import GaussianFilter - -@pytest.mark.parametrize("sigma", [0.1, 1, 10]) 
+@pytest.mark.parametrize("sigma", [0.1, 0.5, 1, 2, 5, 10]) @pytest.mark.parametrize("order", [0, 1, 2]) def test_gauss(sigma, order): """Test the functionality of Gauss transformation.""" @@ -23,11 +23,25 @@ def test_gauss(sigma, order): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) + x12r = x12 + np.random.random((2, n_samples)) * 0.25 - sg = GaussianFilter(sigma=sigma, order=order) + from aeon.transformations.series._gauss import GaussSeriesTransformer + + sg = GaussSeriesTransformer(sigma=sigma, order=order) x_1 = sg.fit_transform(x1) x_2 = sg.fit_transform(x2) x_12 = sg.fit_transform(x12) + x_12_r = sg.fit_transform(x12r) + + """ + # Visualize smoothing + import matplotlib.pyplot as plt + plt.plot(x12r[0]) + plt.plot(x_12_r[0]) + plt.savefig(fname=f'Gauss_{sigma}_{order}.png') + plt.clf() + """ np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) + assert x_12.shape == x_12_r.shape diff --git a/aeon/transformations/series/smoothing/tests/test_moving_average.py b/aeon/transformations/series/tests/test_moving_average.py similarity index 54% rename from aeon/transformations/series/smoothing/tests/test_moving_average.py rename to aeon/transformations/series/tests/test_moving_average.py index 6fa1e55a74..bfbf3a71bd 100644 --- a/aeon/transformations/series/smoothing/tests/test_moving_average.py +++ b/aeon/transformations/series/tests/test_moving_average.py @@ -1,27 +1,26 @@ """Tests for MovingAverageTransformer.""" +__maintainer__ = ["Datadote"] + import numpy as np import pytest -from aeon.transformations.series.smoothing import MovingAverage +from aeon.transformations.series._moving_average import MovingAverageSeriesTransformer -TEST_DATA = [ - np.array([-3, -2, -1, 0, 1, 2, 3]), - np.array([[-3, -2, -1, 0, 1, 2, 3], [3, 2, 1, 0, -1, -2, -3]]), -] +TEST_DATA = [np.array([-3, -2, -1, 0, 1, 2, 3]), np.array([[-3, -2, -1, 0, 1, 2, 3]])] EXPECTED_RESULTS = [ np.array([[-2.5, -1.5, -0.5, 
0.5, 1.5, 2.5]]), - np.array([[-2.5, -1.5, -0.5, 0.5, 1.5, 2.5], [2.5, 1.5, 0.5, -0.5, -1.5, -2.5]]), + np.array([[-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]]), ] def test_window_size_greater_than_zero(): """Test window sizes > 0.""" - ma = MovingAverage(window_size=1) + ma = MovingAverageSeriesTransformer(window_size=1) xt = ma.fit_transform(TEST_DATA[0]) - np.testing.assert_array_almost_equal(xt, TEST_DATA[0], decimal=2) + np.testing.assert_array_almost_equal(xt, xt, decimal=2) - ma = MovingAverage(window_size=2) + ma = MovingAverageSeriesTransformer(window_size=2) for i in range(len(TEST_DATA)): xt = ma.fit_transform(TEST_DATA[i]) np.testing.assert_array_almost_equal(xt, EXPECTED_RESULTS[i], decimal=2) @@ -30,10 +29,10 @@ def test_window_size_greater_than_zero(): def test_window_size_equal_zero(): """Test window size == 0.""" with pytest.raises(ValueError): - MovingAverage(window_size=0) + MovingAverageSeriesTransformer(window_size=0) def test_window_size_less_than_zero(): """Test window sizes < 0.""" with pytest.raises(ValueError): - MovingAverage(window_size=-1) + MovingAverageSeriesTransformer(window_size=-1) diff --git a/aeon/transformations/series/smoothing/tests/test_sg.py b/aeon/transformations/series/tests/test_sg.py similarity index 64% rename from aeon/transformations/series/smoothing/tests/test_sg.py rename to aeon/transformations/series/tests/test_sg.py index 75604fb196..7df2970086 100644 --- a/aeon/transformations/series/smoothing/tests/test_sg.py +++ b/aeon/transformations/series/tests/test_sg.py @@ -1,10 +1,10 @@ """Tests for SG transformation.""" +__maintainer__ = [] + import numpy as np import pytest -from aeon.transformations.series.smoothing import SavitzkyGolayFilter - @pytest.mark.parametrize("window_length", [5, 9, 17]) @pytest.mark.parametrize("polyorder", [2, 3, 4]) @@ -23,11 +23,25 @@ def test_sg(window_length, polyorder): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) + x12r = x12 + np.random.random((2, n_samples)) * 0.25 - sg = 
SavitzkyGolayFilter(window_length=window_length, polyorder=polyorder) + from aeon.transformations.series._sg import SGSeriesTransformer + + sg = SGSeriesTransformer(window_length=window_length, polyorder=polyorder) x_1 = sg.fit_transform(x1) x_2 = sg.fit_transform(x2) x_12 = sg.fit_transform(x12) + x_12_r = sg.fit_transform(x12r) + + """ + # Visualize smoothing + import matplotlib.pyplot as plt + plt.plot(x12r[0]) + plt.plot(x_12_r[0]) + plt.savefig(fname=f'SG_{window_length}_{polyorder}.png') + plt.clf() + """ np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) + assert x_12.shape == x_12_r.shape diff --git a/aeon/transformations/series/smoothing/tests/test_rms.py b/aeon/transformations/series/tests/test_siv.py similarity index 64% rename from aeon/transformations/series/smoothing/tests/test_rms.py rename to aeon/transformations/series/tests/test_siv.py index a99a02981e..c8042e9c5b 100644 --- a/aeon/transformations/series/smoothing/tests/test_rms.py +++ b/aeon/transformations/series/tests/test_siv.py @@ -1,10 +1,10 @@ """Tests for SIV transformation.""" +__maintainer__ = [] + import numpy as np import pytest -from aeon.transformations.series.smoothing import RecursiveMedianSieve - @pytest.mark.parametrize( "window_length", [1, 2, 3, 5, 7, 10, 11, [2, 3], [3, 5], [3, 5, 7], [3, 5, 7, 11]] @@ -24,11 +24,25 @@ def test_siv(window_length): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) + x12r = x12 + np.random.random((2, n_samples)) * 0.25 - siv = RecursiveMedianSieve(window_length=window_length) + from aeon.transformations.series._siv import SIVSeriesTransformer + + siv = SIVSeriesTransformer(window_length=window_length) x_1 = siv.fit_transform(x1) x_2 = siv.fit_transform(x2) x_12 = siv.fit_transform(x12) + x_12_r = siv.fit_transform(x12r) + + """ + # Visualize smoothing + import matplotlib.pyplot as plt + plt.plot(x12r[0]) + plt.plot(x_12_r[0]) + 
plt.savefig(fname=f'SIV_{window_length}.png') + plt.clf() + """ np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) + assert x_12.shape == x_12_r.shape From d777a1278f6490392ff3bc5c0908cb85ba89b565 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 8 May 2025 17:35:55 +0100 Subject: [PATCH 14/24] fixes --- aeon/anomaly_detection/base.py | 8 +-- aeon/anomaly_detection/collection/base.py | 60 ++++++++++++++++++- aeon/anomaly_detection/series/base.py | 3 +- .../series/distance_based/_kmeans.py | 2 +- .../series/distance_based/_merlin.py | 2 +- .../series/distribution_based/_dwt_mlead.py | 2 +- .../series/outlier_detection/_stray.py | 2 +- .../series/tests/test_pyod_adapter.py | 2 +- .../_yield_estimator_checks.py | 2 +- aeon/utils/base/_identifier.py | 8 +-- aeon/utils/base/_register.py | 8 +-- docs/api_reference/anomaly_detection.rst | 2 +- 12 files changed, 78 insertions(+), 23 deletions(-) diff --git a/aeon/anomaly_detection/base.py b/aeon/anomaly_detection/base.py index 60d35f0e2e..a94e93a6f1 100644 --- a/aeon/anomaly_detection/base.py +++ b/aeon/anomaly_detection/base.py @@ -17,11 +17,11 @@ class BaseAnomalyDetector(BaseAeonEstimator): # todo } - def __init__(self, axis): + def __init__(self): super().__init__() @abstractmethod - def fit(self, X, y=None, axis=1): + def fit(self, X, y=None): """Fit time series anomaly detector to X. If the tag ``fit_is_empty`` is true, this just sets the ``is_fitted`` tag to @@ -54,7 +54,7 @@ def fit(self, X, y=None, axis=1): ... @abstractmethod - def predict(self, X, axis=1) -> np.ndarray: + def predict(self, X) -> np.ndarray: """Find anomalies in X. Parameters @@ -79,7 +79,7 @@ def predict(self, X, axis=1) -> np.ndarray: ... @abstractmethod - def fit_predict(self, X, y=None, axis=1) -> np.ndarray: + def fit_predict(self, X, y=None) -> np.ndarray: """Fit time series anomaly detector and find anomalies for X. 
Parameters diff --git a/aeon/anomaly_detection/collection/base.py b/aeon/anomaly_detection/collection/base.py index 9194ae557b..37b903b463 100644 --- a/aeon/anomaly_detection/collection/base.py +++ b/aeon/anomaly_detection/collection/base.py @@ -32,10 +32,11 @@ class name: BaseCollectionAnomalyDetector import numpy as np import pandas as pd +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.base import BaseCollectionEstimator -class BaseCollectionAnomalyDetector(BaseCollectionEstimator): +class BaseCollectionAnomalyDetector(BaseCollectionEstimator, BaseAnomalyDetector): """ Abstract base class for collection anomaly detectors. @@ -162,12 +163,65 @@ def predict(self, X): return self._predict(X) - @abstractmethod - def _fit(self, X, y=None): ... + @final + def fit_predict(self, X, y=None, axis=1) -> np.ndarray: + """Fit time series anomaly detector and find anomalies for X. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + The time series to fit the model to. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. + y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None + The target values for the time series. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + axis : int, default=1 + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + + Returns + ------- + np.ndarray + A boolean, int or float array of length len(X), where each element indicates + whether the corresponding subsequence is anomalous or its anomaly score. 
+ """ + if self.get_tag("requires_y"): + if y is None: + raise ValueError("Tag requires_y is true, but fit called with y=None") + + # reset estimator at the start of fit + self.reset() + + X = self._preprocess_series(X, axis, True) + + if self.get_tag("fit_is_empty"): + self.is_fitted = True + return self._predict(X) + + if y is not None: + y = self._check_y(y) + + pred = self._fit_predict(X, y) + + # this should happen last + self.is_fitted = True + return pred + + def _fit(self, X, y): + return self @abstractmethod def _predict(self, X): ... + def _fit_predict(self, X, y): + self._fit(X, y) + return self._predict(X) + def _check_y(self, y, n_cases): """Check y input is valid. diff --git a/aeon/anomaly_detection/series/base.py b/aeon/anomaly_detection/series/base.py index 57863d9f3d..4c7a97ba2e 100644 --- a/aeon/anomaly_detection/series/base.py +++ b/aeon/anomaly_detection/series/base.py @@ -9,11 +9,12 @@ import numpy as np import pandas as pd +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.base import BaseSeriesEstimator from aeon.base._base_series import VALID_SERIES_INPUT_TYPES -class BaseSeriesAnomalyDetector(BaseSeriesEstimator): +class BaseSeriesAnomalyDetector(BaseSeriesEstimator, BaseAnomalyDetector): """Base class for series anomaly detection algorithms. 
Anomaly detection algorithms are used to identify anomalous subsequences in time diff --git a/aeon/anomaly_detection/series/distance_based/_kmeans.py b/aeon/anomaly_detection/series/distance_based/_kmeans.py index aea82ee21a..1cadc5cec4 100644 --- a/aeon/anomaly_detection/series/distance_based/_kmeans.py +++ b/aeon/anomaly_detection/series/distance_based/_kmeans.py @@ -65,7 +65,7 @@ class KMeansAD(BaseSeriesAnomalyDetector): Examples -------- >>> import numpy as np - >>> from aeon.anomaly_detection.distance_based import KMeansAD + >>> from aeon.anomaly_detection.series.distance_based import KMeansAD >>> X = np.array([1, 2, 3, 4, 1, 2, 3, 3, 2, 8, 9, 8, 1, 2, 3, 4], dtype=np.float64) >>> detector = KMeansAD(n_clusters=3, window_size=4, stride=1, random_state=0) >>> detector.fit_predict(X) diff --git a/aeon/anomaly_detection/series/distance_based/_merlin.py b/aeon/anomaly_detection/series/distance_based/_merlin.py index be0d2a9ead..470b60add0 100644 --- a/aeon/anomaly_detection/series/distance_based/_merlin.py +++ b/aeon/anomaly_detection/series/distance_based/_merlin.py @@ -43,7 +43,7 @@ class MERLIN(BaseSeriesAnomalyDetector): Examples -------- >>> import numpy as np - >>> from aeon.anomaly_detection.distance_based import MERLIN + >>> from aeon.anomaly_detection.series.distance_based import MERLIN >>> X = np.array([1, 2, 3, 4, 1, 2, 3, 4, 2, 3, 4, 5, 1, 2, 3, 4]) >>> detector = MERLIN(min_length=4, max_length=5) >>> detector.fit_predict(X) diff --git a/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py b/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py index 2154abee22..9c97e8d129 100644 --- a/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py +++ b/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py @@ -78,7 +78,7 @@ class DWT_MLEAD(BaseSeriesAnomalyDetector): Examples -------- >>> import numpy as np - >>> from aeon.anomaly_detection.distribution_based import DWT_MLEAD + >>> from 
aeon.anomaly_detection.series.distribution_based import DWT_MLEAD >>> X = np.array([1, 2, 3, 4, 1, 2, 3, 3, 2, 8, 9, 8, 1, 2, 3, 4], dtype=np.float64) >>> detector = DWT_MLEAD( ... start_level=1, quantile_boundary_type='percentile', quantile_epsilon=0.01 diff --git a/aeon/anomaly_detection/series/outlier_detection/_stray.py b/aeon/anomaly_detection/series/outlier_detection/_stray.py index 3d78be8643..bfda74cede 100644 --- a/aeon/anomaly_detection/series/outlier_detection/_stray.py +++ b/aeon/anomaly_detection/series/outlier_detection/_stray.py @@ -54,7 +54,7 @@ class STRAY(BaseSeriesAnomalyDetector): Examples -------- - >>> from aeon.anomaly_detection.outlier_detection import STRAY + >>> from aeon.anomaly_detection.series.outlier_detection import STRAY >>> from aeon.datasets import load_airline >>> import numpy as np >>> X = load_airline() diff --git a/aeon/anomaly_detection/series/tests/test_pyod_adapter.py b/aeon/anomaly_detection/series/tests/test_pyod_adapter.py index 84906c245d..aee336e089 100644 --- a/aeon/anomaly_detection/series/tests/test_pyod_adapter.py +++ b/aeon/anomaly_detection/series/tests/test_pyod_adapter.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.series.outlier_detection import PyODAdapter +from aeon.anomaly_detection.series import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py index 643118793d..472783bfaa 100644 --- a/aeon/testing/estimator_checking/_yield_estimator_checks.py +++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py @@ -11,8 +11,8 @@ import numpy as np from sklearn.exceptions import NotFittedError +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector -from 
aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base import BaseAeonEstimator from aeon.base._base import _clone_estimator from aeon.classification import BaseClassifier diff --git a/aeon/utils/base/_identifier.py b/aeon/utils/base/_identifier.py index 2857b45bd1..78174ebc83 100644 --- a/aeon/utils/base/_identifier.py +++ b/aeon/utils/base/_identifier.py @@ -47,16 +47,16 @@ def get_identifier(estimator): if len(identifiers) == 0: raise TypeError("Error, no identifiers could be determined for estimator") - if len(identifiers) > 1 and "anomaly-detector" in identifiers: - identifiers.remove("anomaly-detector") if len(identifiers) > 1 and "estimator" in identifiers: identifiers.remove("estimator") - if len(identifiers) > 1 and "series-estimator" in identifiers: - identifiers.remove("series-estimator") if len(identifiers) > 1 and "collection-estimator" in identifiers: identifiers.remove("collection-estimator") + if len(identifiers) > 1 and "series-estimator" in identifiers: + identifiers.remove("series-estimator") if len(identifiers) > 1 and "transformer" in identifiers: identifiers.remove("transformer") + if len(identifiers) > 1 and "anomaly-detector" in identifiers: + identifiers.remove("anomaly-detector") if len(identifiers) > 1: TypeError( diff --git a/aeon/utils/base/_register.py b/aeon/utils/base/_register.py index 749c005e5f..f099981afb 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -33,23 +33,23 @@ # all base classes BASE_CLASS_REGISTER = { # abstract - no estimator directly inherits from these - "anomaly-detector": BaseAnomalyDetector, - "collection-estimator": BaseCollectionEstimator, "estimator": BaseAeonEstimator, + "collection-estimator": BaseCollectionEstimator, "series-estimator": BaseSeriesEstimator, "transformer": BaseTransformer, + "anomaly-detector": BaseAnomalyDetector, # estimator types "collection-anomaly-detector": BaseCollectionAnomalyDetector, "collection-transformer": 
BaseCollectionTransformer, "classifier": BaseClassifier, "clusterer": BaseClusterer, "early_classifier": BaseEarlyClassifier, + "forecaster": BaseForecaster, "regressor": BaseRegressor, "segmenter": BaseSegmenter, "similarity_searcher": BaseSimilaritySearch, "series-anomaly-detector": BaseSeriesAnomalyDetector, "series-transformer": BaseSeriesTransformer, - "forecaster": BaseForecaster, } # base classes which are valid for estimator to directly inherit from @@ -57,10 +57,10 @@ k: BASE_CLASS_REGISTER[k] for k in BASE_CLASS_REGISTER.keys() - { - "anomaly-detector", "estimator", "collection-estimator", "series-estimator", "transformer", + "anomaly-detector", } } diff --git a/docs/api_reference/anomaly_detection.rst b/docs/api_reference/anomaly_detection.rst index e50d51f3f0..d0289c5115 100644 --- a/docs/api_reference/anomaly_detection.rst +++ b/docs/api_reference/anomaly_detection.rst @@ -80,7 +80,7 @@ The algorithms for this family are not implemented yet. Whole-series ------------ -.. currentmodule:: aeon.anomaly_detection.whole_series +.. currentmodule:: aeon.anomaly_detection.collection .. 
autosummary:: :toctree: auto_generated/ From 7508ffd907ec1df4469c9bcecff70d0de86c7acb Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 9 May 2025 18:04:49 +0100 Subject: [PATCH 15/24] final bits and docs for refactor --- aeon/anomaly_detection/base.py | 71 +++++++------------ .../collection/_classification.py | 4 +- .../collection/_outlier_detection.py | 2 + aeon/anomaly_detection/collection/base.py | 5 -- aeon/anomaly_detection/series/base.py | 6 -- docs/api_reference/anomaly_detection.rst | 68 ++++++++++-------- 6 files changed, 67 insertions(+), 89 deletions(-) diff --git a/aeon/anomaly_detection/base.py b/aeon/anomaly_detection/base.py index a94e93a6f1..62d61f8d53 100644 --- a/aeon/anomaly_detection/base.py +++ b/aeon/anomaly_detection/base.py @@ -11,10 +11,11 @@ class BaseAnomalyDetector(BaseAeonEstimator): - """todo base class docs.""" + """Anomaly detection base class.""" _tags = { - # todo + "fit_is_empty": True, + "requires_y": False, } def __init__(self): @@ -22,29 +23,23 @@ def __init__(self): @abstractmethod def fit(self, X, y=None): - """Fit time series anomaly detector to X. + """Fit anomaly detector to X, optionally to y. - If the tag ``fit_is_empty`` is true, this just sets the ``is_fitted`` tag to - true. Otherwise, it checks ``self`` can handle ``X``, formats ``X`` into - the structure required by ``self`` then passes ``X`` (and possibly ``y``) to - ``_fit``. + State change: + Changes state to "fitted". + + Writes to self: + _is_fitted : flag is set to True. Parameters ---------- - X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES - The time series to fit the model to. - A valid aeon time series data structure. See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. - y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None - The target values for the time series. - A valid aeon time series data structure. See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. 
- axis : int - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. + X : Series or Collection, any supported type + Data to fit anomaly detector to, of python type as follows: + Series: 2D np.ndarray shape (n_channels, n_timepoints) + Collection: 3D np.ndarray shape (n_cases, n_channels, n_timepoints) + or list of 2D np.ndarray, case i has shape (n_channels, n_timepoints_i) + y : Series, default=None + Additional data, e.g., labels for anomaly detector. Returns ------- @@ -59,16 +54,11 @@ def predict(self, X) -> np.ndarray: Parameters ---------- - X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES - The time series to fit the model to. - A valid aeon time series data structure. See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. - axis : int, default=1 - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. + X : Series or Collection, any supported type + Data to fit anomaly detector to, of python type as follows: + Series: 2D np.ndarray shape (n_channels, n_timepoints) + Collection: 3D np.ndarray shape (n_cases, n_channels, n_timepoints) + or list of 2D np.ndarray, case i has shape (n_channels, n_timepoints_i) Returns ------- @@ -84,20 +74,11 @@ def fit_predict(self, X, y=None) -> np.ndarray: Parameters ---------- - X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES - The time series to fit the model to. - A valid aeon time series data structure. 
See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. - y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None - The target values for the time series. - A valid aeon time series data structure. See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. - axis : int, default=1 - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. + X : Series or Collection, any supported type + Data to fit anomaly detector to, of python type as follows: + Series: 2D np.ndarray shape (n_channels, n_timepoints) + Collection: 3D np.ndarray shape (n_cases, n_channels, n_timepoints) + or list of 2D np.ndarray, case i has shape (n_channels, n_timepoints_i) Returns ------- diff --git a/aeon/anomaly_detection/collection/_classification.py b/aeon/anomaly_detection/collection/_classification.py index d8aa34ebea..904c4bf80e 100644 --- a/aeon/anomaly_detection/collection/_classification.py +++ b/aeon/anomaly_detection/collection/_classification.py @@ -1,7 +1,7 @@ """Adapter to use classification algorithms for collection anomaly detection.""" __maintainer__ = [] - +__all__ = ["ClassificationAdapter"] from sklearn.base import ClassifierMixin from sklearn.ensemble import RandomForestClassifier @@ -30,7 +30,7 @@ class ClassificationAdapter(BaseCollectionAnomalyDetector): """ _tags = { - "X_inner_type": "numpy2D", + "fit_is_empty": False, "requires_y": True, } diff --git a/aeon/anomaly_detection/collection/_outlier_detection.py b/aeon/anomaly_detection/collection/_outlier_detection.py index db40a8e749..0df50ff40c 100644 --- a/aeon/anomaly_detection/collection/_outlier_detection.py +++ b/aeon/anomaly_detection/collection/_outlier_detection.py @@ -1,6 +1,7 @@ """Adapter to 
use outlier detection algorithms for collection anomaly detection.""" __maintainer__ = [] +__all__ = ["OutlierDetectionAdapter"] from sklearn.base import OutlierMixin from sklearn.ensemble import IsolationForest @@ -29,6 +30,7 @@ class OutlierDetectionAdapter(BaseCollectionAnomalyDetector): _tags = { "X_inner_type": "numpy2D", + "fit_is_empty": False, } def __init__(self, detector, random_state=None): diff --git a/aeon/anomaly_detection/collection/base.py b/aeon/anomaly_detection/collection/base.py index 37b903b463..701c4c0cf2 100644 --- a/aeon/anomaly_detection/collection/base.py +++ b/aeon/anomaly_detection/collection/base.py @@ -55,11 +55,6 @@ class BaseCollectionAnomalyDetector(BaseCollectionEstimator, BaseAnomalyDetector Dictionary containing dynamic tag values which have been set at runtime. """ - _tags = { - "fit_is_empty": False, - "requires_y": False, - } - def __init__(self): super().__init__() diff --git a/aeon/anomaly_detection/series/base.py b/aeon/anomaly_detection/series/base.py index 4c7a97ba2e..ccd3649cd7 100644 --- a/aeon/anomaly_detection/series/base.py +++ b/aeon/anomaly_detection/series/base.py @@ -76,12 +76,6 @@ class BaseSeriesAnomalyDetector(BaseSeriesEstimator, BaseAnomalyDetector): Setting this class variable will convert the input data to the chosen axis. """ - _tags = { - "X_inner_type": "np.ndarray", # One of VALID_SERIES_INNER_TYPES - "fit_is_empty": True, - "requires_y": False, - } - def __init__(self, axis): super().__init__(axis=axis) diff --git a/docs/api_reference/anomaly_detection.rst b/docs/api_reference/anomaly_detection.rst index d0289c5115..d1c2bceb65 100644 --- a/docs/api_reference/anomaly_detection.rst +++ b/docs/api_reference/anomaly_detection.rst @@ -3,25 +3,38 @@ Anomaly Detection ================= -The :mod:`aeon.anomaly_detection` module contains algorithms and composition tools for time series classification. 
+The :mod:`aeon.anomaly_detection` module contains algorithms and composition tools for +time series anomaly detection. All detectors in `aeon` can be listed using the `aeon.utils.discovery.all_estimators` utility, using ``estimator_types="anomaly-detector"``, optionally filtered by tags. -Valid tags can be listed by calling the function `aeon.utils.discovery.all_tags_for_estimator`. +Valid tags can be listed by calling the function `aeon.utils.tags.all_tags_for_estimator`. Each detector in this module specifies its supported input data format, output data format, and learning type as an overview table in its documentation. Some detectors support multiple learning types. -.. note:: - Not all algorithm families are currently implemented. The documentation includes - placeholders for planned categories which will be supported in future. +Collection anomaly detectors +---------------------------- + +.. currentmodule:: aeon.anomaly_detection.collection + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + BaseCollectionAnomalyDetector + ClassificationAdapter + OutlierDetectionAdapter + +Series anomaly detectors +------------------------ Distance-based --------------- +~~~~~~~~~~~~~~ -.. currentmodule:: aeon.anomaly_detection.distance_based +.. currentmodule:: aeon.anomaly_detection.series.distance_based .. autosummary:: :toctree: auto_generated/ @@ -37,9 +50,9 @@ Distance-based ROCKAD Distribution-based ------------------ +~~~~~~~~~~~~~~~~~~ -.. currentmodule:: aeon.anomaly_detection.distribution_based +.. currentmodule:: aeon.anomaly_detection.series.distribution_based .. autosummary:: :toctree: auto_generated/ @@ -48,47 +61,40 @@ Distribution-based COPOD DWT_MLEAD -Encoding-based --------------- - -The algorithms for this family are not implemented yet. - -Forecasting-based ------------------ - -The algorithms for this family are not implemented yet. - Outlier-Detection ------------------ +~~~~~~~~~~~~~~~~~ -.. 
currentmodule:: aeon.anomaly_detection.outlier_detection +.. currentmodule:: aeon.anomaly_detection.series.outlier_detection .. autosummary:: :toctree: auto_generated/ :template: class.rst IsolationForest - PyODAdapter STRAY -Reconstruction-based --------------------- +Adapters +~~~~~~~~ -The algorithms for this family are not implemented yet. +.. currentmodule:: aeon.anomaly_detection.series +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst -Whole-series ------------- + PyODAdapter -.. currentmodule:: aeon.anomaly_detection.collection +Base +~~~~ + +.. currentmodule:: aeon.anomaly_detection.series .. autosummary:: :toctree: auto_generated/ :template: class.rst - BaseCollectionAnomalyDetector - ClassificationAdapter - OutlierDetectionAdapter + BaseSeriesAnomalyDetector + Base ---- From 97c5f250c6fac40a41982b42fe4360fb1aa6f6dd Mon Sep 17 00:00:00 2001 From: pattplatt Date: Wed, 14 May 2025 13:53:25 +0200 Subject: [PATCH 16/24] move predict_proba logic into inner_predict for code consistency --- .../series/distance_based/_rockad.py | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/aeon/anomaly_detection/series/distance_based/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py index 15e2016e18..72d5f67f31 100644 --- a/aeon/anomaly_detection/series/distance_based/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -206,23 +206,9 @@ def _fit_predict(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndar point_anomaly_scores = self._inner_predict(_X, padding) return point_anomaly_scores - def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray: - - anomaly_scores = self._predict_proba(X) - - point_anomaly_scores = reverse_windowing( - anomaly_scores, self.window_size, np.nanmean, self.stride, padding - ) - - point_anomaly_scores = (point_anomaly_scores - point_anomaly_scores.min()) / ( - point_anomaly_scores.max() - point_anomaly_scores.min() - ) - - 
return point_anomaly_scores - - def _predict_proba(self, X): + def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray: """ - Predicts the probability of anomalies for the input data. + Predict the anomaly score for each time-point in the input data. Parameters ---------- @@ -257,6 +243,14 @@ def _predict_proba(self, X): y_scores[:, idx] = scores # Average the scores to get the final score for each time series - y_scores = y_scores.mean(axis=1) + anomaly_scores = y_scores.mean(axis=1) + + point_anomaly_scores = reverse_windowing( + anomaly_scores, self.window_size, np.nanmean, self.stride, padding + ) + + point_anomaly_scores = (point_anomaly_scores - point_anomaly_scores.min()) / ( + point_anomaly_scores.max() - point_anomaly_scores.min() + ) - return y_scores + return point_anomaly_scores \ No newline at end of file From c5bbc96e8725098ec6b095c3c93dae01ce63b95c Mon Sep 17 00:00:00 2001 From: pattplatt Date: Wed, 14 May 2025 14:39:44 +0200 Subject: [PATCH 17/24] Added reference and example code, updated description --- .../series/distance_based/_rockad.py | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/series/distance_based/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py index 72d5f67f31..35bebd5713 100644 --- a/aeon/anomaly_detection/series/distance_based/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -19,12 +19,14 @@ class ROCKAD(BaseSeriesAnomalyDetector): """ ROCKET-based Anomaly Detector (ROCKAD). + Adapted ROCKAD [1]_ version to detect anomalies on time-points. ROCKAD leverages the ROCKET transformation for feature extraction from time series data and applies the scikit learn k-nearest neighbors (k-NN) approach with bootstrap aggregation for robust anomaly detection. After windowing, the data gets transformed into the ROCKET feature space. Then the windows are compared based on the feature space by - finding the nearest neighbours. 
+ finding the nearest neighbours. Whole-series based ROCKAD as proposed in + [1]_ can be found at aeon/anomaly_detection/whole_series/_rockad.py This class supports both univariate and multivariate time series and provides options for normalizing features, applying power transformations, @@ -53,6 +55,30 @@ class ROCKAD(BaseSeriesAnomalyDetector): random_state : int, default=42 Random seed for reproducibility. + References + ---------- + .. [1] Theissler, A., Wengert, M., Gerschner, F. (2023). + ROCKAD: Transferring ROCKET to Whole Time Series Anomaly Detection. + In: Crémilleux, B., Hess, S., Nijssen, S. (eds) Advances in Intelligent + Data Analysis XXI. IDA 2023. Lecture Notes in Computer Science, + vol 13876. Springer, Cham. https://doi.org/10.1007/978-3-031-30047-9_33 + + Examples + -------- + >>> import numpy as np + >>> from aeon.anomaly_detection.series.distance_based import ROCKAD + >>> rng = np.random.default_rng(seed=42) + >>> X_train = rng.normal(loc=0.0, scale=1.0, size=(1000,)) + >>> X_test = rng.normal(loc=0.0, scale=1.0, size=(20,)) + >>> X_test[15:20] -= 5 + >>> detector = ROCKAD(window_size=15,n_estimators=10,n_kernels=10,n_neighbors=3) + >>> detector.fit(X_train) + >>> detector.predict(X_test) + array([0. 0.00554713 0.06990941 0.2288106 0.32382585 0.43652154 + 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 + 0.43652154 0.43652154 0.43652154 0.52382585 0.65200875 0.80313368 + 0.85194344 1. ]) + Attributes ---------- rocket_transformer_ : Optional[Rocket] @@ -206,7 +232,7 @@ def _fit_predict(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndar point_anomaly_scores = self._inner_predict(_X, padding) return point_anomaly_scores - def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray: + def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray: """ Predict the anomaly score for each time-point in the input data. 
@@ -253,4 +279,4 @@ def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray: point_anomaly_scores.max() - point_anomaly_scores.min() ) - return point_anomaly_scores \ No newline at end of file + return point_anomaly_scores From 8844eb15d3c21a7db2656c7134b8fd8969f3ed05 Mon Sep 17 00:00:00 2001 From: pattplatt Date: Wed, 14 May 2025 15:12:48 +0200 Subject: [PATCH 18/24] Adapted tests to be semi-supervised --- .../distance_based/tests/test_rockad.py | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py b/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py index 51d2425505..16f60edff7 100644 --- a/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py @@ -10,8 +10,9 @@ def test_rockad_univariate(): """Test ROCKAD univariate output.""" rng = check_random_state(seed=2) - series = rng.normal(size=(100,)) - series[50:58] -= 5 + train_series = rng.normal(size=(100,)) + test_series = rng.normal(size=(100,)) + test_series[50:58] -= 5 ad = ROCKAD( n_estimators=100, @@ -22,7 +23,8 @@ def test_rockad_univariate(): stride=1, ) - pred = ad.fit_predict(series, axis=0) + ad.fit(train_series, axis=0) + pred = ad.predict(test_series, axis=0) assert pred.shape == (100,) assert pred.dtype == np.float64 @@ -32,9 +34,10 @@ def test_rockad_univariate(): def test_rockad_multivariate(): """Test ROCKAD multivariate output.""" rng = check_random_state(seed=2) - series = rng.normal(size=(100, 3)) - series[50:58, 0] -= 5 - series[87:90, 1] += 0.1 + train_series = rng.normal(size=(100, 3)) + test_series = rng.normal(size=(100, 3)) + test_series[50:58, 0] -= 5 + test_series[87:90, 1] += 0.1 ad = ROCKAD( n_estimators=1000, @@ -45,7 +48,8 @@ def test_rockad_multivariate(): stride=1, ) - pred = ad.fit_predict(series, axis=0) + ad.fit(train_series, axis=0) + pred = ad.predict(test_series, axis=0) 
assert pred.shape == (100,) assert pred.dtype == np.float64 @@ -55,21 +59,28 @@ def test_rockad_multivariate(): def test_rockad_incorrect_input(): """Test ROCKAD incorrect input.""" rng = check_random_state(seed=2) - series = rng.normal(size=(100,)) + train_series = rng.normal(size=(100,)) + test_series = rng.normal(size=(5,)) with pytest.raises(ValueError, match="The window size must be at least 1"): ad = ROCKAD(window_size=0) - ad.fit_predict(series) + ad.fit(train_series) with pytest.raises(ValueError, match="The stride must be at least 1"): ad = ROCKAD(stride=0) - ad.fit_predict(series) + ad.fit(train_series) with pytest.raises( ValueError, match=r"Window count .* has to be larger than n_neighbors .*" ): ad = ROCKAD(stride=1, window_size=100) - ad.fit_predict(series) + ad.fit(train_series) with pytest.warns( UserWarning, match=r"Power Transform failed and thus has been disabled." ): ad = ROCKAD(stride=1, window_size=5) - ad.fit_predict(series) + ad.fit(train_series) + with pytest.raises( + ValueError, match=r"window shape cannot be larger than input array shape" + ): + ad = ROCKAD(stride=1, window_size=10) + ad.fit(train_series) + ad.predict(test_series) From 7feab8f27dbd4277d4fea81a3d9abf7afa5b0be2 Mon Sep 17 00:00:00 2001 From: pattplatt Date: Wed, 14 May 2025 15:14:16 +0200 Subject: [PATCH 19/24] Added semi-supervised labeling to description --- aeon/anomaly_detection/series/distance_based/_rockad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/series/distance_based/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py index 35bebd5713..9409b4f4e5 100644 --- a/aeon/anomaly_detection/series/distance_based/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -17,7 +17,7 @@ class ROCKAD(BaseSeriesAnomalyDetector): """ - ROCKET-based Anomaly Detector (ROCKAD). + ROCKET-based Semi-Supervised Anomaly Detector (ROCKAD). Adapted ROCKAD [1]_ version to detect anomalies on time-points. 
ROCKAD leverages the ROCKET transformation for feature extraction from From 1b036bd22a586307e38334f4784160cbbe901ddb Mon Sep 17 00:00:00 2001 From: pattplatt Date: Mon, 2 Jun 2025 18:35:56 +0200 Subject: [PATCH 20/24] moved Attributes above References and Examples, updated path of collection based rockad implementation --- .../series/distance_based/_rockad.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/aeon/anomaly_detection/series/distance_based/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py index 9409b4f4e5..ecb3052fd8 100644 --- a/aeon/anomaly_detection/series/distance_based/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -26,7 +26,7 @@ class ROCKAD(BaseSeriesAnomalyDetector): After windowing, the data gets transformed into the ROCKET feature space. Then the windows are compared based on the feature space by finding the nearest neighbours. Whole-series based ROCKAD as proposed in - [1]_ can be found at aeon/anomaly_detection/whole_series/_rockad.py + [1]_ can be found at aeon/anomaly_detection/collection/_rockad.py This class supports both univariate and multivariate time series and provides options for normalizing features, applying power transformations, @@ -55,6 +55,15 @@ class ROCKAD(BaseSeriesAnomalyDetector): random_state : int, default=42 Random seed for reproducibility. + Attributes + ---------- + rocket_transformer_ : Optional[Rocket] + Instance of the ROCKET transformer used to extract features, set after fitting. + list_baggers_ : Optional[list[NearestNeighbors]] + List containing k-NN estimators used for anomaly scoring, set after fitting. + power_transformer_ : PowerTransformer + Transformer used to apply power transformation to the features. + References ---------- .. [1] Theissler, A., Wengert, M., Gerschner, F. (2023). 
@@ -78,15 +87,6 @@ class ROCKAD(BaseSeriesAnomalyDetector): 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 0.52382585 0.65200875 0.80313368 0.85194344 1. ]) - - Attributes - ---------- - rocket_transformer_ : Optional[Rocket] - Instance of the ROCKET transformer used to extract features, set after fitting. - list_baggers_ : Optional[list[NearestNeighbors]] - List containing k-NN estimators used for anomaly scoring, set after fitting. - power_transformer_ : PowerTransformer - Transformer used to apply power transformation to the features. """ _tags = { From 68a2ccbde30da1e060423580721f81e4914dc354 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Wed, 18 Jun 2025 11:03:50 +0100 Subject: [PATCH 21/24] merge 2 --- aeon/anomaly_detection/__init__.py | 2 ++ aeon/anomaly_detection/base.py | 2 +- aeon/utils/base/_identifier.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/__init__.py b/aeon/anomaly_detection/__init__.py index 878e29fd32..65343cd774 100644 --- a/aeon/anomaly_detection/__init__.py +++ b/aeon/anomaly_detection/__init__.py @@ -3,3 +3,5 @@ __all__ = [ "BaseAnomalyDetector", ] + +from aeon.anomaly_detection.base import BaseAnomalyDetector diff --git a/aeon/anomaly_detection/base.py b/aeon/anomaly_detection/base.py index e8004c0b6e..28e6dbc5b9 100644 --- a/aeon/anomaly_detection/base.py +++ b/aeon/anomaly_detection/base.py @@ -46,7 +46,7 @@ def fit(self, X, y=None): Returns ------- - BaseSeriesAnomalyDetector + BaseAnomalyDetector The fitted estimator, reference to self. """ ... 
diff --git a/aeon/utils/base/_identifier.py b/aeon/utils/base/_identifier.py index 4fc6059821..de724156f8 100644 --- a/aeon/utils/base/_identifier.py +++ b/aeon/utils/base/_identifier.py @@ -49,10 +49,10 @@ def get_identifier(estimator): if len(identifiers) > 1 and "estimator" in identifiers: identifiers.remove("estimator") - if len(identifiers) > 1 and "series-estimator" in identifiers: - identifiers.remove("series-estimator") if len(identifiers) > 1 and "collection-estimator" in identifiers: identifiers.remove("collection-estimator") + if len(identifiers) > 1 and "series-estimator" in identifiers: + identifiers.remove("series-estimator") if len(identifiers) > 1 and "transformer" in identifiers: identifiers.remove("transformer") if len(identifiers) > 1 and "anomaly-detector" in identifiers: From 771dcc9221c03a404c5fc2117aede1cfa79768ac Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Wed, 18 Jun 2025 11:46:57 +0100 Subject: [PATCH 22/24] CI errors --- .github/workflows/periodic_tests.yml | 2 +- .github/workflows/pr_pytest.yml | 2 +- .github/workflows/release.yml | 2 +- aeon/anomaly_detection/series/distance_based/_rockad.py | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/periodic_tests.yml b/.github/workflows/periodic_tests.yml index ef941711d7..4b9829b44e 100644 --- a/.github/workflows/periodic_tests.yml +++ b/.github/workflows/periodic_tests.yml @@ -178,7 +178,7 @@ jobs: if: runner.os == 'Linux' uses: pierotofy/set-swap-space@v1.0 with: - swap-size-gb: 8 + swap-size-gb: 4 - name: Use numba cache to set env variables but not restore cache uses: ./.github/actions/numba_cache diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml index 77ff1d2f58..6aba9e83c4 100644 --- a/.github/workflows/pr_pytest.yml +++ b/.github/workflows/pr_pytest.yml @@ -76,7 +76,7 @@ jobs: if: runner.os == 'Linux' uses: pierotofy/set-swap-space@v1.0 with: - swap-size-gb: 8 + swap-size-gb: 4 - if: ${{ github.event_name != 
'pull_request' || !contains(github.event.pull_request.labels.*.name, 'no numba cache') }} name: Restore numba cache diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 590c4f9342..02ebeea529 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -69,7 +69,7 @@ jobs: if: runner.os == 'Linux' uses: pierotofy/set-swap-space@v1.0 with: - swap-size-gb: 8 + swap-size-gb: 4 - uses: actions/download-artifact@v4 with: diff --git a/aeon/anomaly_detection/series/distance_based/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py index 5fce51141a..0c91eb508a 100644 --- a/aeon/anomaly_detection/series/distance_based/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -82,6 +82,7 @@ class ROCKAD(BaseSeriesAnomalyDetector): >>> X_test[15:20] -= 5 >>> detector = ROCKAD(window_size=15,n_estimators=10,n_kernels=10,n_neighbors=3) >>> detector.fit(X_train) + ROCKAD(...) >>> detector.predict(X_test) array([0. 0.00554713 0.06990941 0.2288106 0.32382585 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 From 49163c6a06186ff752042be1c0ea5048778d5996 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Wed, 18 Jun 2025 12:04:30 +0100 Subject: [PATCH 23/24] maintainer --- aeon/anomaly_detection/series/distance_based/_rockad.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/series/distance_based/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py index 0c91eb508a..8b992e5b62 100644 --- a/aeon/anomaly_detection/series/distance_based/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -1,5 +1,6 @@ """ROCKAD anomaly detector.""" +__maintainer__ = [] __all__ = ["ROCKAD"] import warnings @@ -113,7 +114,6 @@ def __init__( n_jobs=1, random_state=42, ): - self.n_estimators = n_estimators self.n_kernels = n_kernels self.normalise = normalise @@ -163,7 +163,6 @@ def _check_params(self, X: 
np.ndarray) -> None: ) def _inner_fit(self, X: np.ndarray) -> None: - self.rocket_transformer_ = Rocket( n_kernels=self.n_kernels, normalise=self.normalise, @@ -216,7 +215,6 @@ def _inner_fit(self, X: np.ndarray) -> None: self.list_baggers_.append(estimator) def _predict(self, X) -> np.ndarray: - _X, padding = sliding_windows( X, window_size=self.window_size, stride=self.stride, axis=0 ) From afbd04b84b3df20909310d65d5e87fb32843ee5d Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Wed, 18 Jun 2025 21:35:17 +0100 Subject: [PATCH 24/24] doctest --- aeon/anomaly_detection/series/distance_based/_rockad.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aeon/anomaly_detection/series/distance_based/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py index 8b992e5b62..1fd0fa8f30 100644 --- a/aeon/anomaly_detection/series/distance_based/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -85,10 +85,10 @@ class ROCKAD(BaseSeriesAnomalyDetector): >>> detector.fit(X_train) ROCKAD(...) >>> detector.predict(X_test) - array([0. 0.00554713 0.06990941 0.2288106 0.32382585 0.43652154 - 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 0.43652154 - 0.43652154 0.43652154 0.43652154 0.52382585 0.65200875 0.80313368 - 0.85194344 1. ]) + array([0. , 0.00554713, 0.0699094 , 0.22881059, 0.32382585, + 0.43652154, 0.43652154, 0.43652154, 0.43652154, 0.43652154, + 0.43652154, 0.43652154, 0.43652154, 0.43652154, 0.43652154, + 0.52382585, 0.65200875, 0.80313368, 0.85194345, 1. ]) """ _tags = {