From 01eb33723a8ddce59bdf1bad9aa70ec6964c9231 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Sat, 17 May 2025 23:56:38 +0100 Subject: [PATCH 1/6] transformer mixin, composables and pipeline fix --- .../compose/collection_pipeline.py | 11 +- .../_estimators/compose/series_pipeline.py | 272 ++++++++++++++++++ aeon/transformations/__init__.py | 8 +- aeon/transformations/base.py | 5 +- aeon/transformations/collection/__init__.py | 6 +- ..._broadcaster.py => _series_broadcaster.py} | 8 +- .../collection/compose/tests/test_pipeline.py | 7 +- .../collection/tests/test_broadcaster.py | 19 +- aeon/transformations/series/__init__.py | 4 + aeon/transformations/series/_boxcox.py | 88 +----- .../series/_collection_wrapper.py | 86 ++++++ aeon/transformations/series/_log.py | 92 ++++++ .../series/compose/__init__.py | 8 + .../series/compose/_identity.py | 25 ++ .../series/compose/_pipeline.py | 102 +++++++ .../series/compose/tests/__init__.py | 1 + .../series/compose/tests/test_pipeline.py | 188 ++++++++++++ .../series/tests/test_boxcox.py | 3 - .../series/tests/test_wrapper.py | 24 ++ 19 files changed, 839 insertions(+), 118 deletions(-) create mode 100644 aeon/base/_estimators/compose/series_pipeline.py rename aeon/transformations/collection/{_broadcaster.py => _series_broadcaster.py} (97%) create mode 100644 aeon/transformations/series/_collection_wrapper.py create mode 100644 aeon/transformations/series/_log.py create mode 100644 aeon/transformations/series/compose/__init__.py create mode 100644 aeon/transformations/series/compose/_identity.py create mode 100644 aeon/transformations/series/compose/_pipeline.py create mode 100644 aeon/transformations/series/compose/tests/__init__.py create mode 100644 aeon/transformations/series/compose/tests/test_pipeline.py create mode 100644 aeon/transformations/series/tests/test_wrapper.py diff --git a/aeon/base/_estimators/compose/collection_pipeline.py b/aeon/base/_estimators/compose/collection_pipeline.py index 
1b90c6c411..dcc2e5d8ee 100644 --- a/aeon/base/_estimators/compose/collection_pipeline.py +++ b/aeon/base/_estimators/compose/collection_pipeline.py @@ -1,4 +1,4 @@ -"""Base class for pipelines in series collection modules. +"""Base class for pipelines in collection modules. i.e. classification, regression and clustering. """ @@ -47,11 +47,6 @@ class BaseCollectionPipeline(ComposableEstimatorMixin, BaseCollectionEstimator): Clones of transformers and the estimator which are fitted in the pipeline. Will always be in (str, estimator) format, even if transformers input is a singular transform or list of transformers. - - See Also - -------- - ClassifierPipeline : A pipeline for classification tasks. - RegressorPipeline : A pipeline for regression tasks. """ # Attribute name containing an iterable of processed (str, estimator) tuples @@ -285,7 +280,7 @@ def _fit_transform(self, X, y=None) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_)): - Xt = self.steps_[i][1].fit_transform(X=Xt) + Xt = self.steps_[i][1].fit_transform(X=Xt, y=y) return Xt def _transform(self, X, y=None) -> np.ndarray: @@ -302,7 +297,7 @@ def _transform(self, X, y=None) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_)): - Xt = self.steps_[i][1].transform(X=Xt) + Xt = self.steps_[i][1].transform(X=Xt, y=y) return Xt def _clone_steps(self): diff --git a/aeon/base/_estimators/compose/series_pipeline.py b/aeon/base/_estimators/compose/series_pipeline.py new file mode 100644 index 0000000000..81e2c0565b --- /dev/null +++ b/aeon/base/_estimators/compose/series_pipeline.py @@ -0,0 +1,272 @@ +"""Base class for pipelines in series modules. + +i.e. forecasting, anomaly detection, series transforms. 
+""" + +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = ["BaseSeriesPipeline"] + +from abc import abstractmethod + +import numpy as np +from sklearn.base import BaseEstimator +from sklearn.utils import check_random_state + +from aeon.base import ( + BaseAeonEstimator, + BaseSeriesEstimator, + ComposableEstimatorMixin, +) +from aeon.base._base import _clone_estimator + + +class BaseSeriesPipeline(ComposableEstimatorMixin, BaseSeriesEstimator): + """Base class for composable pipelines in series based modules. + + Parameters + ---------- + transformers : aeon or sklearn transformer or list of transformers + A transform or list of transformers to use prior to fitting or predicting. + List of tuples (str, transformer) of transformers can also be passed, where + the str is used to name the transformer. + The objects are cloned prior, as such the state of the input will not be + modified by fitting the pipeline. + _estimator : aeon or sklearn estimator + An estimator to use at the end of the pipeline. + The object is cloned prior, as such the state of the input will not be modified + by fitting the pipeline. + random_state : int, RandomState instance or None, default=None + Random state used to fit the estimators. If None, no random state is set for + pipeline components (but they may still be seeded prior to input). + If `int`, random_state is the seed used by the random number generator; + If `RandomState` instance, random_state is the random number generator; + + Attributes + ---------- + steps_ : list of tuples (str, estimator) of transformers and estimator + Clones of transformers and the estimator which are fitted in the pipeline. + Will always be in (str, estimator) format, even if transformers input is a + singular transform or list of transformers. + """ + + # Attribute name containing an iterable of processed (str, estimator) tuples + # with unfitted estimators and unique names.
Used in get_params and set_params + _estimators_attr = "_steps" + # Attribute name containing an iterable of fitted (str, estimator) tuples. + # Used in get_fitted_params + _fitted_estimators_attr = "steps_" + + @abstractmethod + def __init__(self, transformers, _estimator, random_state=None): + self.transformers = transformers + self._estimator = _estimator + self.random_state = random_state + + self._steps = ( + [t for t in transformers] + if isinstance(transformers, list) + else [transformers] + ) + if _estimator is not None: + self._steps.append(_estimator) + + self._check_estimators( + self._steps, + attr_name="_steps", + class_type=BaseEstimator, + ) + self._steps = self._convert_estimators(self._steps, clone_estimators=False) + + super().__init__() + + # can handle multivariate if: both estimator and all transformers can, + # *or* transformer chain removes multivariate + multivariate_tags = [ + ( + e[1].get_tag( + "capability:multivariate", + raise_error=False, + tag_value_default=False, + ) + if isinstance(e[1], BaseAeonEstimator) + else False + ) + for e in self._steps + ] + + multivariate_rm_tag = False + for e in self._steps: + if ( + isinstance(e[1], BaseAeonEstimator) + and e[1].get_tag( + "capability:multivariate", + raise_error=False, + tag_value_default=False, + ) + and e[1].get_tag("output_data_type", raise_error=False) == "Tabular" + ): + multivariate_rm_tag = True + break + elif not isinstance(e[1], BaseAeonEstimator) or not e[1].get_tag( + "capability:multivariate", raise_error=False, tag_value_default=False + ): + break + + multivariate = all(multivariate_tags) or multivariate_rm_tag + + # can handle missing values if: both estimator and all transformers can, + # *or* transformer chain removes missing data + missing_tags = [ + ( + e[1].get_tag( + "capability:missing_values", + raise_error=False, + tag_value_default=False, + ) + if isinstance(e[1], BaseAeonEstimator) + else False + ) + for e in self._steps + ] + + missing_rm_tag = False + for e 
in self._steps: + if ( + isinstance(e[1], BaseAeonEstimator) + and e[1].get_tag( + "capability:missing_values", + raise_error=False, + tag_value_default=False, + ) + and e[1].get_tag( + "removes_missing_values", raise_error=False, tag_value_default=False + ) + ): + missing_rm_tag = True + break + elif not isinstance(e[1], BaseAeonEstimator) or not e[1].get_tag( + "capability:missing_values", raise_error=False, tag_value_default=False + ): + break + + missing = all(missing_tags) or missing_rm_tag + + tags_to_set = { + "capability:multivariate": multivariate, + "capability:missing_values": missing, + } + self.set_tags(**tags_to_set) + + def _fit(self, X, y): + """Fit time series estimator to training data. + + Parameters + ---------- + X : Training data of type self.get_tag("X_inner_type") + y : array-like, shape = [n_cases] - the target values + + Returns + ------- + self : reference to self. + """ + self._clone_steps() + + # fit transforms sequentially + Xt = X + for i in range(len(self.steps_) - 1): + Xt = self.steps_[i][1].fit_transform(X=Xt, y=y) + # fit estimator + self.steps_[-1][1].fit(X=Xt, y=y) + + return self + + def _predict(self, X) -> np.ndarray: + """Predict labels for sequences in X. + + Parameters + ---------- + X : data not used in training, of type self.get_tag("X_inner_type") + + Returns + ------- + y : predictions of target values for X, np.ndarray + """ + # transform + Xt = X + for i in range(len(self.steps_) - 1): + Xt = self.steps_[i][1].transform(X=Xt) + # predict + return self.steps_[-1][1].predict(X=Xt) + + def _predict_proba(self, X) -> np.ndarray: + """Predicts labels probabilities for sequences in X. + + Default behaviour is to call _predict and set the predicted class probability + to 1, other class probabilities to 0. Override if better estimates are + obtainable. 
+ + Parameters + ---------- + X : data to predict y with, of type self.get_tag("X_inner_type") + + Returns + ------- + y : predictions of probabilities for target values of X, np.ndarray + """ + # transform + Xt = X + for i in range(len(self.steps_) - 1): + Xt = self.steps_[i][1].transform(X=Xt) + # predict + return self.steps_[-1][1].predict_proba(X=Xt) + + def _fit_transform(self, X, y=None) -> np.ndarray: + """Fit and transform sequences in X. + + Parameters + ---------- + X : data of type self.get_tag("X_inner_type") + + Returns + ------- + Xt : transformed data + """ + self._clone_steps() + + # transform + Xt = X + for i in range(len(self.steps_)): + Xt = self.steps_[i][1].fit_transform(X=Xt, y=y) + return Xt + + def _transform(self, X, y=None) -> np.ndarray: + """Transform sequences in X. + + Parameters + ---------- + X : data of type self.get_tag("X_inner_type") + + Returns + ------- + Xt : transformed data + """ + # transform + Xt = X + for i in range(len(self.steps_)): + Xt = self.steps_[i][1].transform(X=Xt, y=y) + return Xt + + def _clone_steps(self): + if self.random_state is not None: + rng = check_random_state(self.random_state) + self.steps_ = [ + ( + step[0], + _clone_estimator( + step[1], random_state=rng.randint(np.iinfo(np.int32).max) + ), + ) + for step in self._steps + ] + else: + self.steps_ = [(step[0], _clone_estimator(step[1])) for step in self._steps] diff --git a/aeon/transformations/__init__.py b/aeon/transformations/__init__.py index cfb7444ccf..23421c062b 100644 --- a/aeon/transformations/__init__.py +++ b/aeon/transformations/__init__.py @@ -1 +1,7 @@ -"""Transformations.""" +"""Time series transformations.""" + +__all__ = [ + "BaseTransformer", +] + +from aeon.transformations.base import BaseTransformer diff --git a/aeon/transformations/base.py b/aeon/transformations/base.py index f0ae37d008..277559f615 100644 --- a/aeon/transformations/base.py +++ b/aeon/transformations/base.py @@ -7,11 +7,12 @@ import numpy as np import pandas as 
pd +from sklearn.base import TransformerMixin from aeon.base import BaseAeonEstimator -class BaseTransformer(BaseAeonEstimator): +class BaseTransformer(TransformerMixin, BaseAeonEstimator): """Transformer base class.""" _tags = { @@ -24,8 +25,6 @@ class BaseTransformer(BaseAeonEstimator): @abstractmethod def __init__(self): - self._estimator_type = "transformer" - super().__init__() @abstractmethod diff --git a/aeon/transformations/collection/__init__.py b/aeon/transformations/collection/__init__.py index 11ccc604b0..a970b5e536 100644 --- a/aeon/transformations/collection/__init__.py +++ b/aeon/transformations/collection/__init__.py @@ -1,8 +1,9 @@ """Collection transformations.""" __all__ = [ - # base class and series wrapper + # base class and series broadcaster "BaseCollectionTransformer", + "SeriesToCollectionBroadcaster", # transformers "AutocorrelationFunctionTransformer", "ARCoefficientTransformer", @@ -34,6 +35,9 @@ from aeon.transformations.collection._reduce import Tabularizer from aeon.transformations.collection._rescale import Centerer, MinMaxScaler, Normalizer from aeon.transformations.collection._resize import Resizer +from aeon.transformations.collection._series_broadcaster import ( + SeriesToCollectionBroadcaster, +) from aeon.transformations.collection._slope import SlopeTransformer from aeon.transformations.collection._truncate import Truncator from aeon.transformations.collection.base import BaseCollectionTransformer diff --git a/aeon/transformations/collection/_broadcaster.py b/aeon/transformations/collection/_series_broadcaster.py similarity index 97% rename from aeon/transformations/collection/_broadcaster.py rename to aeon/transformations/collection/_series_broadcaster.py index 742e24d2c1..673217063c 100644 --- a/aeon/transformations/collection/_broadcaster.py +++ b/aeon/transformations/collection/_series_broadcaster.py @@ -45,15 +45,15 @@ def __init__( self, transformer: BaseSeriesTransformer, ) -> None: - # Setting tags before __init__() 
causes them to be overwritten. Hence we make - # a copy before init from the series transformer, then copy the tags of the - # BaseSeriesTransformer to this BaseCollectionTransformer self.transformer = transformer + + super().__init__() + + # Setting tags before __init__() causes them to be overwritten. tags_to_keep = SeriesToCollectionBroadcaster._tags tags_to_add = transformer.get_tags() for key in tags_to_keep: tags_to_add.pop(key, None) - super().__init__() self.set_tags(**tags_to_add) def _fit(self, X, y=None): diff --git a/aeon/transformations/collection/compose/tests/test_pipeline.py b/aeon/transformations/collection/compose/tests/test_pipeline.py index a3a8ca2d1e..9834a74fcb 100644 --- a/aeon/transformations/collection/compose/tests/test_pipeline.py +++ b/aeon/transformations/collection/compose/tests/test_pipeline.py @@ -1,7 +1,5 @@ """Unit tests for clustering pipeline.""" -__maintainer__ = ["MatthewMiddlehurst"] - import pytest from numpy.testing import assert_array_almost_equal from sklearn.preprocessing import StandardScaler @@ -44,13 +42,16 @@ def test_collection_transform_pipeline(transformers): pipeline.fit(X, y) Xt = pipeline.transform(X) + pipeline2 = CollectionTransformerPipeline(transformers=transformers) + Xt2 = pipeline2.fit_transform(X, y) + if not isinstance(transformers, list): transformers = [transformers] - for t in transformers: X = t.fit_transform(X, y) assert_array_almost_equal(Xt, X) + assert_array_almost_equal(Xt2, X) def test_unequal_tag_inference(): diff --git a/aeon/transformations/collection/tests/test_broadcaster.py b/aeon/transformations/collection/tests/test_broadcaster.py index d8e2cc6be4..5ab6810790 100644 --- a/aeon/transformations/collection/tests/test_broadcaster.py +++ b/aeon/transformations/collection/tests/test_broadcaster.py @@ -1,7 +1,5 @@ """Tests for SeriesToCollectionBroadcaster transformer.""" -__maintainer__ = ["baraline"] - import pytest from numpy.testing import assert_array_almost_equal @@ -14,7 +12,9 @@ 
MockSeriesTransformerNoFit, MockUnivariateSeriesTransformer, ) -from aeon.transformations.collection._broadcaster import SeriesToCollectionBroadcaster +from aeon.transformations.collection._series_broadcaster import ( + SeriesToCollectionBroadcaster, +) def test_broadcaster_tag_inheritance(): @@ -41,10 +41,9 @@ def test_broadcaster_tag_inheritance(): assert post_constructor_tags[key] == mock_tags[key] -df = [make_example_3d_numpy, make_example_3d_numpy_list] - - -@pytest.mark.parametrize("data_gen", df) +@pytest.mark.parametrize( + "data_gen", [make_example_3d_numpy, make_example_3d_numpy_list] +) def test_broadcaster_methods_univariate(data_gen): """Test the broadcaster fit, transform and inverse transform method.""" X, y = data_gen(n_channels=1) @@ -64,7 +63,9 @@ def test_broadcaster_methods_univariate(data_gen): assert_array_almost_equal(X[i], X2[i]) -@pytest.mark.parametrize("data_gen", df) +@pytest.mark.parametrize( + "data_gen", [make_example_3d_numpy, make_example_3d_numpy_list] +) def test_broadcaster_methods_multivariate(data_gen): """Test the broadcaster fit, transform and inverse transform method.""" X, y = data_gen(n_channels=3) @@ -86,7 +87,7 @@ def test_broadcaster_methods_multivariate(data_gen): @pytest.mark.parametrize( "data_gen", - [make_example_3d_numpy], + [make_example_3d_numpy, make_example_3d_numpy_list], ) def test_broadcaster_no_fit(data_gen): """Test the wrapper for transformers with fit_empty.""" diff --git a/aeon/transformations/series/__init__.py b/aeon/transformations/series/__init__.py index 8b71ba9fc8..321ad8b7d0 100644 --- a/aeon/transformations/series/__init__.py +++ b/aeon/transformations/series/__init__.py @@ -3,6 +3,7 @@ __all__ = [ "AutoCorrelationSeriesTransformer", "BaseSeriesTransformer", + "CollectionToSeriesWrapper", "ClaSPTransformer", "DFTSeriesTransformer", "Dobin", @@ -10,6 +11,7 @@ "GaussSeriesTransformer", "MatrixProfileSeriesTransformer", "MovingAverageSeriesTransformer", + "LogTransformer", "PLASeriesTransformer", 
"SGSeriesTransformer", "StatsModelsACF", @@ -31,10 +33,12 @@ from aeon.transformations.series._bkfilter import BKFilter from aeon.transformations.series._boxcox import BoxCoxTransformer from aeon.transformations.series._clasp import ClaSPTransformer +from aeon.transformations.series._collection_wrapper import CollectionToSeriesWrapper from aeon.transformations.series._dft import DFTSeriesTransformer from aeon.transformations.series._dobin import Dobin from aeon.transformations.series._exp_smoothing import ExpSmoothingSeriesTransformer from aeon.transformations.series._gauss import GaussSeriesTransformer +from aeon.transformations.series._log import LogTransformer from aeon.transformations.series._matrix_profile import MatrixProfileSeriesTransformer from aeon.transformations.series._moving_average import MovingAverageSeriesTransformer from aeon.transformations.series._pca import PCASeriesTransformer diff --git a/aeon/transformations/series/_boxcox.py b/aeon/transformations/series/_boxcox.py index 4c0f20af46..a722c348eb 100644 --- a/aeon/transformations/series/_boxcox.py +++ b/aeon/transformations/series/_boxcox.py @@ -1,7 +1,7 @@ -"""Box-Cox and Log Transformations.""" +"""Box-Cox transformation.""" __maintainer__ = ["TonyBagnall"] -__all__ = ["BoxCoxTransformer", "LogTransformer"] +__all__ = ["BoxCoxTransformer"] import numpy as np from scipy import optimize, special, stats @@ -184,90 +184,6 @@ def _inverse_transform(self, X, y=None): return Xt -class LogTransformer(BaseSeriesTransformer): - """Natural logarithm transformation. - - The Natural logarithm transformation can be used to make the data more normally - distributed and stabilize its variance. - - Transforms each data point x to log(scale *(x+offset)) - - Parameters - ---------- - offset : float , default = 0 - Additive constant applied to all the data. - scale : float , default = 1 - Multiplicative scaling constant applied to all the data. 
- - - Notes - ----- - The log transformation is applied as :math:`ln(y)`. - - Examples - -------- - >>> from aeon.transformations.series._boxcox import LogTransformer - >>> from aeon.datasets import load_airline - >>> y = load_airline() - >>> transformer = LogTransformer() - >>> y_hat = transformer.fit_transform(y) - """ - - _tags = { - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - "capability:multivariate": True, - "capability:inverse_transform": True, - } - - def __init__(self, offset=0, scale=1): - self.offset = offset - self.scale = scale - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - private _transform containing the core logic, called from transform - - Parameters - ---------- - X : 2D np.ndarray - Data to be transformed - y : ignored argument for interface compatibility - Additional data, e.g., labels for transformation - - Returns - ------- - Xt : 2D np.ndarray - transformed version of X - """ - offset = self.offset - scale = self.scale - Xt = np.log(scale * (X + offset)) - return Xt - - def _inverse_transform(self, X, y=None): - """Inverse transform X and return an inverse transformed version. 
- - core logic - - Parameters - ---------- - X : 2D np.ndarray - Data to be transformed - y : ignored argument for interface compatibility - Additional data, e.g., labels for transformation - - Returns - ------- - Xt : 2D np.ndarray - inverse transformed version of X - """ - Xt = (np.exp(X) / self.scale) - self.offset - return Xt - - def _make_boxcox_optimizer(bounds=None, brack=(-2.0, 2.0)): # bounds is None, use simple Brent optimisation if bounds is None: diff --git a/aeon/transformations/series/_collection_wrapper.py b/aeon/transformations/series/_collection_wrapper.py new file mode 100644 index 0000000000..bb68401ec0 --- /dev/null +++ b/aeon/transformations/series/_collection_wrapper.py @@ -0,0 +1,86 @@ +"""Class to wrap a collection transformer for single series.""" + +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = ["CollectionToSeriesWrapper"] + + +from aeon.transformations.collection.base import BaseCollectionTransformer +from aeon.transformations.series.base import BaseSeriesTransformer + + +class CollectionToSeriesWrapper(BaseSeriesTransformer): + """Wrap a ``BaseCollectionTransformer`` to run on single series datatypes. + + Parameters + ---------- + transformer : BaseCollectionTransformer + The collection transformer to wrap. + + Examples + -------- + >>> from aeon.transformations.collection._resize import Resizer + >>> import numpy as np + >>> X = np.random.rand(1, 10) + >>> transformer = Resizer(length=5) + >>> wrapper = CollectionToSeriesWrapper(transformer) + >>> X_t = wrapper.fit_transform(X) + """ + + # These tags are not set from the collection transformer. + _tags = { + "input_data_type": "Series", + "output_data_type": "Series", + "X_inner_type": "numpy2D", + } + + def __init__( + self, + transformer: BaseCollectionTransformer, + ) -> None: + self.transformer = transformer + + super().__init__(axis=1) + + # Setting tags before __init__() causes them to be overwritten. 
+ tags_to_keep = CollectionToSeriesWrapper._tags + tags_to_add = transformer.get_tags() + for key in tags_to_keep: + tags_to_add.pop(key, None) + self.set_tags(**tags_to_add) + + def _fit(self, X, y=None): + X = X.reshape(1, X.shape[0], X.shape[1]) + self.collection_transformer_ = self.transformer.clone() + self.collection_transformer_.fit(X, y) + + def _transform(self, X, y=None): + X = X.reshape(1, X.shape[0], X.shape[1]) + + t = self.transformer + if not self.get_tag("fit_is_empty"): + t = self.collection_transformer_ + + return t.transform(X, y) + + @classmethod + def _get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + """ + from aeon.testing.mock_estimators._mock_collection_transformers import ( + MockCollectionTransformer, + ) + + return {"transformer": MockCollectionTransformer()} diff --git a/aeon/transformations/series/_log.py b/aeon/transformations/series/_log.py new file mode 100644 index 0000000000..9e8835d4c2 --- /dev/null +++ b/aeon/transformations/series/_log.py @@ -0,0 +1,92 @@ +"""Log transformation.""" + +__maintainer__ = ["TonyBagnall"] +__all__ = ["LogTransformer"] + +import numpy as np + +from aeon.transformations.series import BaseSeriesTransformer + + +class LogTransformer(BaseSeriesTransformer): + """Natural logarithm transformation. + + The Natural logarithm transformation can be used to make the data more normally + distributed and stabilize its variance. 
+ + Transforms each data point x to log(scale *(x+offset)) + + Parameters + ---------- + offset : float , default = 0 + Additive constant applied to all the data. + scale : float , default = 1 + Multiplicative scaling constant applied to all the data. + + + Notes + ----- + The log transformation is applied as :math:`ln(y)`. + + Examples + -------- + >>> from aeon.transformations.series._log import LogTransformer + >>> from aeon.datasets import load_airline + >>> y = load_airline() + >>> transformer = LogTransformer() + >>> y_hat = transformer.fit_transform(y) + """ + + _tags = { + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + "capability:multivariate": True, + "capability:inverse_transform": True, + } + + def __init__(self, offset=0, scale=1): + self.offset = offset + self.scale = scale + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + private _transform containing the core logic, called from transform + + Parameters + ---------- + X : 2D np.ndarray + Data to be transformed + y : ignored argument for interface compatibility + Additional data, e.g., labels for transformation + + Returns + ------- + Xt : 2D np.ndarray + transformed version of X + """ + offset = self.offset + scale = self.scale + Xt = np.log(scale * (X + offset)) + return Xt + + def _inverse_transform(self, X, y=None): + """Inverse transform X and return an inverse transformed version. 
+ + core logic + + Parameters + ---------- + X : 2D np.ndarray + Data to be transformed + y : ignored argument for interface compatibility + Additional data, e.g., labels for transformation + + Returns + ------- + Xt : 2D np.ndarray + inverse transformed version of X + """ + Xt = (np.exp(X) / self.scale) - self.offset + return Xt diff --git a/aeon/transformations/series/compose/__init__.py b/aeon/transformations/series/compose/__init__.py new file mode 100644 index 0000000000..1fd4cd3d38 --- /dev/null +++ b/aeon/transformations/series/compose/__init__.py @@ -0,0 +1,8 @@ +"""Compositions for series transforms.""" + +__all__ = [ + "SeriesTransformerPipeline", + "SeriesId", +] + +from aeon.transformations.series.compose._identity import SeriesId diff --git a/aeon/transformations/series/compose/_identity.py b/aeon/transformations/series/compose/_identity.py new file mode 100644 index 0000000000..2c6a592ef4 --- /dev/null +++ b/aeon/transformations/series/compose/_identity.py @@ -0,0 +1,25 @@ +"""Identity transformer.""" + +from aeon.transformations.series import BaseSeriesTransformer +from aeon.utils.data_types import SERIES_DATA_TYPES + + +class SeriesId(BaseSeriesTransformer): + """Identity transformer, returns data unchanged in transform/inverse_transform.""" + + _tags = { + "X_inner_type": SERIES_DATA_TYPES, + "fit_is_empty": True, + "capability:inverse_transform": True, + "capability:multivariate": True, + "capability:missing_values": True, + } + + def __init__(self): + super().__init__(axis=1) + + def _transform(self, X, y=None): + return X + + def _inverse_transform(self, X, y=None): + return X diff --git a/aeon/transformations/series/compose/_pipeline.py b/aeon/transformations/series/compose/_pipeline.py new file mode 100644 index 0000000000..0a64f075de --- /dev/null +++ b/aeon/transformations/series/compose/_pipeline.py @@ -0,0 +1,102 @@ +"""Pipeline with series transformers.""" + +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = ["SeriesTransformerPipeline"] 
+ +from aeon.base._estimators.compose.series_pipeline import BaseSeriesPipeline +from aeon.transformations.series import BaseSeriesTransformer +from aeon.transformations.series.compose import SeriesId + + +class SeriesTransformerPipeline(BaseSeriesPipeline, BaseSeriesTransformer): + """Pipeline of series transformers. + + The `SeriesTransformerPipeline` compositor chains transformers. + The pipeline is constructed with a list of aeon transformers, + i.e., estimators following the BaseTransformer interface. + The transformer list can be unnamed - a simple list of transformers - + or string named - a list of pairs of string, estimator. + + For a list of transformers `trafo1`, `trafo2`, ..., `trafoN`, + the pipeline behaves as follows: + `fit(X, y)` - changes state by running `trafo1.fit_transform` on `X`, + then `trafo2.fit_transform` on the output of `trafo1.fit_transform`, etc + sequentially, with `trafo[i]` receiving the output of `trafo[i-1]`, + and then running `trafo[N].fit` with `X` being the output of `trafo[N-1]`, + and `y` identical with the input to `self.fit` + `transform(X, y)` - result of executing `trafo1.transform`, `trafo2.transform`, + etc with `trafo[i].transform` input = output of `trafo[i-1].transform`, + then running `trafo[N].transform` on the output of `trafo[N-1].transform`, + and returning the output. + + Parameters + ---------- + transformers : aeon or sklearn transformer or list of transformers + A transform or list of transformers. + List of tuples (str, transformer) of transformers can also be passed, where + the str is used to name the transformer. + The objects are cloned prior, as such the state of the input will not be + modified by fitting the pipeline. + + Attributes + ---------- + steps_ : list of tuples (str, estimator) of transformers + Clones of transformers which are fitted in the pipeline. + Will always be in (str, estimator) format, even if transformers input is a + singular transform or list of transformers.
+ + Examples + -------- + >>> from aeon.transformations.series import LogTransformer + >>> from aeon.transformations.series import MovingAverageSeriesTransformer + >>> from aeon.datasets import load_airline + >>> from aeon.transformations.series.compose import SeriesTransformerPipeline + >>> X = load_airline() + >>> pipeline = SeriesTransformerPipeline( + ... [LogTransformer, MovingAverageSeriesTransformer] + ... ) + >>> pipeline.fit(X) + SeriesTransformerPipeline(...) + >>> Xt = pipeline.transform(X) + """ + + _tags = { + "X_inner_type": "numpy2D", + } + + def __init__(self, transformers): + if not isinstance(transformers, list): + transformers = [SeriesId(), transformers] + elif len(transformers) < 2: + transformers = [SeriesId(), transformers[0]] + + super().__init__(transformers=transformers, _estimator=None) + + @classmethod + def _get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. 
+ """ + from aeon.transformations.series import ( + LogTransformer, + MovingAverageSeriesTransformer, + ) + + return { + "transformers": [ + LogTransformer(), + MovingAverageSeriesTransformer(), + ] + } diff --git a/aeon/transformations/series/compose/tests/__init__.py b/aeon/transformations/series/compose/tests/__init__.py new file mode 100644 index 0000000000..363dad1ea8 --- /dev/null +++ b/aeon/transformations/series/compose/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for composable series transformers.""" diff --git a/aeon/transformations/series/compose/tests/test_pipeline.py b/aeon/transformations/series/compose/tests/test_pipeline.py new file mode 100644 index 0000000000..9834a74fcb --- /dev/null +++ b/aeon/transformations/series/compose/tests/test_pipeline.py @@ -0,0 +1,188 @@ +"""Unit tests for clustering pipeline.""" + +import pytest +from numpy.testing import assert_array_almost_equal +from sklearn.preprocessing import StandardScaler + +from aeon.testing.data_generation import ( + make_example_3d_numpy, + make_example_3d_numpy_list, +) +from aeon.testing.mock_estimators import MockCollectionTransformer +from aeon.transformations.collection import ( + AutocorrelationFunctionTransformer, + HOG1DTransformer, + Normalizer, + Padder, + Tabularizer, +) +from aeon.transformations.collection.compose import CollectionTransformerPipeline +from aeon.transformations.collection.feature_based import SevenNumberSummary + + +@pytest.mark.parametrize( + "transformers", + [ + Padder(pad_length=15), + SevenNumberSummary(), + [Padder(pad_length=15), Tabularizer(), StandardScaler()], + [Padder(pad_length=15), SevenNumberSummary()], + [Tabularizer(), StandardScaler(), SevenNumberSummary()], + [ + Padder(pad_length=15), + SevenNumberSummary(), + ], + ], +) +def test_collection_transform_pipeline(transformers): + """Test the collection transform pipeline.""" + X, y = make_example_3d_numpy(n_cases=10, n_timepoints=12) + + pipeline = 
CollectionTransformerPipeline(transformers=transformers) + pipeline.fit(X, y) + Xt = pipeline.transform(X) + + pipeline2 = CollectionTransformerPipeline(transformers=transformers) + Xt2 = pipeline2.fit_transform(X, y) + + if not isinstance(transformers, list): + transformers = [transformers] + for t in transformers: + X = t.fit_transform(X, y) + + assert_array_almost_equal(Xt, X) + assert_array_almost_equal(Xt2, X) + + +def test_unequal_tag_inference(): + """Test that CollectionTransformerPipeline infers unequal length tag correctly.""" + X, y = make_example_3d_numpy_list( + n_cases=10, min_n_timepoints=8, max_n_timepoints=12 + ) + + t1 = SevenNumberSummary() + t2 = Padder() + t3 = Normalizer() + t4 = AutocorrelationFunctionTransformer(n_lags=5) + t5 = StandardScaler() + t6 = Tabularizer() + + assert t1.get_tag("capability:unequal_length") + assert t1.get_tag("output_data_type") == "Tabular" + assert t2.get_tag("capability:unequal_length") + assert t2.get_tag("removes_unequal_length") + assert not t2.get_tag("output_data_type") == "Tabular" + assert t3.get_tag("capability:unequal_length") + assert not t3.get_tag("removes_unequal_length") + assert not t3.get_tag("output_data_type") == "Tabular" + assert not t4.get_tag("capability:unequal_length") + + # all handle unequal length + p1 = CollectionTransformerPipeline(transformers=t3) + assert p1.get_tag("capability:unequal_length") + p1.fit(X, y) + + # transformer chain removes unequal length + p2 = CollectionTransformerPipeline(transformers=[t3, t2]) + assert p2.get_tag("capability:unequal_length") + p2.fit(X, y) + + # transformer chain removes unequal length (sklearn) + p3 = CollectionTransformerPipeline(transformers=[t3, t2, t6, t5]) + assert p3.get_tag("capability:unequal_length") + p3.fit(X, y) + + # transformers handle unequal length and output is tabular + p4 = CollectionTransformerPipeline(transformers=[t3, t1]) + assert p4.get_tag("capability:unequal_length") + p4.fit(X, y) + + # test they fit even if they 
cannot handle unequal length + X, y = make_example_3d_numpy(n_cases=10, n_timepoints=12) + + # transformer does not unequal length + p5 = CollectionTransformerPipeline(transformers=t4) + assert not p5.get_tag("capability:unequal_length") + p5.fit(X, y) + + # transformer removes unequal length but prior cannot handle + p6 = CollectionTransformerPipeline(transformers=[t4, t2]) + assert not p6.get_tag("capability:unequal_length") + p6.fit(X, y) + + +def test_missing_tag_inference(): + """Test that CollectionTransformerPipeline infers missing data tag correctly.""" + X, y = make_example_3d_numpy(n_cases=10, n_timepoints=12) + + t1 = MockCollectionTransformer() + t1.set_tags(**{"capability:missing_values": True, "removes_missing_values": True}) + t2 = Normalizer() + t3 = StandardScaler() + t4 = Tabularizer() + + assert t1.get_tag("capability:missing_values") + assert t1.get_tag("removes_missing_values") + assert not t2.get_tag("capability:missing_values") + + # transformer chain removes missing values + p1 = CollectionTransformerPipeline(transformers=t1) + assert p1.get_tag("capability:missing_values") + p1.fit(X, y) + + # transformer removes missing values(sklearn) + p2 = CollectionTransformerPipeline(transformers=[t1, t4, t3]) + assert p2.get_tag("capability:missing_values") + p2.fit(X, y) + + # test they fit even if they cannot handle missing data + X, y = make_example_3d_numpy(n_cases=10, n_timepoints=12) + + # transformers cannot handle missing data + p3 = CollectionTransformerPipeline(transformers=t2) + assert not p3.get_tag("capability:missing_values") + p3.fit(X, y) + + # transformer removes missing values but prior cannot handle + p5 = CollectionTransformerPipeline(transformers=[t2, t1]) + assert not p5.get_tag("capability:missing_values") + p5.fit(X, y) + + +def test_multivariate_tag_inference(): + """Test that CollectionTransformerPipeline infers multivariate tag correctly.""" + X, y = make_example_3d_numpy(n_cases=10, n_channels=2, n_timepoints=12) + + t1 = 
SevenNumberSummary() + t2 = Normalizer() + t3 = HOG1DTransformer() + t4 = StandardScaler() + + assert t1.get_tag("capability:multivariate") + assert t1.get_tag("output_data_type") == "Tabular" + assert t2.get_tag("capability:multivariate") + assert not t2.get_tag("output_data_type") == "Tabular" + assert not t3.get_tag("capability:multivariate") + + # all handle multivariate + p1 = CollectionTransformerPipeline(transformers=t2) + assert p1.get_tag("capability:multivariate") + p1.fit(X, y) + + # transformers handle multivariate and output is tabular + p2 = CollectionTransformerPipeline(transformers=[t1, t4]) + assert p2.get_tag("capability:multivariate") + p2.fit(X, y) + + # test they fit even if they cannot handle multivariate + X, y = make_example_3d_numpy(n_cases=10, n_timepoints=12) + + # transformer does not handle multivariate + p3 = CollectionTransformerPipeline(transformers=t3) + assert not p3.get_tag("capability:multivariate") + p3.fit(X, y) + + # transformer converts multivariate to tabular but prior cannot handle + p4 = CollectionTransformerPipeline(transformers=[t3, t1]) + assert not p4.get_tag("capability:multivariate") + p4.fit(X, y) diff --git a/aeon/transformations/series/tests/test_boxcox.py b/aeon/transformations/series/tests/test_boxcox.py index 6274924ddb..49fe0ebeef 100644 --- a/aeon/transformations/series/tests/test_boxcox.py +++ b/aeon/transformations/series/tests/test_boxcox.py @@ -1,8 +1,5 @@ """Tests for BoxCoxTransformer.""" -__maintainer__ = [] -__all__ = [] - import numpy as np import pytest from scipy.stats import boxcox diff --git a/aeon/transformations/series/tests/test_wrapper.py b/aeon/transformations/series/tests/test_wrapper.py new file mode 100644 index 0000000000..a8529857b3 --- /dev/null +++ b/aeon/transformations/series/tests/test_wrapper.py @@ -0,0 +1,24 @@ +"""Tests for SeriesToCollectionBroadcaster transformer.""" + +from aeon.testing.mock_estimators import MockCollectionTransformer +from aeon.transformations.series import 
CollectionToSeriesWrapper + + +def test_broadcaster_tag_inheritance(): + """Test the ability to inherit tags from the BaseCollectionTransformer. + + The broadcaster should always keep some tags related to single series + """ + trans = MockCollectionTransformer() + class_tags = CollectionToSeriesWrapper._tags + + bc = CollectionToSeriesWrapper(trans) + + post_constructor_tags = bc.get_tags() + mock_tags = trans.get_tags() + # constructor_tags should match class_tags or, if not present, tags in transformer + for key in post_constructor_tags: + if key in class_tags: + assert post_constructor_tags[key] == class_tags[key] + elif key in mock_tags: + assert post_constructor_tags[key] == mock_tags[key] From 494e3c6a62e9c39cc1c20e1369b0c6d2effcf3cf Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Wed, 21 May 2025 23:22:23 +0100 Subject: [PATCH 2/6] transform changes --- aeon/anomaly_detection/base.py | 2 +- aeon/base/_base_series.py | 8 +- aeon/base/_compose.py | 9 +- aeon/base/_estimators/compose/_commons.py | 41 +++++++ .../compose/collection_channel_ensemble.py | 16 +-- .../compose/collection_ensemble.py | 2 +- .../compose/collection_pipeline.py | 13 +- .../_estimators/compose/series_pipeline.py | 19 ++- .../compose/_channel_ensemble.py | 5 +- aeon/forecasting/base.py | 2 +- aeon/pipeline/_sklearn_to_aeon.py | 15 +-- aeon/pipeline/tests/test_sklearn_to_aeon.py | 6 +- aeon/segmentation/base.py | 2 +- .../_yield_anomaly_detection_checks.py | 2 +- .../_yield_forecasting_checks.py | 2 +- .../_yield_segmentation_checks.py | 2 +- .../_yield_transformation_checks.py | 3 +- aeon/testing/utils/estimator_checks.py | 5 +- .../collection/compose/_pipeline.py | 5 +- .../collection/tests/test_broadcaster.py | 2 +- .../series/_collection_wrapper.py | 18 ++- aeon/transformations/series/_log.py | 2 +- .../series/compose/__init__.py | 1 + .../series/compose/_identity.py | 4 +- .../series/compose/_pipeline.py | 5 +- aeon/utils/data_types.py | 111 +++++++++++++----- 26 files changed, 
202 insertions(+), 100 deletions(-) create mode 100644 aeon/base/_estimators/compose/_commons.py diff --git a/aeon/anomaly_detection/base.py b/aeon/anomaly_detection/base.py index 2e333cf755..8d3b4454ca 100644 --- a/aeon/anomaly_detection/base.py +++ b/aeon/anomaly_detection/base.py @@ -10,7 +10,7 @@ import pandas as pd from aeon.base import BaseSeriesEstimator -from aeon.base._base_series import VALID_SERIES_INPUT_TYPES +from aeon.utils.data_types import VALID_SERIES_INPUT_TYPES class BaseAnomalyDetector(BaseSeriesEstimator): diff --git a/aeon/base/_base_series.py b/aeon/base/_base_series.py index f46091142a..be28009925 100644 --- a/aeon/base/_base_series.py +++ b/aeon/base/_base_series.py @@ -13,13 +13,7 @@ import pandas as pd from aeon.base._base import BaseAeonEstimator - -# allowed input and internal data types for Series -VALID_SERIES_INNER_TYPES = [ - "np.ndarray", - "pd.DataFrame", -] -VALID_SERIES_INPUT_TYPES = [pd.DataFrame, pd.Series, np.ndarray] +from aeon.utils.data_types import VALID_SERIES_INNER_TYPES class BaseSeriesEstimator(BaseAeonEstimator): diff --git a/aeon/base/_compose.py b/aeon/base/_compose.py index 8661245806..3fd1b3ff1d 100644 --- a/aeon/base/_compose.py +++ b/aeon/base/_compose.py @@ -5,7 +5,7 @@ from abc import ABC, abstractmethod -from aeon.base import BaseAeonEstimator +from aeon.base import BaseAeonEstimator, BaseCollectionEstimator from aeon.base._base import _clone_estimator @@ -26,8 +26,11 @@ class ComposableEstimatorMixin(ABC): _fitted_estimators_attr = "estimators_" @abstractmethod - def __init__(self): - super().__init__() + def __init__(self, axis): + if isinstance(self, BaseCollectionEstimator): + super().__init__() + else: + super().__init__(axis=axis) def get_params(self, deep=True): """Get parameters for this estimator. 
diff --git a/aeon/base/_estimators/compose/_commons.py b/aeon/base/_estimators/compose/_commons.py new file mode 100644 index 0000000000..1a578a0af8 --- /dev/null +++ b/aeon/base/_estimators/compose/_commons.py @@ -0,0 +1,41 @@ +"""Common compose base functions.""" + +from inspect import signature + +import numpy as np + + +def _get_channel(X, key): + """Get time series channel(s) from input data X.""" + if isinstance(X, np.ndarray): + return X[:, key] + else: + li = [x[key] for x in X] + if li[0].ndim == 1: + li = [x.reshape(1, -1) for x in li] + return li + + +def _transform_args_wrapper(estimator, method_name, X, y=None, axis=None): + method = getattr(estimator, method_name) + args = list(signature(method).parameters.keys()) + + has_X = "X" in args + has_y = "y" in args + has_axis = "axis" in args + + # aeon transforms should always have X and y, other transforms i.e. sklearn may + # only have X + if has_X and has_y and has_axis: + return method(X=X, y=y, axis=axis) + elif has_X and has_y: + return method(X=X, y=y) + elif has_X and has_axis: + return method(X=X, axis=axis) + elif has_X: + return method(X=X) + else: + raise ValueError( + f"Method {method_name} of {estimator.__class__.__name__} " + "does not have the required arguments." 
+ ) diff --git a/aeon/base/_estimators/compose/collection_channel_ensemble.py b/aeon/base/_estimators/compose/collection_channel_ensemble.py index 91ed44ef25..04cab42519 100644 --- a/aeon/base/_estimators/compose/collection_channel_ensemble.py +++ b/aeon/base/_estimators/compose/collection_channel_ensemble.py @@ -18,6 +18,7 @@ ComposableEstimatorMixin, ) from aeon.base._base import _clone_estimator +from aeon.base._estimators.compose._commons import _get_channel class BaseCollectionChannelEnsemble(ComposableEstimatorMixin, BaseCollectionEstimator): @@ -98,7 +99,7 @@ def __init__( self._ensemble, clone_estimators=False ) - super().__init__() + super().__init__(axis=1) # can handle missing values if all estimators can missing = all( @@ -241,17 +242,6 @@ def _fit(self, X, y): # fit estimators for i, (_, estimator) in enumerate(self.ensemble_): - estimator.fit(self._get_channel(X, self.channels_[i]), y) + estimator.fit(_get_channel(X, self.channels_[i]), y) return self - - @staticmethod - def _get_channel(X, key): - """Get time series channel(s) from input data X.""" - if isinstance(X, np.ndarray): - return X[:, key] - else: - li = [x[key] for x in X] - if li[0].ndim == 1: - li = [x.reshape(1, -1) for x in li] - return li diff --git a/aeon/base/_estimators/compose/collection_ensemble.py b/aeon/base/_estimators/compose/collection_ensemble.py index 11e9fbc81b..9b20e11007 100644 --- a/aeon/base/_estimators/compose/collection_ensemble.py +++ b/aeon/base/_estimators/compose/collection_ensemble.py @@ -108,7 +108,7 @@ def __init__( self._ensemble, clone_estimators=False ) - super().__init__() + super().__init__(axis=1) # can handle multivariate if all estimators can multivariate = all( diff --git a/aeon/base/_estimators/compose/collection_pipeline.py b/aeon/base/_estimators/compose/collection_pipeline.py index dcc2e5d8ee..73b8ce3167 100644 --- a/aeon/base/_estimators/compose/collection_pipeline.py +++ b/aeon/base/_estimators/compose/collection_pipeline.py @@ -18,6 +18,7 @@ 
ComposableEstimatorMixin, ) from aeon.base._base import _clone_estimator +from aeon.base._estimators.compose._commons import _transform_args_wrapper class BaseCollectionPipeline(ComposableEstimatorMixin, BaseCollectionEstimator): @@ -77,7 +78,7 @@ def __init__(self, transformers, _estimator, random_state=None): ) self._steps = self._convert_estimators(self._steps, clone_estimators=False) - super().__init__() + super().__init__(axis=1) # can handle multivariate if: both estimator and all transformers can, # *or* transformer chain removes multivariate @@ -218,7 +219,7 @@ def _fit(self, X, y): # fit transforms sequentially Xt = X for i in range(len(self.steps_) - 1): - Xt = self.steps_[i][1].fit_transform(X=Xt, y=y) + Xt = _transform_args_wrapper(self.steps_[i][1], "fit_transform", Xt, y) # fit estimator self.steps_[-1][1].fit(X=Xt, y=y) @@ -238,7 +239,7 @@ def _predict(self, X) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_) - 1): - Xt = self.steps_[i][1].transform(X=Xt) + Xt = _transform_args_wrapper(self.steps_[i][1], "transform", Xt) # predict return self.steps_[-1][1].predict(X=Xt) @@ -260,7 +261,7 @@ def _predict_proba(self, X) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_) - 1): - Xt = self.steps_[i][1].transform(X=Xt) + Xt = _transform_args_wrapper(self.steps_[i][1], "transform", Xt) # predict return self.steps_[-1][1].predict_proba(X=Xt) @@ -280,7 +281,7 @@ def _fit_transform(self, X, y=None) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_)): - Xt = self.steps_[i][1].fit_transform(X=Xt, y=y) + Xt = _transform_args_wrapper(self.steps_[i][1], "fit_transform", Xt, y) return Xt def _transform(self, X, y=None) -> np.ndarray: @@ -297,7 +298,7 @@ def _transform(self, X, y=None) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_)): - Xt = self.steps_[i][1].transform(X=Xt, y=y) + Xt = _transform_args_wrapper(self.steps_[i][1], "transform", Xt, y) return Xt def _clone_steps(self): diff --git 
a/aeon/base/_estimators/compose/series_pipeline.py b/aeon/base/_estimators/compose/series_pipeline.py index 81e2c0565b..40c448d8bf 100644 --- a/aeon/base/_estimators/compose/series_pipeline.py +++ b/aeon/base/_estimators/compose/series_pipeline.py @@ -18,6 +18,7 @@ ComposableEstimatorMixin, ) from aeon.base._base import _clone_estimator +from aeon.base._estimators.compose._commons import _transform_args_wrapper class BaseSeriesPipeline(ComposableEstimatorMixin, BaseSeriesEstimator): @@ -77,7 +78,7 @@ def __init__(self, transformers, _estimator, random_state=None): ) self._steps = self._convert_estimators(self._steps, clone_estimators=False) - super().__init__() + super().__init__(axis=1) # can handle multivariate if: both estimator and all transformers can, # *or* transformer chain removes multivariate @@ -174,7 +175,9 @@ def _fit(self, X, y): # fit transforms sequentially Xt = X for i in range(len(self.steps_) - 1): - Xt = self.steps_[i][1].fit_transform(X=Xt, y=y) + Xt = _transform_args_wrapper( + self.steps_[i][1], "fit_transform", Xt, y=y, axis=1 + ) # fit estimator self.steps_[-1][1].fit(X=Xt, y=y) @@ -194,7 +197,7 @@ def _predict(self, X) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_) - 1): - Xt = self.steps_[i][1].transform(X=Xt) + Xt = _transform_args_wrapper(self.steps_[i][1], "transform", Xt, axis=1) # predict return self.steps_[-1][1].predict(X=Xt) @@ -216,7 +219,7 @@ def _predict_proba(self, X) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_) - 1): - Xt = self.steps_[i][1].transform(X=Xt) + Xt = _transform_args_wrapper(self.steps_[i][1], "transform", Xt, axis=1) # predict return self.steps_[-1][1].predict_proba(X=Xt) @@ -236,7 +239,9 @@ def _fit_transform(self, X, y=None) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_)): - Xt = self.steps_[i][1].fit_transform(X=Xt, y=y) + Xt = _transform_args_wrapper( + self.steps_[i][1], "fit_transform", Xt, y=y, axis=1 + ) return Xt def _transform(self, X, y=None) 
-> np.ndarray: @@ -253,7 +258,9 @@ def _transform(self, X, y=None) -> np.ndarray: # transform Xt = X for i in range(len(self.steps_)): - Xt = self.steps_[i][1].transform(X=Xt, y=y) + Xt = _transform_args_wrapper( + self.steps_[i][1], "transform", Xt, y=y, axis=1 + ) return Xt def _clone_steps(self): diff --git a/aeon/classification/compose/_channel_ensemble.py b/aeon/classification/compose/_channel_ensemble.py index 3605debe32..ce3832d25f 100644 --- a/aeon/classification/compose/_channel_ensemble.py +++ b/aeon/classification/compose/_channel_ensemble.py @@ -10,6 +10,7 @@ import numpy as np from sklearn.utils import check_random_state +from aeon.base._estimators.compose._commons import _get_channel from aeon.base._estimators.compose.collection_channel_ensemble import ( BaseCollectionChannelEnsemble, ) @@ -114,14 +115,14 @@ def _predict_proba(self, X) -> np.ndarray: # Call predict on each classifier, add the predictions to the # current probabilities for i, (_, clf) in enumerate(self.ensemble_): - preds = clf.predict(X=self._get_channel(X, self.channels_[i])) + preds = clf.predict(X=_get_channel(X, self.channels_[i])) for n in range(X.shape[0]): dists[n, self._class_dictionary[preds[n]]] += 1 else: # Call predict_proba on each classifier, then add them to the current # probabilities for i, (_, clf) in enumerate(self.ensemble_): - dists += clf.predict_proba(X=self._get_channel(X, self.channels_[i])) + dists += clf.predict_proba(X=_get_channel(X, self.channels_[i])) # Make each instances probability array sum to 1 and return y_proba = dists / dists.sum(axis=1, keepdims=True) diff --git a/aeon/forecasting/base.py b/aeon/forecasting/base.py index cf2db8d80c..3d4fe9e560 100644 --- a/aeon/forecasting/base.py +++ b/aeon/forecasting/base.py @@ -10,7 +10,7 @@ import pandas as pd from aeon.base import BaseSeriesEstimator -from aeon.base._base_series import VALID_SERIES_INNER_TYPES +from aeon.utils.data_types import VALID_SERIES_INNER_TYPES class 
BaseForecaster(BaseSeriesEstimator): diff --git a/aeon/pipeline/_sklearn_to_aeon.py b/aeon/pipeline/_sklearn_to_aeon.py index e5639572c8..eb9f25b5a2 100644 --- a/aeon/pipeline/_sklearn_to_aeon.py +++ b/aeon/pipeline/_sklearn_to_aeon.py @@ -1,6 +1,7 @@ """Sklearn to aeon coercion utility.""" __maintainer__ = ["MatthewMiddlehurst"] +__all__ = ["sklearn_to_aeon"] from aeon.pipeline._make_pipeline import make_pipeline from aeon.transformations.collection import Tabularizer @@ -9,19 +10,19 @@ def sklearn_to_aeon(estimator): """Coerces an sklearn estimator to the aeon pipeline interface. - Creates a pipeline of two elements, the identity transformer and the estimator. - The identity transformer acts as adapter and holds aeon base class logic. + Creates a pipeline of two elements, the Tabularizer transformer and the estimator. + The Tabularizer transformer acts as adapter and holds aeon base class logic, as + well as converting aeon datatypes to a feature vector format. Multivariate series + will be concatenated into a single feature vector. Data must be of equal length. Parameters ---------- estimator : sklearn compatible estimator - can be classifier, regressor, transformer, clusterer + Can be a classifier, regressor, clusterer, or transformer. Returns ------- - pipe : aeon estimator of corresponding time series type - classifiers, regressors, clusterers are converted to time series counterparts - by flattening time series. Assumes equal length time series. - transformers are converted to time series transformer by application per series + pipe : aeon pipeline estimator + A pipeline of the Tabularizer transformer and input estimator. 
""" return make_pipeline(Tabularizer(), estimator) diff --git a/aeon/pipeline/tests/test_sklearn_to_aeon.py b/aeon/pipeline/tests/test_sklearn_to_aeon.py index fd47c84434..3cd1ba28cf 100644 --- a/aeon/pipeline/tests/test_sklearn_to_aeon.py +++ b/aeon/pipeline/tests/test_sklearn_to_aeon.py @@ -5,6 +5,7 @@ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.preprocessing import StandardScaler +from aeon.base import BaseAeonEstimator from aeon.pipeline import sklearn_to_aeon from aeon.testing.data_generation import make_example_3d_numpy @@ -23,11 +24,12 @@ def test_sklearn_to_aeon(estimator): X, y = make_example_3d_numpy() est = sklearn_to_aeon(estimator) + + assert isinstance(est, BaseAeonEstimator) + est.fit(X, y) if hasattr(est, "predict"): est.predict(X) else: est.transform(X) - - assert est._estimator_type == getattr(estimator, "_estimator_type", "transformer") diff --git a/aeon/segmentation/base.py b/aeon/segmentation/base.py index 05f746d53c..d93cbabc5e 100644 --- a/aeon/segmentation/base.py +++ b/aeon/segmentation/base.py @@ -10,7 +10,7 @@ import pandas as pd from aeon.base import BaseSeriesEstimator -from aeon.base._base_series import VALID_SERIES_INPUT_TYPES +from aeon.utils.data_types import VALID_SERIES_INPUT_TYPES class BaseSegmenter(BaseSeriesEstimator): diff --git a/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py b/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py index 5f2f05aaa9..3922f61696 100644 --- a/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py +++ b/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py @@ -5,11 +5,11 @@ import numpy as np from aeon.base._base import _clone_estimator -from aeon.base._base_series import VALID_SERIES_INNER_TYPES from aeon.testing.data_generation import ( make_example_1d_numpy, make_example_2d_numpy_series, ) +from aeon.utils.data_types import VALID_SERIES_INNER_TYPES def 
_yield_anomaly_detection_checks(estimator_class, estimator_instances, datatypes): diff --git a/aeon/testing/estimator_checking/_yield_forecasting_checks.py b/aeon/testing/estimator_checking/_yield_forecasting_checks.py index 5c62d2f05d..e8dbaa0dd7 100644 --- a/aeon/testing/estimator_checking/_yield_forecasting_checks.py +++ b/aeon/testing/estimator_checking/_yield_forecasting_checks.py @@ -5,7 +5,7 @@ import numpy as np from aeon.base._base import _clone_estimator -from aeon.base._base_series import VALID_SERIES_INPUT_TYPES +from aeon.utils.data_types import VALID_SERIES_INPUT_TYPES def _yield_forecasting_checks(estimator_class, estimator_instances, datatypes): diff --git a/aeon/testing/estimator_checking/_yield_segmentation_checks.py b/aeon/testing/estimator_checking/_yield_segmentation_checks.py index 054ca56bd4..62d5a1d391 100644 --- a/aeon/testing/estimator_checking/_yield_segmentation_checks.py +++ b/aeon/testing/estimator_checking/_yield_segmentation_checks.py @@ -5,7 +5,7 @@ import numpy as np from aeon.base._base import _clone_estimator -from aeon.base._base_series import VALID_SERIES_INNER_TYPES +from aeon.utils.data_types import VALID_SERIES_INNER_TYPES def _yield_segmentation_checks(estimator_class, estimator_instances, datatypes): diff --git a/aeon/testing/estimator_checking/_yield_transformation_checks.py b/aeon/testing/estimator_checking/_yield_transformation_checks.py index 63538ba2dd..7fb8f00ce0 100644 --- a/aeon/testing/estimator_checking/_yield_transformation_checks.py +++ b/aeon/testing/estimator_checking/_yield_transformation_checks.py @@ -9,7 +9,6 @@ from sklearn.utils._testing import set_random_state from aeon.base._base import _clone_estimator -from aeon.base._base_series import VALID_SERIES_INNER_TYPES from aeon.datasets import load_basic_motions, load_unit_test from aeon.testing.expected_results.expected_transform_outputs import ( basic_motions_result, @@ -20,7 +19,7 @@ from aeon.testing.utils.estimator_checks import _run_estimator_method 
from aeon.transformations.collection.channel_selection.base import BaseChannelSelector from aeon.transformations.series import BaseSeriesTransformer -from aeon.utils.data_types import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES, VALID_SERIES_INNER_TYPES def _yield_transformation_checks(estimator_class, estimator_instances, datatypes): diff --git a/aeon/testing/utils/estimator_checks.py b/aeon/testing/utils/estimator_checks.py index d556ff0249..30707837fe 100644 --- a/aeon/testing/utils/estimator_checks.py +++ b/aeon/testing/utils/estimator_checks.py @@ -2,8 +2,7 @@ __maintainer__ = ["MatthewMiddlehurst"] -import inspect -from inspect import isclass +from inspect import isclass, signature import numpy as np @@ -14,7 +13,7 @@ def _run_estimator_method(estimator, method_name, datatype, split): method = getattr(estimator, method_name) - args = inspect.getfullargspec(method)[0] + args = list(signature(method).parameters.keys()) try: # forecasting if "y" in args and "exog" in args: diff --git a/aeon/transformations/collection/compose/_pipeline.py b/aeon/transformations/collection/compose/_pipeline.py index 796450706d..9e0acafdd8 100644 --- a/aeon/transformations/collection/compose/_pipeline.py +++ b/aeon/transformations/collection/compose/_pipeline.py @@ -49,11 +49,10 @@ class CollectionTransformerPipeline(BaseCollectionPipeline, BaseCollectionTransf Examples -------- >>> from aeon.transformations.collection import Resizer - >>> from aeon.transformations.collection.feature_based import ( - ... SevenNumberSummary) + >>> from aeon.transformations.collection.feature_based import SevenNumberSummary >>> from aeon.datasets import load_unit_test >>> from aeon.transformations.collection.compose import ( - ... CollectionTransformerPipeline) + ... CollectionTransformerPipeline) >>> X, y = load_unit_test(split="train") >>> pipeline = CollectionTransformerPipeline( ... 
[Resizer(length=10), SevenNumberSummary()] diff --git a/aeon/transformations/collection/tests/test_broadcaster.py b/aeon/transformations/collection/tests/test_broadcaster.py index 5ab6810790..3508924679 100644 --- a/aeon/transformations/collection/tests/test_broadcaster.py +++ b/aeon/transformations/collection/tests/test_broadcaster.py @@ -87,7 +87,7 @@ def test_broadcaster_methods_multivariate(data_gen): @pytest.mark.parametrize( "data_gen", - [make_example_3d_numpy, make_example_3d_numpy_list], + [make_example_3d_numpy], ) def test_broadcaster_no_fit(data_gen): """Test the wrapper for transformers with fit_empty.""" diff --git a/aeon/transformations/series/_collection_wrapper.py b/aeon/transformations/series/_collection_wrapper.py index bb68401ec0..77b095a5a1 100644 --- a/aeon/transformations/series/_collection_wrapper.py +++ b/aeon/transformations/series/_collection_wrapper.py @@ -30,7 +30,7 @@ class CollectionToSeriesWrapper(BaseSeriesTransformer): _tags = { "input_data_type": "Series", "output_data_type": "Series", - "X_inner_type": "numpy2D", + "X_inner_type": "np.ndarray", } def __init__( @@ -46,6 +46,8 @@ def __init__( tags_to_add = transformer.get_tags() for key in tags_to_keep: tags_to_add.pop(key, None) + for key in ["capability:unequal_length", "removes_unequal_length"]: + tags_to_add.pop(key, None) self.set_tags(**tags_to_add) def _fit(self, X, y=None): @@ -62,6 +64,20 @@ def _transform(self, X, y=None): return t.transform(X, y) + def _fit_transform(self, X, y=None): + X = X.reshape(1, X.shape[0], X.shape[1]) + self.collection_transformer_ = self.transformer.clone() + return self.collection_transformer_.fit_transform(X, y) + + def _inverse_transform(self, X, y=None): + X = X.reshape(1, X.shape[0], X.shape[1]) + + t = self.transformer + if not self.get_tag("fit_is_empty"): + t = self.collection_transformer_ + + return t.inverse_transform(X, y) + @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the 
estimator. diff --git a/aeon/transformations/series/_log.py b/aeon/transformations/series/_log.py index 9e8835d4c2..45d72664f5 100644 --- a/aeon/transformations/series/_log.py +++ b/aeon/transformations/series/_log.py @@ -5,7 +5,7 @@ import numpy as np -from aeon.transformations.series import BaseSeriesTransformer +from aeon.transformations.series.base import BaseSeriesTransformer class LogTransformer(BaseSeriesTransformer): diff --git a/aeon/transformations/series/compose/__init__.py b/aeon/transformations/series/compose/__init__.py index 1fd4cd3d38..4855a210ee 100644 --- a/aeon/transformations/series/compose/__init__.py +++ b/aeon/transformations/series/compose/__init__.py @@ -6,3 +6,4 @@ ] from aeon.transformations.series.compose._identity import SeriesId +from aeon.transformations.series.compose._pipeline import SeriesTransformerPipeline diff --git a/aeon/transformations/series/compose/_identity.py b/aeon/transformations/series/compose/_identity.py index 2c6a592ef4..d9135fae36 100644 --- a/aeon/transformations/series/compose/_identity.py +++ b/aeon/transformations/series/compose/_identity.py @@ -1,14 +1,14 @@ """Identity transformer.""" from aeon.transformations.series import BaseSeriesTransformer -from aeon.utils.data_types import SERIES_DATA_TYPES +from aeon.utils.data_types import VALID_SERIES_INNER_TYPES class SeriesId(BaseSeriesTransformer): """Identity transformer, returns data unchanged in transform/inverse_transform.""" _tags = { - "X_inner_type": SERIES_DATA_TYPES, + "X_inner_type": VALID_SERIES_INNER_TYPES, "fit_is_empty": True, "capability:inverse_transform": True, "capability:multivariate": True, diff --git a/aeon/transformations/series/compose/_pipeline.py b/aeon/transformations/series/compose/_pipeline.py index 0a64f075de..0f1aaef6be 100644 --- a/aeon/transformations/series/compose/_pipeline.py +++ b/aeon/transformations/series/compose/_pipeline.py @@ -6,6 +6,7 @@ from aeon.base._estimators.compose.series_pipeline import BaseSeriesPipeline from 
aeon.transformations.series import BaseSeriesTransformer from aeon.transformations.series.compose import SeriesId +from aeon.utils.data_types import VALID_SERIES_INNER_TYPES class SeriesTransformerPipeline(BaseSeriesPipeline, BaseSeriesTransformer): @@ -53,7 +54,7 @@ class SeriesTransformerPipeline(BaseSeriesPipeline, BaseSeriesTransformer): >>> from aeon.transformations.series.compose import SeriesTransformerPipeline >>> X = load_airline() >>> pipeline = SeriesTransformerPipeline( - ... [LogTransformer, MovingAverageSeriesTransformer] + ... [LogTransformer(), MovingAverageSeriesTransformer()] ... ) >>> pipeline.fit(X) SeriesTransformerPipeline(...) @@ -61,7 +62,7 @@ class SeriesTransformerPipeline(BaseSeriesPipeline, BaseSeriesTransformer): """ _tags = { - "X_inner_type": "numpy2D", + "X_inner_type": VALID_SERIES_INNER_TYPES, } def __init__(self, transformers): diff --git a/aeon/utils/data_types.py b/aeon/utils/data_types.py index aa6b14ba49..cb5aa1d292 100644 --- a/aeon/utils/data_types.py +++ b/aeon/utils/data_types.py @@ -7,53 +7,100 @@ the index. Checks of input data are handled in the `aeon.utils.validation` module, -and conversion is handled in the `aeon.utils.conversion` module. +and conversion is handled in the `aeon.utils.conversion` module. 
""" +__all__ = [ + "SERIES_DATA_TYPES", + "SERIES_MULTIVARIATE_DATA_TYPES", + "VALID_SERIES_INNER_TYPES", + "VALID_SERIES_INPUT_TYPES", + "COLLECTIONS_DATA_TYPES", + "COLLECTIONS_MULTIVARIATE_DATA_TYPES", + "COLLECTIONS_UNEQUAL_DATA_TYPES", + "VALID_COLLECTIONS_INNER_TYPES", + "VALID_COLLECTIONS_INPUT_TYPES", + "HIERARCHICAL_DATA_TYPES", + "ALL_TIME_SERIES_TYPES", +] + +import numpy as np +import pandas as pd + +# SERIES + SERIES_DATA_TYPES = [ - "pd.Series", # univariate time series of shape (n_timepoints) - "pd.DataFrame", # uni/multivariate time series of shape (n_timepoints, - # n_channels) by default or (n_channels, n_timepoints) if set by axis == 1 - "np.ndarray", # uni/multivariate time series of shape (n_timepoints, - # n_channels) by default or (n_channels, n_timepoints) if set by axis ==1 + "pd.Series", # univariate 1D pandas Series time series of shape (n_timepoints) + "pd.DataFrame", # uni/multivariate 2D pandas DataFrame time series of shape + # (n_channels, n_timepoints) by default, or (n_timepoints, n_channels) if set by + # axis == 0 + "np.ndarray", # uni/multivariate 2D numpy ndarray time series of shape + # (n_channels, n_timepoints) by default, or (n_timepoints, n_channels) if set by + # axis == 0 +] + +# subset of series dtypes capable of handling multivariate time series +SERIES_MULTIVARIATE_DATA_TYPES = [ + "pd.DataFrame", + "np.ndarray", +] + +# datatypes which are valid for BaseSeriesEstimator estimators +VALID_SERIES_INNER_TYPES = [ + "np.ndarray", + "pd.DataFrame", ] +VALID_SERIES_INPUT_TYPES = [pd.Series, pd.DataFrame, np.ndarray] + +# COLLECTIONS COLLECTIONS_DATA_TYPES = [ - "numpy3D", # 3D np.ndarray of format (n_cases, n_channels, n_timepoints) - "np-list", # python list of 2D np.ndarray of length [n_cases], - # each of shape (n_channels, n_timepoints_i) - "df-list", # python list of 2D pd.DataFrames of length [n_cases], each - # of shape (n_channels, n_timepoints_i) - "numpy2D", # 2D np.ndarray of shape (n_cases, n_timepoints) - 
"pd-wide", # 2D pd.DataFrame of shape (n_cases, n_timepoints) - "pd-multiindex", # pd.DataFrame with MultiIndex, index [case, timepoint], - # columns [channel] -] - -# subset of collections capable of handling multivariate time series + "numpy3D", # uni/multivariate 3D numpy ndarray of shape + # (n_cases, n_channels, n_timepoints) + "np-list", # uni/multivariate length [n_cases] Python list of 2D numpy ndarray + # with shape (n_channels, n_timepoints_i) + "df-list", # uni/multivariate length [n_cases] Python list of 2D pandas DataFrame + # with shape (n_channels, n_timepoints_i) + "numpy2D", # univariate 2D numpy ndarray of shape (n_cases, n_timepoints) + "pd-wide", # univariate 2D pandas DataFrame of shape (n_cases, n_timepoints) + "pd-multiindex", # uni/multivariate pandas DataFrame with MultiIndex, + # index [case, timepoint], columns [channel] +] + +# subset of collection dtypes capable of handling multivariate time series COLLECTIONS_MULTIVARIATE_DATA_TYPES = [ - "numpy3D", # 3D np.ndarray of format (n_cases, n_channels, n_timepoints) - "np-list", # python list of 2D np.ndarray of length [n_cases], - # each of shape (n_channels, n_timepoints_i) - "df-list", # python list of 2D pd.DataFrames of length [n_cases], each - # of shape (n_channels, n_timepoints_i) - "pd-multiindex", # pd.DataFrame with MultiIndex, index [case, timepoint], - # columns [channel] + "numpy3D", + "np-list", + "df-list", + "pd-multiindex", ] -# subset of collections capable of handling unequal length time series +# subset of collection dtypes capable of handling unequal length time series COLLECTIONS_UNEQUAL_DATA_TYPES = [ - "np-list", # python list of 2D np.ndarray of length [n_cases], - # each of shape (n_channels, n_timepoints_i) - "df-list", # python list of 2D pd.DataFrames of length [n_cases], each - # of shape (n_channels, n_timepoints_i) - "pd-multiindex", # pd.DataFrame with MultiIndex, index [case, timepoint], - # columns [channel] + "np-list", + "df-list", + "pd-multiindex", +] 
+ +# datatypes which are valid for BaseCollectionEstimator estimators +VALID_COLLECTIONS_INNER_TYPES = [ + "numpy3D", + "np-list", + "df-list", + "numpy2D", + "pd-wide", + "pd-multiindex", ] +VALID_COLLECTIONS_INPUT_TYPES = [list, pd.DataFrame, np.ndarray] + +# HIERARCHICAL + HIERARCHICAL_DATA_TYPES = ["pd_multiindex_hier"] # pd.DataFrame +# ALL + ALL_TIME_SERIES_TYPES = ( SERIES_DATA_TYPES + COLLECTIONS_DATA_TYPES + HIERARCHICAL_DATA_TYPES ) From 8aaafe5ba056f44be3d16ef52e3056aac8d211c6 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 22 May 2025 19:17:23 +0100 Subject: [PATCH 3/6] fixing tests --- .../estimator_checking/_yield_transformation_checks.py | 3 ++- aeon/testing/testing_config.py | 1 + aeon/transformations/collection/_series_broadcaster.py | 10 ++++++++-- aeon/transformations/series/_collection_wrapper.py | 1 + 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/aeon/testing/estimator_checking/_yield_transformation_checks.py b/aeon/testing/estimator_checking/_yield_transformation_checks.py index 7fb8f00ce0..14bd54fa90 100644 --- a/aeon/testing/estimator_checking/_yield_transformation_checks.py +++ b/aeon/testing/estimator_checking/_yield_transformation_checks.py @@ -198,4 +198,5 @@ def check_transform_inverse_transform_equivalent(estimator, datatype): if isinstance(Xit, (np.ndarray, pd.DataFrame)): Xit = Xit.squeeze() - assert deep_equals(X, Xit, ignore_index=True) + eq, msg = deep_equals(X, Xit, ignore_index=True, return_msg=True) + assert eq, msg diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py index 3e47dcf08f..c2e427e6f7 100644 --- a/aeon/testing/testing_config.py +++ b/aeon/testing/testing_config.py @@ -53,6 +53,7 @@ "RSAST": ["check_fit_deterministic"], "MatrixProfile": ["check_fit_deterministic", "check_persistence_via_pickle"], "LeftSTAMPi": ["check_anomaly_detector_output"], + "SeriesToCollectionBroadcaster": ["check_transform_inverse_transform_equivalent"], # missed in legacy testing, changes 
state in predict/transform "FLUSSSegmenter": ["check_non_state_changing_method"], "InformationGainSegmenter": ["check_non_state_changing_method"], diff --git a/aeon/transformations/collection/_series_broadcaster.py b/aeon/transformations/collection/_series_broadcaster.py index 673217063c..f2e65c824e 100644 --- a/aeon/transformations/collection/_series_broadcaster.py +++ b/aeon/transformations/collection/_series_broadcaster.py @@ -38,6 +38,7 @@ class SeriesToCollectionBroadcaster(BaseCollectionTransformer): "input_data_type": "Collection", "output_data_type": "Collection", "capability:unequal_length": True, + "capability:inverse_transform": True, "X_inner_type": ["numpy3D", "np-list"], } @@ -100,7 +101,6 @@ def _transform(self, X, y=None): """ n_cases = get_n_cases(X) - """If fit is empty is true only single transform is used.""" Xt = [] if self.get_tag("fit_is_empty"): for i in range(n_cases): @@ -108,6 +108,7 @@ def _transform(self, X, y=None): else: for i in range(n_cases): Xt.append(self.single_transformers_[i]._transform(X[i])) + # Need to make it a valid collection for i in range(n_cases): if isinstance(Xt[i], np.ndarray) and Xt[i].ndim == 1: @@ -134,7 +135,7 @@ def _inverse_transform(self, X, y=None): The transformed collection of time series, either a 3D numpy or a list of 2D numpy. 
""" - n_cases = len(X) + n_cases = get_n_cases(X) Xt = [] if self.get_tag("fit_is_empty"): for i in range(n_cases): @@ -142,6 +143,11 @@ def _inverse_transform(self, X, y=None): else: for i in range(n_cases): Xt.append(self.single_transformers_[i]._inverse_transform(X[i])) + + # Need to make it a valid collection + for i in range(n_cases): + if isinstance(Xt[i], np.ndarray) and Xt[i].ndim == 1: + Xt[i] = Xt[i].reshape(1, -1) return Xt @classmethod diff --git a/aeon/transformations/series/_collection_wrapper.py b/aeon/transformations/series/_collection_wrapper.py index 77b095a5a1..af6f9d981b 100644 --- a/aeon/transformations/series/_collection_wrapper.py +++ b/aeon/transformations/series/_collection_wrapper.py @@ -30,6 +30,7 @@ class CollectionToSeriesWrapper(BaseSeriesTransformer): _tags = { "input_data_type": "Series", "output_data_type": "Series", + "capability:inverse_transform": True, "X_inner_type": "np.ndarray", } From daf56f29b7fb0e767b1d1765f8fb4e06a9c34851 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 12 Jun 2025 18:46:04 +0100 Subject: [PATCH 4/6] skip test --- aeon/testing/testing_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py index fe8f3bafd6..c8ed71ff15 100644 --- a/aeon/testing/testing_config.py +++ b/aeon/testing/testing_config.py @@ -55,6 +55,7 @@ "LeftSTAMPi": ["check_series_anomaly_detector_output"], "SignatureClassifier": ["check_classifier_against_expected_results"], "SeriesToCollectionBroadcaster": ["check_transform_inverse_transform_equivalent"], + "CollectionToSeriesWrapper": ["check_transform_inverse_transform_equivalent"], # missed in legacy testing, changes state in predict/transform "FLUSSSegmenter": ["check_non_state_changing_method"], "ClaSPSegmenter": ["check_non_state_changing_method"], From 91513c0e58498ad12065d4ee4dbc3bf4334d1248 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 12 Jun 2025 19:09:43 +0100 Subject: [PATCH 5/6] no 
mixin --- aeon/transformations/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aeon/transformations/base.py b/aeon/transformations/base.py index 277559f615..2778978625 100644 --- a/aeon/transformations/base.py +++ b/aeon/transformations/base.py @@ -7,12 +7,11 @@ import numpy as np import pandas as pd -from sklearn.base import TransformerMixin from aeon.base import BaseAeonEstimator -class BaseTransformer(TransformerMixin, BaseAeonEstimator): +class BaseTransformer(BaseAeonEstimator): """Transformer base class.""" _tags = { From 5e434dece82a972c7ab22c50376da0b7f8738614 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 12 Jun 2025 19:25:56 +0100 Subject: [PATCH 6/6] yes mixin --- aeon/transformations/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aeon/transformations/base.py b/aeon/transformations/base.py index 2778978625..277559f615 100644 --- a/aeon/transformations/base.py +++ b/aeon/transformations/base.py @@ -7,11 +7,12 @@ import numpy as np import pandas as pd +from sklearn.base import TransformerMixin from aeon.base import BaseAeonEstimator -class BaseTransformer(BaseAeonEstimator): +class BaseTransformer(TransformerMixin, BaseAeonEstimator): """Transformer base class.""" _tags = {