Skip to content

[ENH] Resolve failing check_fit_deterministic #2575

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 35 additions & 4 deletions aeon/classification/shapelet_based/_rsast.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
__all__ = ["RSASTClassifier"]

import numpy as np
from deprecated.sphinx import deprecated
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline

Expand All @@ -30,8 +31,10 @@ class RSASTClassifier(BaseClassifier):
"None"=Extract randomly any length from the TS
nb_inst_per_class : int default = 10
the number of reference time series to select per class
seed : int, default = None
random_state : int, default = None
the seed of the random generator
seed : int, default= None
Deprecated and will be removed in v1.2. Use `random_state` instead.
classifier : sklearn compatible classifier, default = None
if None, a RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)) is used.
n_jobs : int, default = 1
Expand Down Expand Up @@ -63,21 +66,49 @@ class RSASTClassifier(BaseClassifier):
"python_dependencies": "statsmodels",
}

# TODO: remove 'seed' in v1.2
@deprecated(
version="1.1",
reason="The 'seed' parameter will be removed in v1.2.",
category=FutureWarning,
)
def __init__(
self,
n_random_points=10,
len_method="both",
nb_inst_per_class=10,
seed=None,
random_state=None,
classifier=None,
n_jobs=1,
seed=None,
):
super().__init__()
self.n_random_points = n_random_points
self.len_method = len_method
self.nb_inst_per_class = nb_inst_per_class
self.n_jobs = n_jobs
# Store the seed parameter (required for sklearn compatibility)
self.seed = seed

# Handle deprecated seed parameter
if seed is not None:
import warnings

warnings.warn(
"The 'seed' parameter is deprecated and will be removed in v1.2. "
"Use 'random_state' instead.",
FutureWarning,
stacklevel=2,
)
if random_state is None:
random_state = seed
else:
raise ValueError(
"Cannot specify both 'seed' and 'random_state'. "
"Use 'random_state' only."
)

self.random_state = random_state
self.classifier = classifier

def _fit(self, X, y):
Expand All @@ -100,7 +131,7 @@ def _fit(self, X, y):
self.n_random_points,
self.len_method,
self.nb_inst_per_class,
self.seed,
self.random_state,
self.n_jobs,
)

Expand All @@ -110,7 +141,7 @@ def _fit(self, X, y):
if self.classifier is None
else self.classifier
),
self.seed,
self.random_state,
)

self._pipeline = make_pipeline(self._transformer, self._classifier)
Expand Down
39 changes: 35 additions & 4 deletions aeon/classification/shapelet_based/_sast.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from operator import itemgetter

import numpy as np
from deprecated.sphinx import deprecated
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline

Expand All @@ -32,8 +33,10 @@ class SASTClassifier(BaseClassifier):
the stride used when generating subsequences
nb_inst_per_class : int default = 1
the number of reference time series to select per class
seed : int, default = None
random_state : int, default = None
the seed of the random generator
seed : int, default=None
Deprecated and will be removed in v1.2. Use `random_state` instead.
classifier : sklearn compatible classifier, default = None
if None, a RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)) is used.
n_jobs : int, default = 1
Expand Down Expand Up @@ -66,22 +69,50 @@ class SASTClassifier(BaseClassifier):
"algorithm_type": "shapelet",
}

# TODO: remove 'seed' in v1.2
@deprecated(
version="1.1",
reason="The 'seed' parameter will be removed in v1.2.",
category=FutureWarning,
)
def __init__(
self,
length_list=None,
stride: int = 1,
nb_inst_per_class: int = 1,
seed: Optional[int] = None,
random_state: Optional[int] = None,
classifier=None,
n_jobs: int = 1,
seed=None,
) -> None:
super().__init__()
self.length_list = length_list
self.stride = stride
self.nb_inst_per_class = nb_inst_per_class
self.n_jobs = n_jobs

# Store the seed parameter (required for sklearn compatibility)
self.seed = seed

# Handle deprecated seed parameter
if seed is not None:
import warnings

warnings.warn(
"The 'seed' parameter is deprecated and will be removed in v1.2. "
"Use 'random_state' instead.",
FutureWarning,
stacklevel=2,
)
if random_state is None:
random_state = seed
else:
raise ValueError(
"Cannot specify both 'seed' and 'random_state'. "
"Use 'random_state' only."
)

self.random_state = random_state
self.classifier = classifier

def _fit(self, X, y):
Expand All @@ -104,7 +135,7 @@ def _fit(self, X, y):
self.length_list,
self.stride,
self.nb_inst_per_class,
self.seed,
self.random_state,
self.n_jobs,
)

Expand All @@ -114,7 +145,7 @@ def _fit(self, X, y):
if self.classifier is None
else self.classifier
),
self.seed,
self.random_state,
)

self._pipeline = make_pipeline(self._transformer, self._classifier)
Expand Down
5 changes: 0 additions & 5 deletions aeon/testing/testing_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,6 @@
"check_persistence_via_pickle",
"check_save_estimators_to_file",
],
# needs investigation
"SASTClassifier": ["check_fit_deterministic"],
"RSASTClassifier": ["check_fit_deterministic"],
"SAST": ["check_fit_deterministic"],
"RSAST": ["check_fit_deterministic"],
"MatrixProfile": ["check_fit_deterministic", "check_persistence_via_pickle"],
"LeftSTAMPi": ["check_anomaly_detector_output"],
# missed in legacy testing, changes state in predict/transform
Expand Down
42 changes: 36 additions & 6 deletions aeon/transformations/collection/shapelet_based/_rsast.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
import pandas as pd
from deprecated.sphinx import deprecated
from numba import get_num_threads, njit, prange, set_num_threads

from aeon.transformations.collection import BaseCollectionTransformer
Expand Down Expand Up @@ -64,8 +65,10 @@ class RSAST(BaseCollectionTransformer):

nb_inst_per_class : int default = 10
the number of reference time series to select per class
seed : int, default = None
random_state : int, default = None
the seed of the random generator
seed : int, default= None
Deprecated and will be removed in v1.2. Use `random_state` instead.
n_jobs : int, default = 1
Number of threads to use for the transform.

Expand Down Expand Up @@ -98,26 +101,53 @@ class RSAST(BaseCollectionTransformer):
"python_dependencies": "statsmodels",
}

# TODO: remove 'seed' in v1.2
@deprecated(
version="1.1",
reason="The 'seed' parameter will be removed in v1.2.",
category=FutureWarning,
)
def __init__(
self,
n_random_points: int = 10,
len_method: str = "both",
nb_inst_per_class: int = 10,
seed: Optional[int] = None,
random_state: Optional[int] = None,
n_jobs: int = 1, # Parallel processing
seed=None,
):
self.n_random_points = n_random_points
self.len_method = len_method
self.nb_inst_per_class = nb_inst_per_class
self.n_jobs = n_jobs
self.seed = seed
self._kernels = None # z-normalized subsequences
self._cand_length_list = {}
self._kernel_orig = []
self._start_points = []
self._classes = []
self._source_series = [] # To store the index of the original time series
self._kernels_generators = {} # Reference time series

# Handle deprecated seed parameter
# Store the seed parameter (required for sklearn compatibility)
self.seed = seed
if seed is not None:
import warnings

warnings.warn(
"The 'seed' parameter is deprecated and will be removed in v1.2. "
"Use 'random_state' instead.",
FutureWarning,
stacklevel=2,
)
if random_state is None:
random_state = seed
else:
raise ValueError(
"Cannot specify both 'seed' and 'random_state'. "
"Use 'random_state' only."
)
self.random_state = random_state
super().__init__()

def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "RSAST":
Expand All @@ -144,9 +174,9 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "RSAST":
X_ = np.reshape(X, (X.shape[0], X.shape[-1]))

self._random_state = (
np.random.RandomState(self.seed)
if not isinstance(self.seed, np.random.RandomState)
else self.seed
np.random.RandomState(self.random_state)
if not isinstance(self.random_state, np.random.RandomState)
else self.random_state
)

classes = np.unique(y)
Expand Down
57 changes: 45 additions & 12 deletions aeon/transformations/collection/shapelet_based/_sast.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Optional, Union

import numpy as np
from deprecated.sphinx import deprecated
from numba import get_num_threads, njit, prange, set_num_threads

from aeon.transformations.collection import BaseCollectionTransformer
Expand Down Expand Up @@ -56,8 +57,10 @@ class SAST(BaseCollectionTransformer):
the stride used when generating subsequences
nb_inst_per_class : int, default = 1
the number of reference time series to select per class
seed : int, default = None
random_state : int, default = None
the seed of the random generator
seed : int, default=None
Deprecated and will be removed in v1.2. Use `random_state` instead.
n_jobs : int, default = 1
Number of threads to use for the transform.
The available CPU count is used if this value is less than 1
Expand Down Expand Up @@ -92,27 +95,57 @@ class SAST(BaseCollectionTransformer):
"algorithm_type": "shapelet",
}

# TODO: remove 'seed' in v1.2
@deprecated(
version="1.1",
reason="The 'seed' parameter will be removed in v1.2.",
category=FutureWarning,
)
def __init__(
self,
lengths: Optional[np.ndarray] = None,
stride: int = 1,
nb_inst_per_class: int = 1,
seed: Optional[int] = None,
n_jobs: int = 1, # Parallel processing
random_state: Optional[int] = None,
n_jobs: int = 1,
seed=None,
):

super().__init__()
self.lengths = lengths
self.stride = stride
self.nb_inst_per_class = nb_inst_per_class
self._kernels = None # z-normalized subsequences
self._kernel_orig = None # non z-normalized subsequences
self._start_points = [] # To store the start positions
self._classes = [] # To store the class of each shapelet
self._source_series = [] # To store the index of the original time series
self.kernels_generators_ = {} # Reference time series
self._kernels = None
self._kernel_orig = None
self._start_points = []
self._classes = []
self._source_series = []
self.kernels_generators_ = {}
self.n_jobs = n_jobs

# Store the seed parameter (required for sklearn compatibility)
self.seed = seed

# Handle deprecated seed parameter
if seed is not None:
import warnings

warnings.warn(
"The 'seed' parameter is deprecated and will be removed in v1.2. "
"Use 'random_state' instead.",
FutureWarning,
stacklevel=2,
)
if random_state is None:
random_state = seed
else:
raise ValueError(
"Cannot specify both 'seed' and 'random_state'. "
"Use 'random_state' only."
)

self.random_state = random_state

def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "SAST":
"""Select reference time series and generate subsequences from them.

Expand All @@ -135,9 +168,9 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "SAST":
)

self._random_state = (
np.random.RandomState(self.seed)
if not isinstance(self.seed, np.random.RandomState)
else self.seed
np.random.RandomState(self.random_state)
if not isinstance(self.random_state, np.random.RandomState)
else self.random_state
)

classes = np.unique(y)
Expand Down
Loading