From aff5ed57393b418bc0669000dda45f4df42e63b6 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Tue, 14 Jan 2025 10:26:05 +0000
Subject: [PATCH 01/19] first draft

---
 .../collection/imbalance/__init__.py          |  1 +
 .../collection/imbalance/_smote.py            | 30 +++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 aeon/transformations/collection/imbalance/__init__.py
 create mode 100644 aeon/transformations/collection/imbalance/_smote.py

diff --git a/aeon/transformations/collection/imbalance/__init__.py b/aeon/transformations/collection/imbalance/__init__.py
new file mode 100644
index 0000000000..eeff2f5d85
--- /dev/null
+++ b/aeon/transformations/collection/imbalance/__init__.py
@@ -0,0 +1 @@
+"""Supervised transformers to rebalance colelctions of time series."""
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
new file mode 100644
index 0000000000..24078d180e
--- /dev/null
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -0,0 +1,30 @@
+"""Wrapper for imblearn minority class rebalancer SMOTE."""
+
+from imblearn.over_sampling import SMOTE as smote
+
+from aeon.transformations.collection import BaseCollectionTransformer
+
+__maintainer__ = ["TonyBagnall"]
+__all__ = ["SMOTE"]
+
+
+class SMOTE(BaseCollectionTransformer):
+    """Wrapper for SMOTE transform."""
+
+    _tags = {
+        "capability:multivariate": True,
+        "capability:unequal_length": True,
+        "requires_y": True,
+    }
+
+    def __init__(self, sampling_strategy="auto", random_state=None, k_neighbors=5):
+        self.sampling_strategy = sampling_strategy
+        self.random_state = random_state
+        self.k_neighbors = k_neighbors
+
+    def _fit(self, X, y=None):
+        self.smote_ = smote(self.sampling_strategy, self.random_state, self.k_neighbors)
+        self.smote_.fit(X, y)
+
+    def _transform(self, X, y=None):
+        return self.smote_.resample(X, y)

From 4bec820c8b9995cacad10261b736f004541d53ce Mon Sep 17 00:00:00 2001
From: Chuanhang Qiu <80885865+LinGinQiu@users.noreply.github.com>
Date: Thu, 23 Jan 2025 15:00:27 +0000
Subject: [PATCH 02/19] [ENH] wrapper for smote and adasyn of the imbalance
 module in collection transformers (#2501)

* smote & adasyn in aeon.transformation.imbalance

* smote & adasyn in aeon.transformation.imbalance

* smote & adasyn in aeon.transformation.imbalance

* smote & adasyn in aeon.transformation.imbalance
---
 .../collection/imbalance/__init__.py          |   5 +
 .../collection/imbalance/_adasyn.py           | 140 ++++++++++++
 .../collection/imbalance/_smote.py            | 216 +++++++++++++++++-
 .../collection/tests/test_imbalance.py        |  60 +++++
 4 files changed, 411 insertions(+), 10 deletions(-)
 create mode 100644 aeon/transformations/collection/imbalance/_adasyn.py
 create mode 100644 aeon/transformations/collection/tests/test_imbalance.py

diff --git a/aeon/transformations/collection/imbalance/__init__.py b/aeon/transformations/collection/imbalance/__init__.py
index eeff2f5d85..280251ad04 100644
--- a/aeon/transformations/collection/imbalance/__init__.py
+++ b/aeon/transformations/collection/imbalance/__init__.py
@@ -1 +1,6 @@
 """Supervised transformers to rebalance colelctions of time series."""
+
+__all__ = ["SMOTE", "ADASYN"]
+
+from aeon.transformations.collection.imbalance._smote import SMOTE
+from aeon.transformations.collection.imbalance._adasyn import ADASYN
diff --git a/aeon/transformations/collection/imbalance/_adasyn.py b/aeon/transformations/collection/imbalance/_adasyn.py
new file mode 100644
index 0000000000..72818b72a8
--- /dev/null
+++ b/aeon/transformations/collection/imbalance/_adasyn.py
@@ -0,0 +1,140 @@
+"""
+implement for imblearn minority class rebalancer ADASYN.
+see more in imblearn.over_sampling.ADASYN
+original authors:
+#          Guillaume Lemaitre <g.lemaitre58@gmail.com>
+#          Christos Aridas
+# License: MIT
+"""
+import numpy as np
+from aeon.transformations.collection import BaseCollectionTransformer
+from sklearn.neighbors import NearestNeighbors
+from sklearn.utils import check_random_state
+from scipy import sparse
+from collections import OrderedDict
+
+__maintainer__ = ["TonyBagnall, Chris Qiu"]
+__all__ = ["ADASYN"]
+
+
+class ADASYN(BaseCollectionTransformer):
+    """
+    Class to perform over-sampling using ADASYN.
+
+    This object is a simplified implementation of the Adaptive Synthetic
+    (ADASYN) algorithm as presented in imblearn.over_sampling.ADASYN.
+    This method is similar to SMOTE, but it generates different number of
+    samples depending on an estimate of the local distribution of the class
+    to be oversampled.
+    Parameters
+    ----------
+    {random_state}
+
+    k_neighbors : int or object, default=5
+        The nearest neighbors used to define the neighborhood of samples to use
+        to generate the synthetic samples. `~sklearn.neighbors.NearestNeighbors`
+        instance will be fitted in this case.
+    """
+
+    _tags = {
+        "capability:multivariate": True,
+        "capability:unequal_length": True,
+        "requires_y": True,
+    }
+
+    def __init__(self, random_state=None, k_neighbors=5):
+        self.random_state = random_state
+        self.k_neighbors = k_neighbors
+        super().__init__()
+
+    def _fit(self, X, y=None):
+        # set the additional_neighbor=1
+        self.nn_ = NearestNeighbors(n_neighbors=self.k_neighbors + 1)
+
+        # generate sampling target by targeting all classes but not the majority
+        unique, counts = np.unique(y, return_counts=True)
+        target_stats = dict(zip(unique, counts))
+        n_sample_majority = max(target_stats.values())
+        class_majority = max(target_stats, key=target_stats.get)
+        sampling_strategy = {
+            key: n_sample_majority - value
+            for (key, value) in target_stats.items()
+            if key != class_majority
+        }
+        self.sampling_strategy_ = OrderedDict(
+            sorted(sampling_strategy.items())
+        )
+        return self
+
+    def _transform(self, X, y=None):
+        shape_recover = False  # use to recover the shape of X
+        if X.ndim == 3 and X.shape[1] == 1:
+            X = np.squeeze(X, axis=1)  # remove the middle dimension to be compatible with sklearn
+            shape_recover = True
+        random_state = check_random_state(self.random_state)
+        X_resampled = [X.copy()]
+        y_resampled = [y.copy()]
+
+        # got the minority class label and the number needs to be generated i.e. num_majority - num_minority
+        for class_sample, n_samples in self.sampling_strategy_.items():
+            if n_samples == 0:
+                continue
+            target_class_indices = np.flatnonzero(y == class_sample)
+            X_class = X[target_class_indices]
+
+            self.nn_.fit(X)
+            nns = self.nn_.kneighbors(X_class, return_distance=False)[:, 1:]
+            # The ratio is computed using a one-vs-rest manner. Using majority
+            # in multi-class would lead to slightly different results at the
+            # cost of introducing a new parameter.
+            n_neighbors = self.nn_.n_neighbors - 1
+            ratio_nn = np.sum(y[nns] != class_sample, axis=1) / n_neighbors
+            if not np.sum(ratio_nn):
+                raise RuntimeError(
+                    "Not any neigbours belong to the majority"
+                    " class. This case will induce a NaN case"
+                    " with a division by zero. ADASYN is not"
+                    " suited for this specific dataset."
+                    " Use SMOTE instead."
+                )
+            ratio_nn /= np.sum(ratio_nn)
+            n_samples_generate = np.rint(ratio_nn * n_samples).astype(int)
+            # rounding may cause new amount for n_samples
+            n_samples = np.sum(n_samples_generate)
+            if not n_samples:
+                raise ValueError(
+                    "No samples will be generated with the provided ratio settings."
+                )
+
+            # the nearest neighbors need to be fitted only on the current class
+            # to find the class NN to generate new samples
+            self.nn_.fit(X_class)
+            nns = self.nn_.kneighbors(X_class, return_distance=False)[:, 1:]
+
+            enumerated_class_indices = np.arange(len(target_class_indices))
+            rows = np.repeat(enumerated_class_indices, n_samples_generate)
+            cols = random_state.choice(n_neighbors, size=n_samples)
+            diffs = X_class[nns[rows, cols]] - X_class[rows]
+            steps = random_state.uniform(size=(n_samples, 1))
+
+            if sparse.issparse(X):
+                sparse_func = type(X).__name__
+                steps = getattr(sparse, sparse_func)(steps)
+                X_new = X_class[rows] + steps.multiply(diffs)
+            else:
+                X_new = X_class[rows] + steps * diffs
+
+            X_new = X_new.astype(X.dtype)
+            y_new = np.full(n_samples, fill_value=class_sample, dtype=y.dtype)
+            X_resampled.append(X_new)
+            y_resampled.append(y_new)
+
+        if sparse.issparse(X):
+            X_resampled = sparse.vstack(X_resampled, format=X.format)
+        else:
+            X_resampled = np.vstack(X_resampled)
+        y_resampled = np.hstack(y_resampled)
+
+        if shape_recover:
+            X_resampled = X_resampled[:, np.newaxis, :]
+        return X_resampled, y_resampled
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index 24078d180e..36aea38b1c 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -1,15 +1,42 @@
-"""Wrapper for imblearn minority class rebalancer SMOTE."""
-
-from imblearn.over_sampling import SMOTE as smote
+"""
+implement for imblearn minority class rebalancer SMOTE.
+see more in imblearn.over_sampling.SMOTE
+original authors:
+#          Guillaume Lemaitre <g.lemaitre58@gmail.com>
+#          Fernando Nogueira
+#          Christos Aridas
+#          Dzianis Dudnik
+# License: MIT
+"""
 
+import numpy as np
 from aeon.transformations.collection import BaseCollectionTransformer
+from sklearn.neighbors import NearestNeighbors
+from sklearn.utils import check_random_state
+from scipy import sparse
+from collections import OrderedDict
 
-__maintainer__ = ["TonyBagnall"]
+__maintainer__ = ["TonyBagnall, Chris Qiu"]
 __all__ = ["SMOTE"]
 
 
 class SMOTE(BaseCollectionTransformer):
-    """Wrapper for SMOTE transform."""
+    """
+    Class to perform over-sampling using SMOTE.
+
+    This object is a simplified implementation of SMOTE - Synthetic Minority
+    Over-sampling Technique as presented in imblearn.over_sampling.SMOTE
+    sampling_strategy is sampling target by targeting all classes but not the
+    majority, which directly expressed in _fit.sampling_strategy.
+    Parameters
+    ----------
+    {random_state}
+
+    k_neighbors : int or object, default=5
+        The nearest neighbors used to define the neighborhood of samples to use
+        to generate the synthetic samples. `~sklearn.neighbors.NearestNeighbors`
+        instance will be fitted in this case.
+    """
 
     _tags = {
         "capability:multivariate": True,
@@ -17,14 +44,183 @@ class SMOTE(BaseCollectionTransformer):
         "requires_y": True,
     }
 
-    def __init__(self, sampling_strategy="auto", random_state=None, k_neighbors=5):
-        self.sampling_strategy = sampling_strategy
+    def __init__(self, random_state=None, k_neighbors=5):
         self.random_state = random_state
         self.k_neighbors = k_neighbors
+        super().__init__()
 
     def _fit(self, X, y=None):
-        self.smote_ = smote(self.sampling_strategy, self.random_state, self.k_neighbors)
-        self.smote_.fit(X, y)
+        # set the additional_neighbor=1
+        self.nn_ = NearestNeighbors(n_neighbors=self.k_neighbors + 1)
+
+        # generate sampling target by targeting all classes but not the majority
+        unique, counts = np.unique(y, return_counts=True)
+        target_stats = dict(zip(unique, counts))
+        n_sample_majority = max(target_stats.values())
+        class_majority = max(target_stats, key=target_stats.get)
+        sampling_strategy = {
+            key: n_sample_majority - value
+            for (key, value) in target_stats.items()
+            if key != class_majority
+        }
+        self.sampling_strategy_ = OrderedDict(
+            sorted(sampling_strategy.items())
+        )
+        return self
 
     def _transform(self, X, y=None):
-        return self.smote_.resample(X, y)
+        shape_recover = False   # use to recover the shape of X
+        if X.ndim == 3 and X.shape[1] == 1:
+            X = np.squeeze(X, axis=1)  # remove the middle dimension to be compatible with sklearn
+            shape_recover = True
+        X_resampled = [X.copy()]
+        y_resampled = [y.copy()]
+
+        # got the minority class label and the number needs to be generated i.e. num_majority - num_minority
+        for class_sample, n_samples in self.sampling_strategy_.items():
+            if n_samples == 0:
+                continue
+            target_class_indices = np.flatnonzero(y == class_sample)
+            X_class = X[target_class_indices]
+
+            self.nn_.fit(X_class)
+            nns = self.nn_.kneighbors(X_class, return_distance=False)[:, 1:]
+            X_new, y_new = self._make_samples(
+                X_class, y.dtype, class_sample, X_class, nns, n_samples, 1.0
+            )
+            X_resampled.append(X_new)
+            y_resampled.append(y_new)
+
+        if sparse.issparse(X):
+            X_resampled = sparse.vstack(X_resampled, format=X.format)
+        else:
+            X_resampled = np.vstack(X_resampled)
+        y_resampled = np.hstack(y_resampled)
+        if shape_recover:
+            X_resampled = X_resampled[:, np.newaxis, :]
+        return X_resampled, y_resampled
+
+    def _make_samples(
+            self, X, y_dtype, y_type, nn_data, nn_num, n_samples, step_size=1.0, y=None
+    ):
+        """A support function that returns artificial samples constructed along
+        the line connecting nearest neighbours.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Points from which the points will be created.
+
+        y_dtype : dtype
+            The data type of the targets.
+
+        y_type : str or int
+            The minority target value, just so the function can return the
+            target values for the synthetic variables with correct length in
+            a clear format.
+
+        nn_data : ndarray of shape (n_samples_all, n_features)
+            Data set carrying all the neighbours to be used
+
+        nn_num : ndarray of shape (n_samples_all, k_nearest_neighbours)
+            The nearest neighbours of each sample in `nn_data`.
+
+        n_samples : int
+            The number of samples to generate.
+
+        step_size : float, default=1.0
+            The step size to create samples.
+
+        y : ndarray of shape (n_samples_all,), default=None
+            The true target associated with `nn_data`. Used by Borderline SMOTE-2 to
+            weight the distances in the sample generation process.
+
+        Returns
+        -------
+        X_new : {ndarray, sparse matrix} of shape (n_samples_new, n_features)
+            Synthetically generated samples.
+
+        y_new : ndarray of shape (n_samples_new,)
+            Target values for synthetic samples.
+        """
+        random_state = check_random_state(self.random_state)
+        samples_indices = random_state.randint(low=0, high=nn_num.size, size=n_samples)
+
+        # np.newaxis for backwards compatibility with random_state
+        steps = step_size * random_state.uniform(size=n_samples)[:, np.newaxis]
+        rows = np.floor_divide(samples_indices, nn_num.shape[1])
+        cols = np.mod(samples_indices, nn_num.shape[1])
+
+        X_new = self._generate_samples(X, nn_data, nn_num, rows, cols, steps, y_type, y)
+        y_new = np.full(n_samples, fill_value=y_type, dtype=y_dtype)
+        return X_new, y_new
+
+    def _generate_samples(
+            self, X, nn_data, nn_num, rows, cols, steps, y_type=None, y=None
+    ):
+        r"""Generate a synthetic sample.
+
+        The rule for the generation is:
+
+        .. math::
+           \mathbf{s_{s}} = \mathbf{s_{i}} + \mathcal{u}(0, 1) \times
+           (\mathbf{s_{nn}} - \mathbf{s_{i}}) \,
+
+        where \mathbf{s_{s}} is the new synthetic sample, \mathbf{s_{i}} is
+        the current sample, \mathbf{s_{nn}} is a randomly selected neighbor of
+        \mathbf{s_{i}} and \mathcal{u}(0, 1) is a random number in [0, 1).
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Points from which the points will be created.
+
+        nn_data : ndarray of shape (n_samples_all, n_features)
+            Data set carrying all the neighbours to be used.
+
+        nn_num : ndarray of shape (n_samples_all, k_nearest_neighbours)
+            The nearest neighbours of each sample in `nn_data`.
+
+        rows : ndarray of shape (n_samples,), dtype=int
+            Indices pointing at feature vector in X which will be used
+            as a base for creating new samples.
+
+        cols : ndarray of shape (n_samples,), dtype=int
+            Indices pointing at which nearest neighbor of base feature vector
+            will be used when creating new samples.
+
+        steps : ndarray of shape (n_samples,), dtype=float
+            Step sizes for new samples.
+
+        y_type : str, int or None, default=None
+            Class label of the current target classes for which we want to generate
+            samples.
+
+        y : ndarray of shape (n_samples_all,), default=None
+            The true target associated with `nn_data`. Used by Borderline SMOTE-2 to
+            weight the distances in the sample generation process.
+
+        Returns
+        -------
+        X_new : {ndarray, sparse matrix} of shape (n_samples, n_features)
+            Synthetically generated samples.
+        """
+        diffs = nn_data[nn_num[rows, cols]] - X[rows]
+        if y is not None:  # only entering for BorderlineSMOTE-2
+            random_state = check_random_state(self.random_state)
+            mask_pair_samples = y[nn_num[rows, cols]] != y_type
+            diffs[mask_pair_samples] *= random_state.uniform(
+                low=0.0, high=0.5, size=(mask_pair_samples.sum(), 1)
+            )
+
+        if sparse.issparse(X):
+            sparse_func = type(X).__name__
+            steps = getattr(sparse, sparse_func)(steps)
+            X_new = X[rows] + steps.multiply(diffs)
+        else:
+            X_new = X[rows] + steps * diffs
+
+        return X_new.astype(X.dtype)
+
+
+
diff --git a/aeon/transformations/collection/tests/test_imbalance.py b/aeon/transformations/collection/tests/test_imbalance.py
new file mode 100644
index 0000000000..f56df6fcfe
--- /dev/null
+++ b/aeon/transformations/collection/tests/test_imbalance.py
@@ -0,0 +1,60 @@
+"""Tests for the rebalancer transformers."""
+
+import numpy as np
+import pytest
+
+from aeon.transformations.collection.imbalance import SMOTE, ADASYN
+
+
+def test_smote():
+    """Test the SMOTE class.
+
+    This function creates a 3D numpy array, applies
+    SMOTE using the SMOTE class, and asserts that the
+    transformed data has a balanced number of samples.
+    """
+    n_samples = 100  # Total number of labels
+    majority_num = 90  # number of majority class
+    minority_num = n_samples - majority_num  # number of minority class
+
+    X = np.random.rand(n_samples, 1, 10)
+    y = np.array([0] * majority_num + [1] * minority_num)
+
+    transformer = SMOTE()
+    transformer.fit(X, y)
+    res_X, res_y = transformer.transform(X, y)
+    _, res_count = np.unique(res_y, return_counts=True)
+
+    assert len(res_X) == 2 * majority_num
+    assert len(res_y) == 2 * majority_num
+    assert res_count[0] == majority_num
+    assert res_count[1] == majority_num
+
+
+def test_adasyn():
+    """Test the ADASYN class.
+
+    This function creates a 3D numpy array, applies
+    ADASYN using the ADASYN class, and asserts that the
+    transformed data has a balanced number of samples.
+    ADASYN is a variant of SMOTE that generates synthetic samples,
+    but it focuses on generating samples near the decision boundary.
+    Therefore, sometimes, it may generate more or less samples than SMOTE,
+    which is why we only check if the number of samples is nearly balanced.
+    """
+    n_samples = 100  # Total number of labels
+    majority_num = 90  # number of majority class
+    minority_num = n_samples - majority_num  # number of minority class
+
+    X = np.random.rand(n_samples, 1, 10)
+    y = np.array([0] * majority_num + [1] * minority_num)
+
+    transformer = ADASYN()
+    transformer.fit(X, y)
+    res_X, res_y = transformer.transform(X, y)
+    _, res_count = np.unique(res_y, return_counts=True)
+
+    assert np.abs(len(res_X) - 2 * majority_num) < minority_num
+    assert np.abs(len(res_y) - 2 * majority_num) < minority_num
+    assert res_count[0] == majority_num
+    assert np.abs(res_count[0] - res_count[1]) < minority_num

From 5db24f39201b630cc6e125027774c9fd1c2fca0b Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Thu, 23 Jan 2025 16:50:11 +0000
Subject: [PATCH 03/19] make experimental

---
 README.md                                     |  1 +
 .../collection/imbalance/_adasyn.py           | 46 +++++++-----
 .../collection/imbalance/_smote.py            | 75 ++++++++++---------
 .../collection/imbalance/tests/__init__.py    |  1 +
 docs/developer_guide/deprecation.md           |  1 +
 docs/index.md                                 |  1 +
 6 files changed, 71 insertions(+), 54 deletions(-)
 create mode 100644 aeon/transformations/collection/imbalance/tests/__init__.py

diff --git a/README.md b/README.md
index e1475d6d85..e267f053fb 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@ does not apply:
 - `segmentation`
 - `similarity_search`
 - `visualisation`
+- `transformations.collection.imbalance`
 
 | Overview        |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
 |-----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
diff --git a/aeon/transformations/collection/imbalance/_adasyn.py b/aeon/transformations/collection/imbalance/_adasyn.py
index 72818b72a8..2db87c36d1 100644
--- a/aeon/transformations/collection/imbalance/_adasyn.py
+++ b/aeon/transformations/collection/imbalance/_adasyn.py
@@ -1,17 +1,20 @@
-"""
-implement for imblearn minority class rebalancer ADASYN.
+"""ADASYN over sampling algorithm.
+
 see more in imblearn.over_sampling.ADASYN
 original authors:
 #          Guillaume Lemaitre <g.lemaitre58@gmail.com>
 #          Christos Aridas
 # License: MIT
 """
+
+from collections import OrderedDict
+
 import numpy as np
-from aeon.transformations.collection import BaseCollectionTransformer
+from scipy import sparse
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_random_state
-from scipy import sparse
-from collections import OrderedDict
+
+from aeon.transformations.collection import BaseCollectionTransformer
 
 __maintainer__ = ["TonyBagnall, Chris Qiu"]
 __all__ = ["ADASYN"]
@@ -26,20 +29,27 @@ class ADASYN(BaseCollectionTransformer):
     This method is similar to SMOTE, but it generates different number of
     samples depending on an estimate of the local distribution of the class
     to be oversampled.
+
+    Currently only works with two class problems.
+
     Parameters
     ----------
-    {random_state}
-
     k_neighbors : int or object, default=5
         The nearest neighbors used to define the neighborhood of samples to use
         to generate the synthetic samples. `~sklearn.neighbors.NearestNeighbors`
         instance will be fitted in this case.
+    random_state : int, RandomState instance or None, default=None
+        If `int`, random_state is the seed used by the random number generator;
+        If `RandomState` instance, random_state is the random number generator;
+        If `None`, the random number generator is the `RandomState` instance used
+        by `np.random`.
     """
 
     _tags = {
-        "capability:multivariate": True,
-        "capability:unequal_length": True,
+        "capability:multivariate": False,
+        "capability:unequal_length": False,
         "requires_y": True,
+        "python_dependencies": "imbalanced-learn",
     }
 
     def __init__(self, random_state=None, k_neighbors=5):
@@ -51,9 +61,11 @@ def _fit(self, X, y=None):
         # set the additional_neighbor=1
         self.nn_ = NearestNeighbors(n_neighbors=self.k_neighbors + 1)
 
-        # generate sampling target by targeting all classes but not the majority
+        # resamples all classes except the majority.
         unique, counts = np.unique(y, return_counts=True)
         target_stats = dict(zip(unique, counts))
+        # If two or more classes tie for largest, max() returns the first such key;
+        # np.unique sorts the labels, so the majority is the smallest tied label.
         n_sample_majority = max(target_stats.values())
         class_majority = max(target_stats, key=target_stats.get)
         sampling_strategy = {
@@ -61,21 +73,16 @@ def _fit(self, X, y=None):
             for (key, value) in target_stats.items()
             if key != class_majority
         }
-        self.sampling_strategy_ = OrderedDict(
-            sorted(sampling_strategy.items())
-        )
+        self.sampling_strategy_ = OrderedDict(sorted(sampling_strategy.items()))
         return self
 
     def _transform(self, X, y=None):
-        shape_recover = False  # use to recover the shape of X
-        if X.ndim == 3 and X.shape[1] == 1:
-            X = np.squeeze(X, axis=1)  # remove the middle dimension to be compatible with sklearn
-            shape_recover = True
+        X = np.squeeze(X, axis=1)
         random_state = check_random_state(self.random_state)
         X_resampled = [X.copy()]
         y_resampled = [y.copy()]
 
-        # got the minority class label and the number needs to be generated i.e. num_majority - num_minority
+        # each non-majority class label and the number of samples to generate for it
         for class_sample, n_samples in self.sampling_strategy_.items():
             if n_samples == 0:
                 continue
@@ -135,6 +142,5 @@ def _transform(self, X, y=None):
             X_resampled = np.vstack(X_resampled)
         y_resampled = np.hstack(y_resampled)
 
-        if shape_recover:
-            X_resampled = X_resampled[:, np.newaxis, :]
+        X_resampled = X_resampled[:, np.newaxis, :]
         return X_resampled, y_resampled
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index 36aea38b1c..604179dded 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -1,6 +1,6 @@
-"""
-implement for imblearn minority class rebalancer SMOTE.
-see more in imblearn.over_sampling.SMOTE
+"""SMOTE over sampling algorithm.
+
+See more in imblearn.over_sampling.SMOTE
 original authors:
 #          Guillaume Lemaitre <g.lemaitre58@gmail.com>
 #          Fernando Nogueira
@@ -9,42 +9,58 @@
 # License: MIT
 """
 
+from collections import OrderedDict
+
 import numpy as np
-from aeon.transformations.collection import BaseCollectionTransformer
+from scipy import sparse
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_random_state
-from scipy import sparse
-from collections import OrderedDict
 
-__maintainer__ = ["TonyBagnall, Chris Qiu"]
+from aeon.transformations.collection import BaseCollectionTransformer
+
+__maintainer__ = ["TonyBagnall"]
 __all__ = ["SMOTE"]
 
 
 class SMOTE(BaseCollectionTransformer):
     """
-    Class to perform over-sampling using SMOTE.
+    Over-sampling using the Synthetic Minority Over-sampling TEchnique (SMOTE) [1]_.
+
+    An adaptation of the imbalanced-learn implementation of SMOTE in
+    imblearn.over_sampling.SMOTE. There is no sampling_strategy parameter: every
+    class except the majority is oversampled up to the majority class size, as
+    computed in _fit and stored in sampling_strategy_.
 
-    This object is a simplified implementation of SMOTE - Synthetic Minority
-    Over-sampling Technique as presented in imblearn.over_sampling.SMOTE
-    sampling_strategy is sampling target by targeting all classes but not the
-    majority, which directly expressed in _fit.sampling_strategy.
     Parameters
     ----------
-    {random_state}
-
     k_neighbors : int or object, default=5
         The nearest neighbors used to define the neighborhood of samples to use
         to generate the synthetic samples. `~sklearn.neighbors.NearestNeighbors`
         instance will be fitted in this case.
+    random_state : int, RandomState instance or None, default=None
+        If `int`, random_state is the seed used by the random number generator;
+        If `RandomState` instance, random_state is the random number generator;
+        If `None`, the random number generator is the `RandomState` instance used
+        by `np.random`.
+
+    See Also
+    --------
+    ADASYN
+
+    References
+    ----------
+    .. [1] Chawla et al. SMOTE: synthetic minority over-sampling technique, Journal
+       of Artificial Intelligence Research 16(1): 321–357, 2002.
+       https://dl.acm.org/doi/10.5555/1622407.1622416
     """
 
     _tags = {
-        "capability:multivariate": True,
-        "capability:unequal_length": True,
+        "capability:multivariate": False,
+        "capability:unequal_length": False,
         "requires_y": True,
     }
 
-    def __init__(self, random_state=None, k_neighbors=5):
+    def __init__(self, k_neighbors=5, random_state=None):
         self.random_state = random_state
         self.k_neighbors = k_neighbors
         super().__init__()
@@ -63,20 +79,16 @@ def _fit(self, X, y=None):
             for (key, value) in target_stats.items()
             if key != class_majority
         }
-        self.sampling_strategy_ = OrderedDict(
-            sorted(sampling_strategy.items())
-        )
+        self.sampling_strategy_ = OrderedDict(sorted(sampling_strategy.items()))
         return self
 
     def _transform(self, X, y=None):
-        shape_recover = False   # use to recover the shape of X
-        if X.ndim == 3 and X.shape[1] == 1:
-            X = np.squeeze(X, axis=1)  # remove the middle dimension to be compatible with sklearn
-            shape_recover = True
+        # remove the channel dimension to be compatible with sklearn
+        X = np.squeeze(X, axis=1)
         X_resampled = [X.copy()]
         y_resampled = [y.copy()]
 
-        # got the minority class label and the number needs to be generated i.e. num_majority - num_minority
+        # get each minority class label and the number of samples to generate
         for class_sample, n_samples in self.sampling_strategy_.items():
             if n_samples == 0:
                 continue
@@ -96,15 +108,13 @@ def _transform(self, X, y=None):
         else:
             X_resampled = np.vstack(X_resampled)
         y_resampled = np.hstack(y_resampled)
-        if shape_recover:
-            X_resampled = X_resampled[:, np.newaxis, :]
+        X_resampled = X_resampled[:, np.newaxis, :]
         return X_resampled, y_resampled
 
     def _make_samples(
-            self, X, y_dtype, y_type, nn_data, nn_num, n_samples, step_size=1.0, y=None
+        self, X, y_dtype, y_type, nn_data, nn_num, n_samples, step_size=1.0, y=None
     ):
-        """A support function that returns artificial samples constructed along
-        the line connecting nearest neighbours.
+        """Make artificial samples constructed based on nearest neighbours.
 
         Parameters
         ----------
@@ -156,7 +166,7 @@ def _make_samples(
         return X_new, y_new
 
     def _generate_samples(
-            self, X, nn_data, nn_num, rows, cols, steps, y_type=None, y=None
+        self, X, nn_data, nn_num, rows, cols, steps, y_type=None, y=None
     ):
         r"""Generate a synthetic sample.
 
@@ -221,6 +231,3 @@ def _generate_samples(
             X_new = X[rows] + steps * diffs
 
         return X_new.astype(X.dtype)
-
-
-
diff --git a/aeon/transformations/collection/imbalance/tests/__init__.py b/aeon/transformations/collection/imbalance/tests/__init__.py
new file mode 100644
index 0000000000..55831a6ec8
--- /dev/null
+++ b/aeon/transformations/collection/imbalance/tests/__init__.py
@@ -0,0 +1 @@
+"""Test resampling transformers."""
diff --git a/docs/developer_guide/deprecation.md b/docs/developer_guide/deprecation.md
index 4b10d81cb2..04aadbab3a 100644
--- a/docs/developer_guide/deprecation.md
+++ b/docs/developer_guide/deprecation.md
@@ -24,6 +24,7 @@ experimental. Currently experimental modules are:
 - `segmentation`
 - `similarity_search`
 - `visualisation`
+- `transformations.collection.imbalance`
 
 When we introduce a new module, we may classify it as experimental until the API is
 stable. We will try to not make drastic changes to experimental modules, but we need
diff --git a/docs/index.md b/docs/index.md
index 11b558839e..76fb04e1ce 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -276,6 +276,7 @@ experimental modules are:
 - `segmentation`
 - `similarity_search`
 - `visualisation`
+- `transformations.collection.imbalance`
 
 ```{toctree}
 :caption: Using aeon

From d9b35b79b5df40010cbe792a283009ddbff293d3 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Thu, 23 Jan 2025 17:01:17 +0000
Subject: [PATCH 04/19] inherit from SMOTE

---
 .../collection/imbalance/_adasyn.py           | 78 ++++---------------
 1 file changed, 16 insertions(+), 62 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/_adasyn.py b/aeon/transformations/collection/imbalance/_adasyn.py
index 2db87c36d1..6b487529d8 100644
--- a/aeon/transformations/collection/imbalance/_adasyn.py
+++ b/aeon/transformations/collection/imbalance/_adasyn.py
@@ -1,80 +1,34 @@
-"""ADASYN over sampling algorithm.
-
-see more in imblearn.over_sampling.ADASYN
-original authors:
-#          Guillaume Lemaitre <g.lemaitre58@gmail.com>
-#          Christos Aridas
-# License: MIT
-"""
-
-from collections import OrderedDict
+"""ADASYN over sampling algorithm."""
 
 import numpy as np
 from scipy import sparse
-from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_random_state
 
-from aeon.transformations.collection import BaseCollectionTransformer
+from aeon.transformations.collection.imbalance import SMOTE
 
-__maintainer__ = ["TonyBagnall, Chris Qiu"]
+__maintainer__ = ["TonyBagnall"]
 __all__ = ["ADASYN"]
 
 
-class ADASYN(BaseCollectionTransformer):
+class ADASYN(SMOTE):
     """
-    Class to perform over-sampling using ADASYN .
+    Over-sampling using Adaptive Synthetic Sampling (ADASYN).
 
-    This object is a simplified implementation of ADASYN - Adaptive
-    Synthetic (ADASYN) algorithm as presented in imblearn.over_sampling.ADASYN
-    This method is similar to SMOTE, but it generates different number of
+    Adaptation of imblearn.over_sampling.ADASYN
+    original authors:
+    #          Guillaume Lemaitre <g.lemaitre58@gmail.com>
+    #          Christos Aridas
+    # License: MIT
+
+    This transformer extends SMOTE, but it generates different number of
     samples depending on an estimate of the local distribution of the class
     to be oversampled.
-
-    Currently only works with two class problems.
-
-    Parameters
-    ----------
-    k_neighbors : int or object, default=5
-        The nearest neighbors used to define the neighborhood of samples to use
-        to generate the synthetic samples. `~sklearn.neighbors.NearestNeighbors`
-        instance will be fitted in this case.
-    random_state : int, RandomState instance or None, default=None
-        If `int`, random_state is the seed used by the random number generator;
-        If `RandomState` instance, random_state is the random number generator;
-        If `None`, the random number generator is the `RandomState` instance used
-        by `np.random`.
     """
 
-    _tags = {
-        "capability:multivariate": False,
-        "capability:unequal_length": False,
-        "requires_y": True,
-        "python_dependencies": "imbalanced-learn",
-    }
-
-    def __init__(self, random_state=None, k_neighbors=5):
-        self.random_state = random_state
-        self.k_neighbors = k_neighbors
-        super().__init__()
-
-    def _fit(self, X, y=None):
-        # set the additional_neighbor=1
-        self.nn_ = NearestNeighbors(n_neighbors=self.k_neighbors + 1)
-
-        # resamples all classes except the majority.
-        unique, counts = np.unique(y, return_counts=True)
-        target_stats = dict(zip(unique, counts))
-        # If two or more classes are equal largest, the majority is assumed to be the
-        # one with the largest index.
-        n_sample_majority = max(target_stats.values())
-        class_majority = max(target_stats, key=target_stats.get)
-        sampling_strategy = {
-            key: n_sample_majority - value
-            for (key, value) in target_stats.items()
-            if key != class_majority
-        }
-        self.sampling_strategy_ = OrderedDict(sorted(sampling_strategy.items()))
-        return self
+    def __init__(
+        self,
+    ):
+        super().__init__(random_state=None, k_neighbors=5)
 
     def _transform(self, X, y=None):
         X = np.squeeze(X, axis=1)

From 97c7466d076899b54119c7987fdebeaf65ebafbe Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Fri, 24 Jan 2025 11:12:58 +0000
Subject: [PATCH 05/19] test equivalence to imblearn

---
 .../collection/imbalance/_adasyn.py           |  6 ++--
 .../collection/imbalance/_smote.py            |  4 +--
 .../collection/imbalance/tests/test_adasyn.py | 32 +++++++++++++++++++
 .../collection/imbalance/tests/test_smote.py  | 32 +++++++++++++++++++
 4 files changed, 68 insertions(+), 6 deletions(-)
 create mode 100644 aeon/transformations/collection/imbalance/tests/test_adasyn.py
 create mode 100644 aeon/transformations/collection/imbalance/tests/test_smote.py

diff --git a/aeon/transformations/collection/imbalance/_adasyn.py b/aeon/transformations/collection/imbalance/_adasyn.py
index 6b487529d8..0d78637f86 100644
--- a/aeon/transformations/collection/imbalance/_adasyn.py
+++ b/aeon/transformations/collection/imbalance/_adasyn.py
@@ -25,10 +25,8 @@ class ADASYN(SMOTE):
     to be oversampled.
     """
 
-    def __init__(
-        self,
-    ):
-        super().__init__(random_state=None, k_neighbors=5)
+    def __init__(self, random_state=None, k_neighbors=5):
+        super().__init__(random_state=random_state, k_neighbors=k_neighbors)
 
     def _transform(self, X, y=None):
         X = np.squeeze(X, axis=1)
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index 604179dded..f56e6f7b40 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -66,10 +66,10 @@ def __init__(self, k_neighbors=5, random_state=None):
         super().__init__()
 
     def _fit(self, X, y=None):
-        # set the additional_neighbor=1
+        # set the additional_neighbor required by SMOTE
         self.nn_ = NearestNeighbors(n_neighbors=self.k_neighbors + 1)
 
-        # generate sampling target by targeting all classes but not the majority
+        # generate sampling target by targeting all classes except the majority
         unique, counts = np.unique(y, return_counts=True)
         target_stats = dict(zip(unique, counts))
         n_sample_majority = max(target_stats.values())
diff --git a/aeon/transformations/collection/imbalance/tests/test_adasyn.py b/aeon/transformations/collection/imbalance/tests/test_adasyn.py
new file mode 100644
index 0000000000..3557f85cb4
--- /dev/null
+++ b/aeon/transformations/collection/imbalance/tests/test_adasyn.py
@@ -0,0 +1,32 @@
+"""Test ADASYN oversampler ported from imblearn."""
+
+import numpy as np
+import pytest
+
+from aeon.testing.data_generation import make_example_3d_numpy
+from aeon.transformations.collection.imbalance import ADASYN
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(
+        "imbalanced-learn",
+        package_import_alias={"imbalanced-learn": "imblearn"},
+        severity="none",
+    ),
+    reason="skip test if required soft dependency imbalanced-learn not available",
+)
+def test_equivalence_imbalance():
+    """Test ported ADASYN code produces the same as imblearn version."""
+    from imblearn.over_sampling import ADASYN as imbADASYN
+
+    X, y = make_example_3d_numpy(n_cases=20, n_channels=1)
+    y = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
+    X = X.squeeze()
+    s1 = imbADASYN(random_state=49)
+    X2, y2 = s1.fit_resample(X, y)
+    s2 = ADASYN(random_state=49)
+    X3, y3 = s2.fit_transform(X, y)
+    X3 = X3.squeeze()
+    assert np.array_equal(y2, y3)
+    assert np.allclose(X2, X3, atol=1e-4)
diff --git a/aeon/transformations/collection/imbalance/tests/test_smote.py b/aeon/transformations/collection/imbalance/tests/test_smote.py
new file mode 100644
index 0000000000..53cc95cac7
--- /dev/null
+++ b/aeon/transformations/collection/imbalance/tests/test_smote.py
@@ -0,0 +1,32 @@
+"""Test function for SMOTE."""
+
+import numpy as np
+import pytest
+
+from aeon.testing.data_generation import make_example_3d_numpy
+from aeon.transformations.collection.imbalance import SMOTE
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(
+        "imbalanced-learn",
+        package_import_alias={"imbalanced-learn": "imblearn"},
+        severity="none",
+    ),
+    reason="skip test if required soft dependency imbalanced-learn not available",
+)
+def test_equivalence_imbalance():
+    """Test ported SMOTE code produces the same as imblearn version."""
+    from imblearn.over_sampling import SMOTE as imbSMOTE
+
+    X, y = make_example_3d_numpy(n_cases=20, n_channels=1)
+    y = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
+    X = X.squeeze()
+    s1 = imbSMOTE(random_state=49)
+    X2, y2 = s1.fit_resample(X, y)
+    s2 = SMOTE(random_state=49)
+    X3, y3 = s2.fit_transform(X, y)
+    X3 = X3.squeeze()
+    assert np.array_equal(y2, y3)
+    assert np.allclose(X2, X3, atol=1e-4)

From a440a90a43e88d2149cf4a181875e4aad03360fb Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Fri, 24 Jan 2025 11:18:34 +0000
Subject: [PATCH 06/19] move tests

---
 .../collection/imbalance/tests/test_adasyn.py | 29 +++++++++
 .../collection/imbalance/tests/test_smote.py  | 25 ++++++++
 .../collection/tests/test_imbalance.py        | 60 -------------------
 3 files changed, 54 insertions(+), 60 deletions(-)
 delete mode 100644 aeon/transformations/collection/tests/test_imbalance.py

diff --git a/aeon/transformations/collection/imbalance/tests/test_adasyn.py b/aeon/transformations/collection/imbalance/tests/test_adasyn.py
index 3557f85cb4..0bb5c62ea6 100644
--- a/aeon/transformations/collection/imbalance/tests/test_adasyn.py
+++ b/aeon/transformations/collection/imbalance/tests/test_adasyn.py
@@ -8,6 +8,35 @@
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
 
+def test_adasyn():
+    """Test the ADASYN class.
+
+    This function creates a 3D numpy array, applies
+    ADASYN using the ADASYN class, and asserts that the
+    transformed data has a balanced number of samples.
+    ADASYN is a variant of SMOTE that generates synthetic samples,
+    but it focuses on generating samples near the decision boundary.
+    Therefore, sometimes, it may generate more or less samples than SMOTE,
+    which is why we only check if the number of samples is nearly balanced.
+    """
+    n_samples = 100  # Total number of labels
+    majority_num = 90  # number of majority class
+    minority_num = n_samples - majority_num  # number of minority class
+
+    X = np.random.rand(n_samples, 1, 10)
+    y = np.array([0] * majority_num + [1] * minority_num)
+
+    transformer = ADASYN()
+    transformer.fit(X, y)
+    res_X, res_y = transformer.transform(X, y)
+    _, res_count = np.unique(res_y, return_counts=True)
+
+    assert np.abs(len(res_X) - 2 * majority_num) < minority_num
+    assert np.abs(len(res_y) - 2 * majority_num) < minority_num
+    assert res_count[0] == majority_num
+    assert np.abs(res_count[0] - res_count[1]) < minority_num
+
+
 @pytest.mark.skipif(
     not _check_soft_dependencies(
         "imbalanced-learn",
diff --git a/aeon/transformations/collection/imbalance/tests/test_smote.py b/aeon/transformations/collection/imbalance/tests/test_smote.py
index 53cc95cac7..70189633d0 100644
--- a/aeon/transformations/collection/imbalance/tests/test_smote.py
+++ b/aeon/transformations/collection/imbalance/tests/test_smote.py
@@ -8,6 +8,31 @@
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
 
+def test_smote():
+    """Test the SMOTE class.
+
+    This function creates a 3D numpy array, applies
+    SMOTE using the SMOTE class, and asserts that the
+    transformed data has a balanced number of samples.
+    """
+    n_samples = 100  # Total number of labels
+    majority_num = 90  # number of majority class
+    minority_num = n_samples - majority_num  # number of minority class
+
+    X = np.random.rand(n_samples, 1, 10)
+    y = np.array([0] * majority_num + [1] * minority_num)
+
+    transformer = SMOTE()
+    transformer.fit(X, y)
+    res_X, res_y = transformer.transform(X, y)
+    _, res_count = np.unique(res_y, return_counts=True)
+
+    assert len(res_X) == 2 * majority_num
+    assert len(res_y) == 2 * majority_num
+    assert res_count[0] == majority_num
+    assert res_count[1] == majority_num
+
+
 @pytest.mark.skipif(
     not _check_soft_dependencies(
         "imbalanced-learn",
diff --git a/aeon/transformations/collection/tests/test_imbalance.py b/aeon/transformations/collection/tests/test_imbalance.py
deleted file mode 100644
index f56df6fcfe..0000000000
--- a/aeon/transformations/collection/tests/test_imbalance.py
+++ /dev/null
@@ -1,60 +0,0 @@
-"""Tests for the rebalancer transformers."""
-
-import numpy as np
-import pytest
-
-from aeon.transformations.collection.imbalance import SMOTE, ADASYN
-
-
-def test_smote():
-    """Test the SMOTE class.
-
-    This function creates a 3D numpy array, applies
-    SMOTE using the SMOTE class, and asserts that the
-    transformed data has a balanced number of samples.
-    """
-    n_samples = 100  # Total number of labels
-    majority_num = 90  # number of majority class
-    minority_num = n_samples - majority_num  # number of minority class
-
-    X = np.random.rand(n_samples, 1, 10)
-    y = np.array([0] * majority_num + [1] * minority_num)
-
-    transformer = SMOTE()
-    transformer.fit(X, y)
-    res_X, res_y = transformer.transform(X, y)
-    _, res_count = np.unique(res_y, return_counts=True)
-
-    assert len(res_X) == 2 * majority_num
-    assert len(res_y) == 2 * majority_num
-    assert res_count[0] == majority_num
-    assert res_count[1] == majority_num
-
-
-def test_adasyn():
-    """Test the ADASYN class.
-
-    This function creates a 3D numpy array, applies
-    ADASYN using the ADASYN class, and asserts that the
-    transformed data has a balanced number of samples.
-    ADASYN is a variant of SMOTE that generates synthetic samples,
-    but it focuses on generating samples near the decision boundary.
-    Therefore, sometimes, it may generate more or less samples than SMOTE,
-    which is why we only check if the number of samples is nearly balanced.
-    """
-    n_samples = 100  # Total number of labels
-    majority_num = 90  # number of majority class
-    minority_num = n_samples - majority_num  # number of minority class
-
-    X = np.random.rand(n_samples, 1, 10)
-    y = np.array([0] * majority_num + [1] * minority_num)
-
-    transformer = ADASYN()
-    transformer.fit(X, y)
-    res_X, res_y = transformer.transform(X, y)
-    _, res_count = np.unique(res_y, return_counts=True)
-
-    assert np.abs(len(res_X) - 2 * majority_num) < minority_num
-    assert np.abs(len(res_y) - 2 * majority_num) < minority_num
-    assert res_count[0] == majority_num
-    assert np.abs(res_count[0] - res_count[1]) < minority_num

From 6e24ef0c5327eabd284fa988095b92ae979b0ffe Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Fri, 24 Jan 2025 11:29:58 +0000
Subject: [PATCH 07/19] format

---
 aeon/transformations/collection/imbalance/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/__init__.py b/aeon/transformations/collection/imbalance/__init__.py
index 280251ad04..38441e9e9f 100644
--- a/aeon/transformations/collection/imbalance/__init__.py
+++ b/aeon/transformations/collection/imbalance/__init__.py
@@ -1,6 +1,6 @@
 """Supervised transformers to rebalance colelctions of time series."""
 
-__all__ = ["SMOTE", "ADASYN"]
+__all__ = ["ADASYN", "SMOTE"]
 
-from aeon.transformations.collection.imbalance._smote import SMOTE
 from aeon.transformations.collection.imbalance._adasyn import ADASYN
+from aeon.transformations.collection.imbalance._smote import SMOTE

From c73111755a727ecdf804010b85a8f6295322f13b Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Fri, 24 Jan 2025 11:50:54 +0000
Subject: [PATCH 08/19] import

---
 .../collection/imbalance/_adasyn.py           | 17 ++---------
 .../collection/imbalance/_smote.py            | 29 ++++++-------------
 2 files changed, 12 insertions(+), 34 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/_adasyn.py b/aeon/transformations/collection/imbalance/_adasyn.py
index 0d78637f86..412007009d 100644
--- a/aeon/transformations/collection/imbalance/_adasyn.py
+++ b/aeon/transformations/collection/imbalance/_adasyn.py
@@ -1,10 +1,9 @@
 """ADASYN over sampling algorithm."""
 
 import numpy as np
-from scipy import sparse
 from sklearn.utils import check_random_state
 
-from aeon.transformations.collection.imbalance import SMOTE
+from aeon.transformations.collection.imbalance._smote import SMOTE
 
 __maintainer__ = ["TonyBagnall"]
 __all__ = ["ADASYN"]
@@ -75,23 +74,13 @@ def _transform(self, X, y=None):
             cols = random_state.choice(n_neighbors, size=n_samples)
             diffs = X_class[nns[rows, cols]] - X_class[rows]
             steps = random_state.uniform(size=(n_samples, 1))
-
-            if sparse.issparse(X):
-                sparse_func = type(X).__name__
-                steps = getattr(sparse, sparse_func)(steps)
-                X_new = X_class[rows] + steps.multiply(diffs)
-            else:
-                X_new = X_class[rows] + steps * diffs
+            X_new = X_class[rows] + steps * diffs
 
             X_new = X_new.astype(X.dtype)
             y_new = np.full(n_samples, fill_value=class_sample, dtype=y.dtype)
             X_resampled.append(X_new)
             y_resampled.append(y_new)
-
-        if sparse.issparse(X):
-            X_resampled = sparse.vstack(X_resampled, format=X.format)
-        else:
-            X_resampled = np.vstack(X_resampled)
+        X_resampled = np.vstack(X_resampled)
         y_resampled = np.hstack(y_resampled)
 
         X_resampled = X_resampled[:, np.newaxis, :]
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index f56e6f7b40..f8b7084e5e 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -12,7 +12,6 @@
 from collections import OrderedDict
 
 import numpy as np
-from scipy import sparse
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_random_state
 
@@ -33,10 +32,10 @@ class SMOTE(BaseCollectionTransformer):
 
     Parameters
     ----------
-    k_neighbors : int or object, default=5
-        The nearest neighbors used to define the neighborhood of samples to use
-        to generate the synthetic samples. `~sklearn.neighbors.NearestNeighbors`
-        instance will be fitted in this case.
+    k_neighbors : int, default=5
+        The number of nearest neighbors used to define the neighborhood of samples
+        to use to generate the synthetic time series.
+        `~sklearn.neighbors.NearestNeighbors` instance will be fitted in this case.
     random_state : int, RandomState instance or None, default=None
         If `int`, random_state is the seed used by the random number generator;
         If `RandomState` instance, random_state is the random number generator;
@@ -102,11 +101,7 @@ def _transform(self, X, y=None):
             )
             X_resampled.append(X_new)
             y_resampled.append(y_new)
-
-        if sparse.issparse(X):
-            X_resampled = sparse.vstack(X_resampled, format=X.format)
-        else:
-            X_resampled = np.vstack(X_resampled)
+        X_resampled = np.vstack(X_resampled)
         y_resampled = np.hstack(y_resampled)
         X_resampled = X_resampled[:, np.newaxis, :]
         return X_resampled, y_resampled
@@ -118,8 +113,9 @@ def _make_samples(
 
         Parameters
         ----------
-        X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            Points from which the points will be created.
+        X : np.ndarray
+            Shape (n_cases, n_timepoints), time series from which the new series will
+            be created.
 
         y_dtype : dtype
             The data type of the targets.
@@ -222,12 +218,5 @@ def _generate_samples(
             diffs[mask_pair_samples] *= random_state.uniform(
                 low=0.0, high=0.5, size=(mask_pair_samples.sum(), 1)
             )
-
-        if sparse.issparse(X):
-            sparse_func = type(X).__name__
-            steps = getattr(sparse, sparse_func)(steps)
-            X_new = X[rows] + steps.multiply(diffs)
-        else:
-            X_new = X[rows] + steps * diffs
-
+        X_new = X[rows] + steps * diffs
         return X_new.astype(X.dtype)

From 770ea7515b35ac0a7fe104081e69eab6fae9ba50 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Fri, 24 Jan 2025 13:09:14 +0000
Subject: [PATCH 09/19] add test parameters

---
 .../_yield_estimator_checks.py                |  5 ++-
 .../collection/imbalance/_smote.py            | 38 +++++++++++++++----
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py
index 70f714d4d9..b90e15df68 100644
--- a/aeon/testing/estimator_checking/_yield_estimator_checks.py
+++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py
@@ -637,7 +637,10 @@ def check_persistence_via_pickle(estimator, datatype):
 def check_fit_deterministic(estimator, datatype):
     """Test that fit is deterministic.
 
-    Check that calling fit twice is equivalent to calling it once.
+    Check that calling fit twice is equivalent to calling it once, in terms of the
+    output of non-state changing methods such as predict and transform. Calls
+    fit, then calls all non-state changing methods, then calls fit and non-state
+    changing methods again, checking the output is the same.
     """
     estimator = _clone_estimator(estimator, random_state=0)
     _run_estimator_method(estimator, "fit", datatype, "train")
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index f8b7084e5e..ee00c78174 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -54,8 +54,6 @@ class SMOTE(BaseCollectionTransformer):
     """
 
     _tags = {
-        "capability:multivariate": False,
-        "capability:unequal_length": False,
         "requires_y": True,
     }
 
@@ -143,11 +141,11 @@ def _make_samples(
 
         Returns
         -------
-        X_new : {ndarray, sparse matrix} of shape (n_samples_new, n_features)
-            Synthetically generated samples.
+        X_new : ndarray
+            Synthetically generated samples of shape (n_samples_new, n_timepoints).
 
-        y_new : ndarray of shape (n_samples_new,)
-            Target values for synthetic samples.
+        y_new : ndarray
+            Target values for synthetic samples of shape (n_samples_new,).
         """
         random_state = check_random_state(self.random_state)
         samples_indices = random_state.randint(low=0, high=nn_num.size, size=n_samples)
@@ -178,8 +176,9 @@ def _generate_samples(
 
         Parameters
         ----------
-        X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            Points from which the points will be created.
+        X : np.ndarray
+            Series from which the points will be created of shape (n_cases,
+            n_timepoints).
 
         nn_data : ndarray of shape (n_samples_all, n_features)
             Data set carrying all the neighbours to be used.
@@ -220,3 +219,26 @@ def _generate_samples(
             )
         X_new = X[rows] + steps * diffs
         return X_new.astype(X.dtype)
+
+    @classmethod
+    def _get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+            This estimator defines no special sets, so the same parameters are
+            returned for every value, including sets used elsewhere such as
+            "results_comparison" (used in some classifiers to compare against
+            previously generated results).
+
+        Returns
+        -------
+        params : dict or list of dict, default={}
+            Parameters to create testing instances of the class.
+            Each dict are parameters to construct an "interesting" test instance, i.e.,
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+        """
+        return {"k_neighbors": 1}

From 2337f67c04c849834257160f20dc08e70451abb1 Mon Sep 17 00:00:00 2001
From: Chuanhang Qiu <80885865+LinGinQiu@users.noreply.github.com>
Date: Thu, 15 May 2025 20:25:13 +0100
Subject: [PATCH 10/19] Ported OHIT (#2573)

---
 .../collection/imbalance/__init__.py          |   3 +-
 .../collection/imbalance/_ohit.py             | 256 ++++++++++++++++++
 .../collection/imbalance/tests/test_ohit.py   |  31 +++
 3 files changed, 289 insertions(+), 1 deletion(-)
 create mode 100644 aeon/transformations/collection/imbalance/_ohit.py
 create mode 100644 aeon/transformations/collection/imbalance/tests/test_ohit.py

diff --git a/aeon/transformations/collection/imbalance/__init__.py b/aeon/transformations/collection/imbalance/__init__.py
index 38441e9e9f..2431c4c363 100644
--- a/aeon/transformations/collection/imbalance/__init__.py
+++ b/aeon/transformations/collection/imbalance/__init__.py
@@ -1,6 +1,7 @@
 """Supervised transformers to rebalance colelctions of time series."""
 
-__all__ = ["ADASYN", "SMOTE"]
+__all__ = ["ADASYN", "SMOTE", "OHIT"]
 
 from aeon.transformations.collection.imbalance._adasyn import ADASYN
 from aeon.transformations.collection.imbalance._smote import SMOTE
+from aeon.transformations.collection.imbalance._ohit import OHIT
\ No newline at end of file
diff --git a/aeon/transformations/collection/imbalance/_ohit.py b/aeon/transformations/collection/imbalance/_ohit.py
new file mode 100644
index 0000000000..94bb86eedf
--- /dev/null
+++ b/aeon/transformations/collection/imbalance/_ohit.py
@@ -0,0 +1,256 @@
+"""OHIT over sampling algorithm.
+
+An adaptation of the oversampling method based on DRSNN clustering.
+
+Original authors:
+#          zhutuanfei
+"""
+
+from collections import OrderedDict
+
+import numpy as np
+from scipy.stats import multivariate_normal
+from sklearn.utils import check_random_state
+from aeon.transformations.collection import BaseCollectionTransformer
+from sklearn.neighbors import NearestNeighbors
+from sklearn.covariance import ledoit_wolf
+
+
+__all__ = ["OHIT"]
+
+
+class OHIT(BaseCollectionTransformer):
+    """
+    Over-sampling using the Over-sampling based on (OHIT).
+
+    This method is based on Density-Ratio Shared Nearest Neighbor (DRSNN) clustering to find high-density regions
+    of minority class samples and generate synthetic samples within these clusters and Shrinkage estimation of
+    large-dimensional covariance matrix
+
+    DRSNN also contains three parameters(i.e.,drT ,k and kapa),it is capable of selecting the proper value for
+    drT around 1.In addition,k and kapa can be set in a complementary way to avoid the merging and
+    dissociation of clusters,that is,a large k with a relatively low kapa.
+    Parameters
+    ----------
+    k : int, the nearest neighbor parameter in SNN similarity
+        if None, set k = int(np.ceil(n ** 0.5 * 1.25)) where n is the number of minority samples
+    kapa : int, the nearest neighbor parameter in defining density ratio
+        if None, set kapa = int(np.ceil(n ** 0.5)) where n is the number of minority samples
+    drT : float, default=0.9, the threshold of density ratio.
+    distance : str or callable, default='euclidean'
+        Distance metric to use for KNN in SNN similarity.
+    random_state : int, RandomState instance or None, default=None
+        If `int`, random_state is the seed used by the random number generator;
+        If `RandomState` instance, random_state is the random number generator;
+        If `None`, the random number generator is the `RandomState` instance used
+        by `np.random`.
+    """
+
+    _tags = {
+        "requires_y": True,
+    }
+
+    def __init__(self, k=None, kapa=None, drT=0.9, distance='euclidean' ,random_state=None):
+        self.k = k
+        self.kapa = kapa
+        self.drT = drT
+        self.distance = distance
+        self.random_state = random_state
+        super().__init__()
+
+    def _fit(self, X, y=None):
+
+        unique, counts = np.unique(y, return_counts=True)
+        target_stats = dict(zip(unique, counts))
+        n_sample_majority = max(target_stats.values())
+        class_majority = max(target_stats, key=target_stats.get)
+        sampling_strategy = {
+            key: n_sample_majority - value
+            for (key, value) in target_stats.items()
+            if key != class_majority
+        }
+        self.sampling_strategy_ = OrderedDict(sorted(sampling_strategy.items()))
+
+        return self
+
+    def _transform(self, X, y=None):
+        X = np.squeeze(X, axis=1)
+        X_resampled = [X.copy()]
+        y_resampled = [y.copy()]
+
+        for class_sample, n_samples in self.sampling_strategy_.items():
+            if n_samples == 0:
+                continue
+            target_class_indices = np.flatnonzero(y == class_sample)
+            if len(target_class_indices) == 1:
+                X_new = np.tile(X[target_class_indices], (n_samples, 1))
+                y_new = np.full(n_samples, fill_value=class_sample, dtype=y.dtype)
+                X_resampled.append(X_new)
+                y_resampled.append(y_new)
+                continue
+            X_class = X[target_class_indices]
+            n, m = X_class.shape
+            # set the default value of k and kapa
+            if self.k is None:
+                self.k = int(np.ceil(n ** 0.5 * 1.25))
+            if self.kapa is None:
+                self.kapa = int(np.ceil(n ** 0.5))
+
+            # Initialize NearestNeighbors for SNN similarity
+            self.NearestNeighbors = NearestNeighbors(metric=self.distance, n_neighbors=self.k + 1)
+
+            clusters, cluster_label = self._cluster_minority(X_class)
+            Me, eigen_matrices, eigen_values = self._covStruct(X_class, clusters)
+
+            # allocate the number of synthetic samples to be generated for each cluster
+            random_state = check_random_state(self.random_state)
+            os_ind = np.tile(np.arange(0, n), int(np.floor(n_samples / n)))
+            remaining = random_state.choice(np.arange(0, n), n_samples - n * int(np.floor(n_samples / n)), replace=False)
+            os_ind = np.concatenate([os_ind, remaining])
+            R = 1.25 if len(clusters) > 1 else 1.1
+
+            """generate  the structure-preserving synthetic samples for each cluster"""
+            X_new = np.zeros((n_samples, m))
+            count = 0
+            # consider the samples in the cluster with label 0 i.e. the samples that are not clustered
+            X_class_0 = X_class[cluster_label == 0]
+            if X_class_0.size != 0:
+                gen_0 = np.sum(np.isin(os_ind, np.where(cluster_label == 0)[0]))
+                idx_0 = random_state.choice(len(X_class_0), gen_0, replace=True)
+                X_new[count:count + gen_0, :] = X_class_0[idx_0]
+                count += gen_0
+            for i, cluster in enumerate(clusters):
+                gen_i = np.sum(np.isin(os_ind, np.where(cluster_label == (i + 1))[0]))
+                X_new[count:count + gen_i, :] = self._generate_synthetic_samples(
+                                                Me[i], eigen_matrices[i], eigen_values[i], gen_i, R)
+                count += gen_i
+
+            assert count == n_samples
+            X_resampled.append(X_new)
+            y_new = np.full(n_samples, fill_value=class_sample, dtype=y.dtype)
+            y_resampled.append(y_new)
+
+        X_resampled = np.vstack(X_resampled)
+        y_resampled = np.hstack(y_resampled)
+        X_resampled = X_resampled[:, np.newaxis, :]
+        return X_resampled, y_resampled
+
+    def _cluster_minority(self, X):
+        """Apply DRSNN clustering on minority class samples."""
+        n = X.shape[0]
+        k = self.k
+        kapa = self.kapa
+        drT = self.drT
+
+        self.NearestNeighbors.fit(X)
+        neighbors = self.NearestNeighbors.kneighbors(X, return_distance=False)[:,1:]
+        """ construct the shared nearest neighbor similarity """
+        strength = np.zeros((n, n))
+        for i in range(n):
+            for j in range(i + 1, n):
+                shared_nn = np.intersect1d(neighbors[i, :k], neighbors[j, :k])
+                strength[i, j] = strength[j, i] = np.sum((k + 1 - np.searchsorted(neighbors[i, :k], shared_nn)) *
+                                                   (k + 1 - np.searchsorted(neighbors[j, :k], shared_nn)))
+
+        """ construct the shared nearest neighbor graph """
+        strength_nn = np.sort(strength, axis=1)[:, ::-1][:, :k]
+        idx_nn = np.argsort(strength, axis=1)[:, ::-1]
+        graph = np.zeros((n, k))
+        for i in range(n):
+            for j in range(k):
+                if np.any(idx_nn[idx_nn[i, j], :k] == i):
+                    graph[i, j] = 1
+
+        density = np.sum(strength_nn * graph, axis=1)
+        density_ratio = np.zeros(n)
+        for i in range(n):
+            non_noise = np.where(density[idx_nn[i, :kapa]] != 0)[0]
+            if non_noise.size == 0:
+                density_ratio[i] = 0
+            else:
+                density_ratio[i] = density[i] / np.mean(density[idx_nn[i, non_noise]])
+
+        """ identify core points """
+        core_idx = np.where(density_ratio > drT)[0]
+        """ find directly density-reachable samples for each core point"""
+        neighborhood = {core: set(idx_nn[core, :kapa]) for core in core_idx}
+        for i in core_idx:
+            for j in core_idx:
+                if np.any(idx_nn[j, :kapa] == i):
+                    neighborhood[i].add(j)
+        neighborhood = {key: list(value) for key, value in neighborhood.items()}
+
+        clusters = []
+        cluster_label = np.zeros(len(neighbors), dtype=int)
+        cluster_id = 0
+
+        for i in core_idx:
+            if cluster_label[i] == 0:
+                cluster_id += 1
+                seed = [i]
+                clusters.append(set(seed))
+                while seed:
+                    point = seed.pop(0)
+                    idx = np.where(core_idx == point)[0]
+                    if idx.size > 0 and cluster_label[point] == 0:
+                        seed.extend(neighborhood[point])
+                        clusters[-1].update(neighborhood[point])
+                    cluster_label[point] = cluster_id
+        # no cluster has been found, the whole samples are taken as one cluster
+        if len(clusters) == 0:
+            clusters.append(list(range(n)))
+            cluster_label = np.ones(n, dtype=int)
+        return clusters, cluster_label
+
+    def _covStruct(self, data, clusters):
+        """
+        Calculate the covariance matrix of the minority samples.
+        """
+        Me, Eigen_matrices, Eigen_values = [], [], []
+        for cluster in clusters:
+            cluster = list(cluster)
+            cluster_data = data[cluster]
+            sigma, shrinkage = ledoit_wolf(cluster_data)
+            me = np.mean(cluster_data, axis=0)
+            eigenValues, eigenVectors = np.linalg.eigh(sigma)
+            eigenValues = np.diag((eigenValues))
+            Me.append(me)
+            Eigen_matrices.append(eigenVectors)
+            Eigen_values.append(eigenValues)
+        return Me, Eigen_matrices, Eigen_values
+
+    def _generate_synthetic_samples(self, Me, eigenMatrix, eigenValue, eta, R):
+        """Generate synthetic samples based on clustered minority samples."""
+        # Initialize the output sample generator and probability arrays
+        n_samples = int(np.ceil(eta * R))
+        SampGen = np.zeros((n_samples, len(Me)))
+        Prob = np.zeros(n_samples)
+
+        # Calculate the square root of the absolute eigenvalues
+        DD = np.sqrt(np.abs(np.diag(eigenValue)))
+        DD = DD.reshape(1, -1)
+
+        # Initialize mean and covariance for the multivariate normal distribution
+        Mu = np.zeros(len(Me))
+        Sigma = np.eye(len(Me))
+
+        for cnt in range(n_samples):
+            # Generate a sample from the multivariate normal distribution
+            S = np.random.multivariate_normal(Mu, Sigma, 1)
+            Prob[cnt] = multivariate_normal.pdf(S, Mu, Sigma)
+
+            # Scale the sample with the eigenvalues
+            S = S * DD
+            # Generate the final sample by applying the eigenvector matrix
+            x = S @ eigenMatrix.T + Me
+            SampGen[cnt, :] = x
+
+        # Sort the samples based on the probability in descending order
+        sorted_indices = np.argsort(Prob)[::-1]
+        SampGen = SampGen[sorted_indices[:eta], :]
+
+        return SampGen
+
+    @classmethod
+    def _get_test_params(cls, parameter_set="default"):
+        return {"n_clusters": 3}
\ No newline at end of file
diff --git a/aeon/transformations/collection/imbalance/tests/test_ohit.py b/aeon/transformations/collection/imbalance/tests/test_ohit.py
new file mode 100644
index 0000000000..58d0794ba6
--- /dev/null
+++ b/aeon/transformations/collection/imbalance/tests/test_ohit.py
@@ -0,0 +1,31 @@
+"""Test function for OHIT."""
+
+import numpy as np
+import pytest
+from aeon.transformations.collection.imbalance import OHIT
+
+
+def test_ohit():
+    """Test the OHIT class.
+
+    This function creates a 3D numpy array, applies
+    OHIT using the OHIT class, and asserts that the
+    transformed data has a balanced number of samples.
+    """
+    n_samples = 100  # Total number of labels
+    majority_num = 90  # number of majority class
+    minority_num = n_samples - majority_num  # number of minority class
+
+    X = np.random.rand(n_samples, 1, 10)
+    y = np.array([0] * majority_num + [1] * minority_num)
+
+    transformer = OHIT()
+    transformer.fit(X, y)
+    res_X, res_y = transformer.transform(X, y)
+    _, res_count = np.unique(res_y, return_counts=True)
+
+    assert len(res_X) == 2 * majority_num
+    assert len(res_y) == 2 * majority_num
+    assert res_count[0] == majority_num
+    assert res_count[1] == majority_num
+

From eed16110aa728f32ca44a3048f6b4220c36314a5 Mon Sep 17 00:00:00 2001
From: MatthewMiddlehurst
 <25731235+MatthewMiddlehurst@users.noreply.github.com>
Date: Thu, 15 May 2025 19:26:35 +0000
Subject: [PATCH 11/19] Automatic `pre-commit` fixes

---
 .../collection/imbalance/__init__.py          |  2 +-
 .../collection/imbalance/_ohit.py             | 44 ++++++++++++-------
 .../collection/imbalance/tests/test_ohit.py   |  2 +-
 3 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/__init__.py b/aeon/transformations/collection/imbalance/__init__.py
index 2431c4c363..d6ee723069 100644
--- a/aeon/transformations/collection/imbalance/__init__.py
+++ b/aeon/transformations/collection/imbalance/__init__.py
@@ -3,5 +3,5 @@
 __all__ = ["ADASYN", "SMOTE", "OHIT"]
 
 from aeon.transformations.collection.imbalance._adasyn import ADASYN
+from aeon.transformations.collection.imbalance._ohit import OHIT
 from aeon.transformations.collection.imbalance._smote import SMOTE
-from aeon.transformations.collection.imbalance._ohit import OHIT
\ No newline at end of file
diff --git a/aeon/transformations/collection/imbalance/_ohit.py b/aeon/transformations/collection/imbalance/_ohit.py
index 94bb86eedf..cde354e275 100644
--- a/aeon/transformations/collection/imbalance/_ohit.py
+++ b/aeon/transformations/collection/imbalance/_ohit.py
@@ -10,11 +10,11 @@
 
 import numpy as np
 from scipy.stats import multivariate_normal
-from sklearn.utils import check_random_state
-from aeon.transformations.collection import BaseCollectionTransformer
-from sklearn.neighbors import NearestNeighbors
 from sklearn.covariance import ledoit_wolf
+from sklearn.neighbors import NearestNeighbors
+from sklearn.utils import check_random_state
 
+from aeon.transformations.collection import BaseCollectionTransformer
 
 __all__ = ["OHIT"]
 
@@ -30,6 +30,7 @@ class OHIT(BaseCollectionTransformer):
     DRSNN also contains three parameters(i.e.,drT ,k and kapa),it is capable of selecting the proper value for
     drT around 1.In addition,k and kapa can be set in a complementary way to avoid the merging and
     dissociation of clusters,that is,a large k with a relatively low kapa.
+
     Parameters
     ----------
     k : int, the nearest neighbor parameter in SNN similarity
@@ -50,7 +51,9 @@ class OHIT(BaseCollectionTransformer):
         "requires_y": True,
     }
 
-    def __init__(self, k=None, kapa=None, drT=0.9, distance='euclidean' ,random_state=None):
+    def __init__(
+        self, k=None, kapa=None, drT=0.9, distance="euclidean", random_state=None
+    ):
         self.k = k
         self.kapa = kapa
         self.drT = drT
@@ -92,12 +95,14 @@ def _transform(self, X, y=None):
             n, m = X_class.shape
             # set the default value of k and kapa
             if self.k is None:
-                self.k = int(np.ceil(n ** 0.5 * 1.25))
+                self.k = int(np.ceil(n**0.5 * 1.25))
             if self.kapa is None:
-                self.kapa = int(np.ceil(n ** 0.5))
+                self.kapa = int(np.ceil(n**0.5))
 
             # Initialize NearestNeighbors for SNN similarity
-            self.NearestNeighbors = NearestNeighbors(metric=self.distance, n_neighbors=self.k + 1)
+            self.NearestNeighbors = NearestNeighbors(
+                metric=self.distance, n_neighbors=self.k + 1
+            )
 
             clusters, cluster_label = self._cluster_minority(X_class)
             Me, eigen_matrices, eigen_values = self._covStruct(X_class, clusters)
@@ -105,7 +110,11 @@ def _transform(self, X, y=None):
             # allocate the number of synthetic samples to be generated for each cluster
             random_state = check_random_state(self.random_state)
             os_ind = np.tile(np.arange(0, n), int(np.floor(n_samples / n)))
-            remaining = random_state.choice(np.arange(0, n), n_samples - n * int(np.floor(n_samples / n)), replace=False)
+            remaining = random_state.choice(
+                np.arange(0, n),
+                n_samples - n * int(np.floor(n_samples / n)),
+                replace=False,
+            )
             os_ind = np.concatenate([os_ind, remaining])
             R = 1.25 if len(clusters) > 1 else 1.1
 
@@ -117,12 +126,13 @@ def _transform(self, X, y=None):
             if X_class_0.size != 0:
                 gen_0 = np.sum(np.isin(os_ind, np.where(cluster_label == 0)[0]))
                 idx_0 = random_state.choice(len(X_class_0), gen_0, replace=True)
-                X_new[count:count + gen_0, :] = X_class_0[idx_0]
+                X_new[count : count + gen_0, :] = X_class_0[idx_0]
                 count += gen_0
             for i, cluster in enumerate(clusters):
                 gen_i = np.sum(np.isin(os_ind, np.where(cluster_label == (i + 1))[0]))
-                X_new[count:count + gen_i, :] = self._generate_synthetic_samples(
-                                                Me[i], eigen_matrices[i], eigen_values[i], gen_i, R)
+                X_new[count : count + gen_i, :] = self._generate_synthetic_samples(
+                    Me[i], eigen_matrices[i], eigen_values[i], gen_i, R
+                )
                 count += gen_i
 
             assert count == n_samples
@@ -143,14 +153,16 @@ def _cluster_minority(self, X):
         drT = self.drT
 
         self.NearestNeighbors.fit(X)
-        neighbors = self.NearestNeighbors.kneighbors(X, return_distance=False)[:,1:]
+        neighbors = self.NearestNeighbors.kneighbors(X, return_distance=False)[:, 1:]
         """ construct the shared nearest neighbor similarity """
         strength = np.zeros((n, n))
         for i in range(n):
             for j in range(i + 1, n):
                 shared_nn = np.intersect1d(neighbors[i, :k], neighbors[j, :k])
-                strength[i, j] = strength[j, i] = np.sum((k + 1 - np.searchsorted(neighbors[i, :k], shared_nn)) *
-                                                   (k + 1 - np.searchsorted(neighbors[j, :k], shared_nn)))
+                strength[i, j] = strength[j, i] = np.sum(
+                    (k + 1 - np.searchsorted(neighbors[i, :k], shared_nn))
+                    * (k + 1 - np.searchsorted(neighbors[j, :k], shared_nn))
+                )
 
         """ construct the shared nearest neighbor graph """
         strength_nn = np.sort(strength, axis=1)[:, ::-1][:, :k]
@@ -213,7 +225,7 @@ def _covStruct(self, data, clusters):
             sigma, shrinkage = ledoit_wolf(cluster_data)
             me = np.mean(cluster_data, axis=0)
             eigenValues, eigenVectors = np.linalg.eigh(sigma)
-            eigenValues = np.diag((eigenValues))
+            eigenValues = np.diag(eigenValues)
             Me.append(me)
             Eigen_matrices.append(eigenVectors)
             Eigen_values.append(eigenValues)
@@ -253,4 +265,4 @@ def _generate_synthetic_samples(self, Me, eigenMatrix, eigenValue, eta, R):
 
     @classmethod
     def _get_test_params(cls, parameter_set="default"):
-        return {"n_clusters": 3}
\ No newline at end of file
+        return {"n_clusters": 3}
diff --git a/aeon/transformations/collection/imbalance/tests/test_ohit.py b/aeon/transformations/collection/imbalance/tests/test_ohit.py
index 58d0794ba6..b7d3372c2a 100644
--- a/aeon/transformations/collection/imbalance/tests/test_ohit.py
+++ b/aeon/transformations/collection/imbalance/tests/test_ohit.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pytest
+
 from aeon.transformations.collection.imbalance import OHIT
 
 
@@ -28,4 +29,3 @@ def test_ohit():
     assert len(res_y) == 2 * majority_num
     assert res_count[0] == majority_num
     assert res_count[1] == majority_num
-

From c12a9c27b0c55b4365cbc93d3037376fcc12ad70 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Wed, 28 May 2025 18:24:56 +0100
Subject: [PATCH 12/19] docstrings

---
 .../collection/imbalance/_adasyn.py           | 37 ++++++++---
 .../collection/imbalance/_ohit.py             | 62 ++++++++++++-------
 .../collection/imbalance/_smote.py            | 45 +++++++++-----
 3 files changed, 96 insertions(+), 48 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/_adasyn.py b/aeon/transformations/collection/imbalance/_adasyn.py
index 412007009d..15106eca69 100644
--- a/aeon/transformations/collection/imbalance/_adasyn.py
+++ b/aeon/transformations/collection/imbalance/_adasyn.py
@@ -11,17 +11,36 @@
 
 class ADASYN(SMOTE):
     """
-    Over-sampling using Adaptive Synthetic Sampling (ADASYN).
+    Adaptive Synthetic Sampling (ADASYN) over-sampler.
 
-    Adaptation of imblearn.over_sampling.ADASYN
-    original authors:
-    #          Guillaume Lemaitre <g.lemaitre58@gmail.com>
-    #          Christos Aridas
-    # License: MIT
+    Generates synthetic samples for the minority class based on local data
+    distribution. ADASYN extends SMOTE by adapting the number of synthetic samples
+    according to the density of the minority class: more samples are generated for
+    minority samples that are harder to learn (i.e., surrounded by more majority
+    samples).
 
-    This transformer extends SMOTE, but it generates different number of
-    samples depending on an estimate of the local distribution of the class
-    to be oversampled.
+    This implementation is adapted from imbalanced-learn's
+    `imblearn.over_sampling.ADASYN`.
+
+    Parameters
+    ----------
+    random_state : int or None, optional (default=None)
+        Random seed for reproducibility.
+    k_neighbors : int, optional (default=5)
+        Number of nearest neighbours used to construct synthetic samples.
+
+    References
+    ----------
+    .. [1] He, H., Bai, Y., Garcia, E. A., & Li, S. (2008).
+           ADASYN: Adaptive synthetic sampling approach for imbalanced learning.
+           In IEEE International Joint Conference on Neural Networks, pp. 1322-1328.
+           https://doi.org/10.1109/IJCNN.2008.4633969
+
+    Examples
+    --------
+    >>> from aeon.classification.sampling import ADASYN
+    >>> sampler = ADASYN(random_state=42)
+    >>> X_res, y_res = sampler.fit_resample(X, y)
     """
 
     def __init__(self, random_state=None, k_neighbors=5):
diff --git a/aeon/transformations/collection/imbalance/_ohit.py b/aeon/transformations/collection/imbalance/_ohit.py
index cde354e275..d5ff0167c7 100644
--- a/aeon/transformations/collection/imbalance/_ohit.py
+++ b/aeon/transformations/collection/imbalance/_ohit.py
@@ -21,30 +21,51 @@
 
 class OHIT(BaseCollectionTransformer):
     """
-    Over-sampling using the Over-sampling based on (OHIT).
+    ver-sampling based on High-density region and Iterative Thresholding (OHIT).
 
-    This method is based on Density-Ratio Shared Nearest Neighbor (DRSNN) clustering to find high-density regions
-    of minority class samples and generate synthetic samples within these clusters and Shrinkage estimation of
-    large-dimensional covariance matrix
+    OHIT generates synthetic minority class samples based on the Density-Ratio Shared
+    Nearest Neighbor (DRSNN) clustering algorithm. It identifies high-density regions
+    among the minority class using DRSNN, then produces synthetic samples within
+    these clusters. Covariance estimation for high-dimensional data is performed using
+    shrinkage techniques.
 
-    DRSNN also contains three parameters(i.e.,drT ,k and kapa),it is capable of selecting the proper value for
-    drT around 1.In addition,k and kapa can be set in a complementary way to avoid the merging and
-    dissociation of clusters,that is,a large k with a relatively low kapa.
+    The DRSNN procedure involves three main parameters:
+    - `drT`: the density ratio threshold (typically set around 1).
+    - `k`: the nearest neighbour parameter in shared nearest neighbour similarity.
+    - `kapa`: the nearest neighbour parameter in defining density ratio.
+
+    `k` and `kapa` should be set in a complementary manner to avoid cluster merging
+    and dissociation. Typically, a large `k` is paired with a relatively low `kapa`.
 
     Parameters
     ----------
-    k : int, the nearest neighbor parameter in SNN similarity
-        if None, set k = int(np.ceil(n ** 0.5 * 1.25)) where n is the number of minority samples
-    kapa : int, the nearest neighbor parameter in defining density ratio
-        if None, set kapa = int(np.ceil(n ** 0.5)) where n is the number of minority samples
-    drT : float, default=0.9, the threshold of density ratio.
+    k : int or None, optional
+        The nearest neighbour parameter for SNN similarity.
+        If None, set to int(np.ceil(n ** 0.5 * 1.25)), where n is the number of
+        minority samples.
+    kapa : int or None, optional
+        The nearest neighbour parameter for defining the density ratio.
+        If None, set to int(np.ceil(n ** 0.5)), where n is the number of minority
+        samples.
+    drT : float, default=0.9
+        Threshold for the density ratio in DRSNN clustering.
     distance : str or callable, default='euclidean'
-        Distance metric to use for KNN in SNN similarity.
+        Distance metric to use for KNN in SNN similarity computation.
     random_state : int, RandomState instance or None, default=None
-        If `int`, random_state is the seed used by the random number generator;
-        If `RandomState` instance, random_state is the random number generator;
-        If `None`, the random number generator is the `RandomState` instance used
-        by `np.random`.
+        Controls random number generation for reproducibility:
+        - If `int`, sets the random seed.
+        - If `RandomState` instance, uses it as the generator.
+        - If `None`, uses `np.random`.
+
+    References
+    ----------
+    .. [1] (Add the relevant reference for OHIT here.)
+
+    Examples
+    --------
+    >>> from aeon.classification.sampling import OHIT
+    >>> ohit = OHIT(k=10, kapa=5, drT=0.9, random_state=0)
+    >>> X_resampled, y_resampled = ohit.fit_resample(X, y)
     """
 
     _tags = {
@@ -121,14 +142,13 @@ def _transform(self, X, y=None):
             """generate  the structure-preserving synthetic samples for each cluster"""
             X_new = np.zeros((n_samples, m))
             count = 0
-            # consider the samples in the cluster with label 0 i.e. the samples that are not clustered
             X_class_0 = X_class[cluster_label == 0]
             if X_class_0.size != 0:
                 gen_0 = np.sum(np.isin(os_ind, np.where(cluster_label == 0)[0]))
                 idx_0 = random_state.choice(len(X_class_0), gen_0, replace=True)
                 X_new[count : count + gen_0, :] = X_class_0[idx_0]
                 count += gen_0
-            for i, cluster in enumerate(clusters):
+            for i, _ in enumerate(clusters):
                 gen_i = np.sum(np.isin(os_ind, np.where(cluster_label == (i + 1))[0]))
                 X_new[count : count + gen_i, :] = self._generate_synthetic_samples(
                     Me[i], eigen_matrices[i], eigen_values[i], gen_i, R
@@ -215,9 +235,7 @@ def _cluster_minority(self, X):
         return clusters, cluster_label
 
     def _covStruct(self, data, clusters):
-        """
-        Calculate the covariance matrix of the minority samples.
-        """
+        """Calculate the covariance matrix of the minority samples."""
         Me, Eigen_matrices, Eigen_values = [], [], []
         for cluster in clusters:
             cluster = list(cluster)
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index ee00c78174..59a4f6e8c5 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -23,41 +23,52 @@
 
 class SMOTE(BaseCollectionTransformer):
     """
-    Over-sampling using the Synthetic Minority Over-sampling TEchnique (SMOTE)[1]_.
+    Synthetic Minority Over-sampling TEchnique (SMOTE) for imbalanced datasets.
 
-    An adaptation of the imbalance-learn implementation of SMOTE in
-    imblearn.over_sampling.SMOTE. sampling_strategy is sampling target by
-    targeting all classes but not the majority, which is directly expressed in
-    _fit.sampling_strategy.
+    Generates synthetic samples of the minority class to address class imbalance.
+    SMOTE constructs new samples by interpolating between existing minority samples
+    and their nearest neighbours in feature space.
+
+    This implementation adapts the algorithm from `imblearn.over_sampling.SMOTE`.
+    It targets all classes except the majority, as controlled by the `sampling_strategy`
+    in the `_fit` method. It uses ``aeon`` distances to find the nearest neighbours.
 
     Parameters
     ----------
     k_neighbors : int, default=5
-        The number  of nearest neighbors used to define the neighborhood of samples
-        to use to generate the synthetic time series.
-        `~sklearn.neighbors.NearestNeighbors` instance will be fitted in this case.
+        Number of nearest neighbours used to generate synthetic samples. A
+        `sklearn.neighbors.NearestNeighbors` instance is fitted for this purpose.
     random_state : int, RandomState instance or None, default=None
-        If `int`, random_state is the seed used by the random number generator;
-        If `RandomState` instance, random_state is the random number generator;
-        If `None`, the random number generator is the `RandomState` instance used
-        by `np.random`.
+        Controls the random number generation for reproducibility:
+        - If `int`, sets the random seed.
+        - If `RandomState` instance, uses it as the generator.
+        - If `None`, uses `np.random`.
 
     See Also
     --------
-    ADASYN
+    ADASYN : Adaptive synthetic sampling extension to SMOTE.
 
     References
     ----------
-    .. [1] Chawla et al. SMOTE: synthetic minority over-sampling technique, Journal
-    of Artificial Intelligence Research 16(1): 321–357, 2002.
-        https://dl.acm.org/doi/10.5555/1622407.1622416
+    .. [1] Chawla, N. V., Bowyer, K. W., Hall, L. O., & Kegelmeyer, W. P. (2002).
+           SMOTE: Synthetic minority over-sampling technique.
+           Journal of Artificial Intelligence Research, 16, 321–357.
+           https://dl.acm.org/doi/10.5555/1622407.1622416
+
+    Examples
+    --------
+    >>> from aeon.classification.sampling import SMOTE
+    >>> from aeon.datasets import load_unit_test
+    >>> X, y = load_unit_test()
+    >>> smote = SMOTE(k_neighbors=3, random_state=0)
+    >>> X_resampled, y_resampled = smote.fit(X, y)
     """
 
     _tags = {
         "requires_y": True,
     }
 
-    def __init__(self, k_neighbors=5, random_state=None):
+    def __init__(self, k_neighbors: int = 5, random_state=None):
         self.random_state = random_state
         self.k_neighbors = k_neighbors
         super().__init__()

From f42d4b0032ec14ddc41ca2f2ae7f7aeb5bbbfa5e Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Wed, 28 May 2025 19:03:33 +0100
Subject: [PATCH 13/19] remove import

---
 aeon/transformations/collection/imbalance/tests/test_ohit.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/aeon/transformations/collection/imbalance/tests/test_ohit.py b/aeon/transformations/collection/imbalance/tests/test_ohit.py
index b7d3372c2a..7162aab894 100644
--- a/aeon/transformations/collection/imbalance/tests/test_ohit.py
+++ b/aeon/transformations/collection/imbalance/tests/test_ohit.py
@@ -1,7 +1,6 @@
 """Test function for OHIT."""
 
 import numpy as np
-import pytest
 
 from aeon.transformations.collection.imbalance import OHIT
 

From 2da6505eec1ed2b7de76c09b4d86c52be4e0cfd2 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Wed, 28 May 2025 19:09:35 +0100
Subject: [PATCH 14/19] remove incorrect test parameters

---
 aeon/transformations/collection/imbalance/_adasyn.py | 11 ++++++++++-
 aeon/transformations/collection/imbalance/_ohit.py   |  6 +-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/_adasyn.py b/aeon/transformations/collection/imbalance/_adasyn.py
index 15106eca69..6dd81f4bdf 100644
--- a/aeon/transformations/collection/imbalance/_adasyn.py
+++ b/aeon/transformations/collection/imbalance/_adasyn.py
@@ -1,4 +1,13 @@
-"""ADASYN over sampling algorithm."""
+"""ADASYN over sampling algorithm.
+
+See imblearn.over_sampling.ADASYN for more details.
+Original authors:
+    Guillaume Lemaitre <g.lemaitre58@gmail.com>
+    Fernando Nogueira
+    Christos Aridas
+    Dzianis Dudnik
+License: MIT
+"""
 
 import numpy as np
 from sklearn.utils import check_random_state
diff --git a/aeon/transformations/collection/imbalance/_ohit.py b/aeon/transformations/collection/imbalance/_ohit.py
index d5ff0167c7..f87691ba65 100644
--- a/aeon/transformations/collection/imbalance/_ohit.py
+++ b/aeon/transformations/collection/imbalance/_ohit.py
@@ -21,7 +21,7 @@
 
 class OHIT(BaseCollectionTransformer):
     """
-    ver-sampling based on High-density region and Iterative Thresholding (OHIT).
+    Over-sampling based on High-density region and Iterative Thresholding (OHIT).
 
     OHIT generates synthetic minority class samples based on the Density-Ratio Shared
     Nearest Neighbor (DRSNN) clustering algorithm. It identifies high-density regions
@@ -280,7 +280,3 @@ def _generate_synthetic_samples(self, Me, eigenMatrix, eigenValue, eta, R):
         SampGen = SampGen[sorted_indices[:eta], :]
 
         return SampGen
-
-    @classmethod
-    def _get_test_params(cls, parameter_set="default"):
-        return {"n_clusters": 3}

From 4847a443c75be743bdda3342782068efa7e42732 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Wed, 28 May 2025 19:30:01 +0100
Subject: [PATCH 15/19] docstrings

---
 .../collection/imbalance/_adasyn.py               |  9 ++++++---
 .../transformations/collection/imbalance/_ohit.py | 13 ++++++++++---
 .../collection/imbalance/_smote.py                | 15 ++++++++++-----
 3 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/_adasyn.py b/aeon/transformations/collection/imbalance/_adasyn.py
index 6dd81f4bdf..fef69ad467 100644
--- a/aeon/transformations/collection/imbalance/_adasyn.py
+++ b/aeon/transformations/collection/imbalance/_adasyn.py
@@ -47,9 +47,12 @@ class ADASYN(SMOTE):
 
     Examples
     --------
-    >>> from aeon.classification.sampling import ADASYN
-    >>> sampler = ADASYN(random_state=42)
-    >>> X_res, y_res = sampler.fit_resample(X, y)
+    >>> from aeon.transformations.collection.imbalance import ADASYN
+    >>> import numpy as np
+    >>> X = np.random.random(size=(100,1,50))
+    >>> y = np.array([0] * 90 + [1] * 10)
+    >>> sampler = ADASYN(random_state=49)
+    >>> X_res, y_res = sampler.fit_transform(X, y)
     """
 
     def __init__(self, random_state=None, k_neighbors=5):
diff --git a/aeon/transformations/collection/imbalance/_ohit.py b/aeon/transformations/collection/imbalance/_ohit.py
index f87691ba65..8c901dfb62 100644
--- a/aeon/transformations/collection/imbalance/_ohit.py
+++ b/aeon/transformations/collection/imbalance/_ohit.py
@@ -63,9 +63,16 @@ class OHIT(BaseCollectionTransformer):
 
     Examples
     --------
-    >>> from aeon.classification.sampling import OHIT
-    >>> ohit = OHIT(k=10, kapa=5, drT=0.9, random_state=0)
-    >>> X_resampled, y_resampled = ohit.fit_resample(X, y)
+    >>> from aeon.transformations.collection.imbalance import OHIT
+    >>> import numpy as np
+    >>> X = np.random.random(size=(100,1,50))
+    >>> y = np.array([0] * 90 + [1] * 10)
+    >>> sampler = OHIT(random_state=49)
+    >>> X_res, y_res = sampler.fit_transform(X, y)
+    >>> np.sum(y_res == 1)
+    90
+    >>> np.sum(y_res == 0)
+    90
     """
 
     _tags = {
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index 59a4f6e8c5..fb5d1896a2 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -57,11 +57,16 @@ class SMOTE(BaseCollectionTransformer):
 
     Examples
     --------
-    >>> from aeon.classification.sampling import SMOTE
-    >>> from aeon.datasets import load_unit_test
-    >>> X, y = load_unit_test()
-    >>> smote = SMOTE(k_neighbors=3, random_state=0)
-    >>> X_resampled, y_resampled = smote.fit(X, y)
+    >>> from aeon.transformations.collection.imbalance import SMOTE
+    >>> import numpy as np
+    >>> X = np.random.random(size=(100,1,50))
+    >>> y = np.array([0] * 90 + [1] * 10)
+    >>> sampler = SMOTE(random_state=49)
+    >>> X_res, y_res = sampler.fit_transform(X, y)
+    >>> np.sum(y_res == 1)
+    90
+    >>> np.sum(y_res == 0)
+    90
     """
 
     _tags = {

From c5719dc26d29313d0e0a43cf37fb85b5a2d21a65 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Wed, 28 May 2025 20:39:01 +0100
Subject: [PATCH 16/19] examples

---
 aeon/transformations/collection/imbalance/_ohit.py  | 3 ++-
 aeon/transformations/collection/imbalance/_smote.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/_ohit.py b/aeon/transformations/collection/imbalance/_ohit.py
index 8c901dfb62..9d7f4e122b 100644
--- a/aeon/transformations/collection/imbalance/_ohit.py
+++ b/aeon/transformations/collection/imbalance/_ohit.py
@@ -64,8 +64,9 @@ class OHIT(BaseCollectionTransformer):
     Examples
     --------
     >>> from aeon.transformations.collection.imbalance import OHIT
+    >>> from aeon.testing.data_generation import make_example_3d_numpy
     >>> import numpy as np
-    >>> X = np.random.random(size=(100,1,50))
+    >>> X = make_example_3d_numpy(n_cases=100, return_y=False, random_state=49)
     >>> y = np.array([0] * 90 + [1] * 10)
     >>> sampler = OHIT(random_state=49)
     >>> X_res, y_res = sampler.fit_transform(X, y)
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index fb5d1896a2..f74f596f45 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -58,8 +58,9 @@ class SMOTE(BaseCollectionTransformer):
     Examples
     --------
     >>> from aeon.transformations.collection.imbalance import SMOTE
+    >>> from aeon.testing.data_generation import make_example_3d_numpy
     >>> import numpy as np
-    >>> X = np.random.random(size=(100,1,50))
+    >>> X = make_example_3d_numpy(n_cases=100, return_y=False, random_state=49)
     >>> y = np.array([0] * 90 + [1] * 10)
     >>> sampler = SMOTE(random_state=49)
     >>> X_res, y_res = sampler.fit_transform(X, y)

From 230ffdd1e21fc1872af203d8e6d052c870104d49 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Wed, 28 May 2025 20:52:06 +0100
Subject: [PATCH 17/19] examples

---
 aeon/transformations/collection/imbalance/_ohit.py  | 6 ++----
 aeon/transformations/collection/imbalance/_smote.py | 6 ++----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/_ohit.py b/aeon/transformations/collection/imbalance/_ohit.py
index 9d7f4e122b..b300e8687b 100644
--- a/aeon/transformations/collection/imbalance/_ohit.py
+++ b/aeon/transformations/collection/imbalance/_ohit.py
@@ -70,10 +70,8 @@ class OHIT(BaseCollectionTransformer):
     >>> y = np.array([0] * 90 + [1] * 10)
     >>> sampler = OHIT(random_state=49)
     >>> X_res, y_res = sampler.fit_transform(X, y)
-    >>> np.sum(y_res == 1)
-    90
-    >>> np.sum(y_res == 0)
-    90
+    >>> y_res.shape
+    (180,)
     """
 
     _tags = {
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index f74f596f45..611b2bbb0e 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -64,10 +64,8 @@ class SMOTE(BaseCollectionTransformer):
     >>> y = np.array([0] * 90 + [1] * 10)
     >>> sampler = SMOTE(random_state=49)
     >>> X_res, y_res = sampler.fit_transform(X, y)
-    >>> np.sum(y_res == 1)
-    90
-    >>> np.sum(y_res == 0)
-    90
+    >>> y_res.shape
+    (180,)
     """
 
     _tags = {

From 15cecc20f3cf8da03220a9f6af1cea3f46265a30 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Sun, 1 Jun 2025 20:46:15 +0100
Subject: [PATCH 18/19] refactor variable name

---
 aeon/transformations/collection/imbalance/_ohit.py  | 8 +++-----
 aeon/transformations/collection/imbalance/_smote.py | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/_ohit.py b/aeon/transformations/collection/imbalance/_ohit.py
index b300e8687b..a704838179 100644
--- a/aeon/transformations/collection/imbalance/_ohit.py
+++ b/aeon/transformations/collection/imbalance/_ohit.py
@@ -127,9 +127,7 @@ def _transform(self, X, y=None):
                 self.kapa = int(np.ceil(n**0.5))
 
             # Initialize NearestNeighbors for SNN similarity
-            self.NearestNeighbors = NearestNeighbors(
-                metric=self.distance, n_neighbors=self.k + 1
-            )
+            self.nn_ = NearestNeighbors(metric=self.distance, n_neighbors=self.k + 1)
 
             clusters, cluster_label = self._cluster_minority(X_class)
             Me, eigen_matrices, eigen_values = self._covStruct(X_class, clusters)
@@ -178,8 +176,8 @@ def _cluster_minority(self, X):
         kapa = self.kapa
         drT = self.drT
 
-        self.NearestNeighbors.fit(X)
-        neighbors = self.NearestNeighbors.kneighbors(X, return_distance=False)[:, 1:]
+        self.nn_.fit(X)
+        neighbors = self.nn_.kneighbors(X, return_distance=False)[:, 1:]
         """ construct the shared nearest neighbor similarity """
         strength = np.zeros((n, n))
         for i in range(n):
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index 611b2bbb0e..63ada8a23b 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -226,7 +226,7 @@ def _generate_samples(
             Synthetically generated samples.
         """
         diffs = nn_data[nn_num[rows, cols]] - X[rows]
-        if y is not None:  # only entering for BorderlineSMOTE-2
+        if y is not None:
             random_state = check_random_state(self.random_state)
             mask_pair_samples = y[nn_num[rows, cols]] != y_type
             diffs[mask_pair_samples] *= random_state.uniform(

From 5b5ce9cee03a93a0ae05ff8b1232c49452a24607 Mon Sep 17 00:00:00 2001
From: Tony Bagnall <ajb@uea.ac.uk>
Date: Mon, 2 Jun 2025 19:42:50 +0100
Subject: [PATCH 19/19] format comments and reference

---
 aeon/transformations/collection/imbalance/_ohit.py | 14 ++++++++------
 .../transformations/collection/imbalance/_smote.py |  7 -------
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/aeon/transformations/collection/imbalance/_ohit.py b/aeon/transformations/collection/imbalance/_ohit.py
index a704838179..4d154a2d13 100644
--- a/aeon/transformations/collection/imbalance/_ohit.py
+++ b/aeon/transformations/collection/imbalance/_ohit.py
@@ -59,7 +59,9 @@ class OHIT(BaseCollectionTransformer):
 
     References
     ----------
-    .. [1] (Add the relevant reference for OHIT here.)
+    .. [1] T. Zhu, C. Luo, Z. Zhang, J. Li, S. Ren, and Y. Zeng. Minority
+           oversampling for imbalanced time series classification. Knowledge-Based
+           Systems, 247:108764, 2022.
 
     Examples
     --------
@@ -143,7 +145,7 @@ def _transform(self, X, y=None):
             os_ind = np.concatenate([os_ind, remaining])
             R = 1.25 if len(clusters) > 1 else 1.1
 
-            """generate  the structure-preserving synthetic samples for each cluster"""
+            # generate the structure-preserving synthetic samples for each cluster
             X_new = np.zeros((n_samples, m))
             count = 0
             X_class_0 = X_class[cluster_label == 0]
@@ -178,7 +180,7 @@ def _cluster_minority(self, X):
 
         self.nn_.fit(X)
         neighbors = self.nn_.kneighbors(X, return_distance=False)[:, 1:]
-        """ construct the shared nearest neighbor similarity """
+        # construct the shared nearest neighbor similarity
         strength = np.zeros((n, n))
         for i in range(n):
             for j in range(i + 1, n):
@@ -188,7 +190,7 @@ def _cluster_minority(self, X):
                     * (k + 1 - np.searchsorted(neighbors[j, :k], shared_nn))
                 )
 
-        """ construct the shared nearest neighbor graph """
+        # construct the shared nearest neighbor graph
         strength_nn = np.sort(strength, axis=1)[:, ::-1][:, :k]
         idx_nn = np.argsort(strength, axis=1)[:, ::-1]
         graph = np.zeros((n, k))
@@ -206,9 +208,9 @@ def _cluster_minority(self, X):
             else:
                 density_ratio[i] = density[i] / np.mean(density[idx_nn[i, non_noise]])
 
-        """ identify core points """
+        # identify core points
         core_idx = np.where(density_ratio > drT)[0]
-        """ find directly density-reachable samples for each core point"""
+        # find directly density-reachable samples for each core point
         neighborhood = {core: set(idx_nn[core, :kapa]) for core in core_idx}
         for i in core_idx:
             for j in core_idx:
diff --git a/aeon/transformations/collection/imbalance/_smote.py b/aeon/transformations/collection/imbalance/_smote.py
index 63ada8a23b..f6e7062f2f 100644
--- a/aeon/transformations/collection/imbalance/_smote.py
+++ b/aeon/transformations/collection/imbalance/_smote.py
@@ -194,28 +194,21 @@ def _generate_samples(
         X : np.ndarray
             Series from which the points will be created of shape (n_cases,
             n_timepoints).
-
         nn_data : ndarray of shape (n_samples_all, n_features)
             Data set carrying all the neighbours to be used.
-
         nn_num : ndarray of shape (n_samples_all, k_nearest_neighbours)
             The nearest neighbours of each sample in `nn_data`.
-
         rows : ndarray of shape (n_samples,), dtype=int
             Indices pointing at feature vector in X which will be used
             as a base for creating new samples.
-
         cols : ndarray of shape (n_samples,), dtype=int
             Indices pointing at which nearest neighbor of base feature vector
             will be used when creating new samples.
-
         steps : ndarray of shape (n_samples,), dtype=float
             Step sizes for new samples.
-
         y_type : str, int or None, default=None
             Class label of the current target classes for which we want to generate
             samples.
-
         y : ndarray of shape (n_samples_all,), default=None
             The true target associated with `nn_data`. Used by Borderline SMOTE-2 to
             weight the distances in the sample generation process.