From 374341dbdcdbc862a1d6f21ec4cafbc5122037e1 Mon Sep 17 00:00:00 2001
From: Aryan <aryanpola1603@gmail.com>
Date: Sun, 9 Mar 2025 10:59:01 +0530
Subject: [PATCH 1/4] initialised parameters

---
 aeon/clustering/density_based/__init__.py     |  1 +
 .../clustering/density_based/_density_peak.py | 47 +++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 aeon/clustering/density_based/__init__.py
 create mode 100644 aeon/clustering/density_based/_density_peak.py

diff --git a/aeon/clustering/density_based/__init__.py b/aeon/clustering/density_based/__init__.py
new file mode 100644
index 0000000000..4bfc3bf42c
--- /dev/null
+++ b/aeon/clustering/density_based/__init__.py
@@ -0,0 +1 @@
+"""Density."""
diff --git a/aeon/clustering/density_based/_density_peak.py b/aeon/clustering/density_based/_density_peak.py
new file mode 100644
index 0000000000..693b5ca591
--- /dev/null
+++ b/aeon/clustering/density_based/_density_peak.py
@@ -0,0 +1,47 @@
+"""Density clustering for time series data."""
+
+__maintainer__ = []
+__all__ = ["DensityPeakClusterer"]
+
+import numpy as np  # noqa
+
+from aeon.distances import get_distance_function, pairwise_distance  # noqa
+
+
+class DensityPeakClusterer:
+    """Density Peak Clusterer.
+
+    Clusters time series data using a density-based approach that estimates local
+    densities and identifies peaks as cluster centers.
+    """
+
+    def __init__(
+        self,
+        rho=None,
+        cutoff_distance=None,
+        distance="dtw",
+        n_jobs=1,
+    ):
+        self.rho = rho
+        self.cutoff_distance = cutoff_distance
+        self.distance = distance
+        self.n_jobs = n_jobs
+
+    def fit(self, X, y=None):
+        """Fit time series clusterer to training data.
+
+        Parameters
+        ----------
+        X : array-like
+            Time series data to cluster.
+        y : array-like, optional
+            Labels for the data (unused in clustering).
+
+        Returns
+        -------
+        self : object
+            The fitted clusterer.
+        """
+        self._fit(X)
+        self.is_fitted = True
+        return self

From 61853264394fc0e088beb40b2ecb201d6bd6e4c6 Mon Sep 17 00:00:00 2001
From: Aryan <aryanpola1603@gmail.com>
Date: Thu, 13 Mar 2025 17:22:59 +0530
Subject: [PATCH 2/4] Uncertain results

---
 .../clustering/density_based/_density_peak.py | 228 ++++++++++-
 .../density_based/tests/__init__.py           |   1 +
 .../density_based/tests/test_density_peak.py  | 382 ++++++++++++++++++
 3 files changed, 602 insertions(+), 9 deletions(-)
 create mode 100644 aeon/clustering/density_based/tests/__init__.py
 create mode 100644 aeon/clustering/density_based/tests/test_density_peak.py

diff --git a/aeon/clustering/density_based/_density_peak.py b/aeon/clustering/density_based/_density_peak.py
index 693b5ca591..034679ea10 100644
--- a/aeon/clustering/density_based/_density_peak.py
+++ b/aeon/clustering/density_based/_density_peak.py
@@ -3,31 +3,114 @@
 __maintainer__ = []
 __all__ = ["DensityPeakClusterer"]
 
-import numpy as np  # noqa
+import numpy as np
 
-from aeon.distances import get_distance_function, pairwise_distance  # noqa
+from aeon.distances import get_distance_function, pairwise_distance
 
 
 class DensityPeakClusterer:
-    """Density Peak Clusterer.
+    """
+    Density Peak Clusterer.
 
     Clusters time series data using a density-based approach that estimates local
     densities and identifies peaks as cluster centers.
+
+    Parameters
+    ----------
+    rho : float, optional
+        The local density for each data point.
+    delta : np.ndarray
+        For each point, the minimum distance to any point with higher density.
+    gauss_cutoff : bool, default=True
+        Whether to use Gaussian cutoff for density estimation.
+    cutoff_distance : float, optional
+        Distance cutoff for Gaussian kernel.
+    distance_metric : str, default="euclidean"
+        Distance metric to use for clustering.
+    n_jobs : int, default=1
+        Number of parallel jobs to run.
+    density_threshold : float, optional
+        Density threshold to select cluster centers. If None, will use midpoint.
+    distance_threshold : float, optional
+        Distance threshold to select cluster centers. If None, will use midpoint.
     """
 
     def __init__(
         self,
-        rho=None,
-        cutoff_distance=None,
-        distance="dtw",
-        n_jobs=1,
+        rho: float = None,
+        gauss_cutoff: bool = True,
+        cutoff_distance: int = None,
+        distance_metric: str = "euclidean",
+        n_jobs: int = 1,
+        density_threshold: float = None,
+        distance_threshold: float = None,
     ):
         self.rho = rho
+        self.gauss_cutoff = gauss_cutoff
         self.cutoff_distance = cutoff_distance
-        self.distance = distance
+        self.distance_metric = distance_metric
         self.n_jobs = n_jobs
+        self.density_threshold = density_threshold
+        self.distance_threshold = distance_threshold
+
+    def _build_distance(self):
+        """
+        Compute the pairwise distance matrix using the resolved distance function.
+
+        Returns
+        -------
+        distance_matrix : np.ndarray
+            A matrix of symmetric pairwise distances.
+        """
+        dist_func = get_distance_function(self.distance_metric)
+        distance_matrix = pairwise_distance(self.data, method=dist_func)
+        return distance_matrix
+
+    def _auto_select_dc(self):
+        """Auto-select cutoff distance (dc) so that the fraction of pairwise distances less than dc is within a target range (e.g: 1-2%)."""  # noqa
+        tri_indices = np.triu_indices(
+            self.n, k=1
+        )  # k=1 to ignore diagonal as it has 0's
+        distances = self.distance_matrix[tri_indices]
+        max_distance = np.max(distances)
+        min_distance = np.min(distances)
+        dc = (max_distance + min_distance) / 2
+
+        lower_bound = 0.002
+        upper_bound = 0.01  # 1-2% range
+
+        # recursively setting the value of dc
+        while True:
+            nneighs = np.sum(distances < dc) / (self.n**2)  # nearest neighbors
+
+            if lower_bound < nneighs < upper_bound:  # case 1 : dc is in range
+                break
+            if nneighs < lower_bound:  # case 2 : increase dc (too few neighbors)
+                min_distance = dc
+            if nneighs > upper_bound:  # case 3 : decrease dc (too many neighbors)
+                max_distance = dc
+
+            dc = (max_distance + min_distance) / 2
+            if max_distance - min_distance < 1e-6:
+                break
+
+        return dc
 
-    def fit(self, X, y=None):
+    def select_dc(self):
+        """
+        Select the cutoff distance (dc) for density estimation.
+
+        Returns
+        -------
+        dc : float
+            The cutoff distance.
+        """
+        if self.cutoff_distance == "auto":
+            return self._auto_select_dc()
+        else:
+            return self.cutoff_distance
+
+    def _fit(self, X: np.ndarray, y: np.ndarray = None):
         """Fit time series clusterer to training data.
 
         Parameters
@@ -42,6 +125,133 @@ def fit(self, X, y=None):
         self : object
             The fitted clusterer.
         """
+        self.data = X
+        self.n = X.shape[0]  # total no. of time points
+
+        self.distance_matrix = self._build_distance()  # pairwise distance matrix
+
+        self.dc = self.select_dc()  # selecting cutoff distance
+
+        self.rho = np.zeros(self.n)  # local density
+
+        if self.gauss_cutoff:
+            # Gaussian kernel: weight = exp(- (d / self.dc) ** 2)
+            for i in range(self.n):
+                self.rho[i] = np.sum(
+                    np.exp(-((self.distance_matrix[i] / self.dc) ** 2))
+                )
+        else:
+            # Hard cutoff: weight = 1 if d < dc, 0 otherwise
+            for i in range(self.n):
+                self.rho[i] = np.sum(self.distance_matrix[i] < self.dc)
+
+        self.delta = np.full(self.n, np.inf)
+        self.nneigh = np.zeros(self.n, dtype=int)
+        sorted_indices = np.argsort(
+            -self.rho
+        )  # decending order of local density indices
+        self.sorted_indices = sorted_indices
+
+        highest_index = sorted_indices[0]
+        self.delta[highest_index] = np.max(
+            self.distance_matrix
+        )  # distance to highest density point
+        self.nneigh[highest_index] = highest_index
+
+        for i in range(1, self.n):
+            current_index = sorted_indices[i]
+            for j in range(i):
+                higher_index = sorted_indices[j]
+                d = self.distance_matrix[current_index, higher_index]
+                if d < self.delta[current_index]:
+                    self.delta[current_index] = d
+                    self.nneigh[current_index] = higher_index
+
+        # If thresholds are not provided, use the midpoint rule
+        if self.density_threshold is None:
+            rho_threshold = 0.5 * (self.rho.min() + self.rho.max())
+        else:
+            rho_threshold = self.density_threshold
+
+        if self.distance_threshold is None:
+            delta_threshold = 0.5 * (self.delta.min() + self.delta.max())
+        else:
+            delta_threshold = self.distance_threshold
+
+        # Initialize cluster labels to -1 and assign centers based on the thresholds
+        self.labels_ = -np.ones(self.n, dtype=int)
+        for idx in range(self.n):
+            if (self.rho[idx] >= rho_threshold) and (
+                self.delta[idx] >= delta_threshold
+            ):
+                self.labels_[idx] = (
+                    idx  # mark as cluster center (label equals its own index)
+                )
+
+        # descending-density assignment for non-center points
+        for idx in sorted_indices:
+            if self.labels_[idx] == -1:
+                self.labels_[idx] = self.labels_[self.nneigh[idx]]
+
+        # store the final center indices for reference.
+        self.cluster_centers = [i for i in range(self.n) if self.labels_[i] == i]
+
+    def fit(self, X: np.ndarray, y: np.ndarray = None):
+        """
+        Fit time series clusterer to training data.
+
+        Parameters
+        ----------
+        X : array-like, shape=(n_samples, n_timepoints) or
+            (n_samples, n_channels, n_timepoints)
+            Time series data to cluster.
+        y : array-like, optional
+            Labels for the data (unused in clustering).
+
+        Returns
+        -------
+        self : DensityPeakClusterer
+            The fitted clusterer.
+        """
         self._fit(X)
         self.is_fitted = True
         return self
+
+    def plot(self, mode="all", title="", **kwargs):
+        """
+        Plot clustering results and/or the decision graph.
+
+        Parameters
+        ----------
+        mode : str, default="all"
+            One of "decision" (plot decision graph), "label" (plot clustered data),
+            or "all" (plot both).
+        title : str, optional
+            Title for the plots.
+        kwargs : dict
+            Additional keyword arguments passed to plotting functions.
+        """
+        import matplotlib.pyplot as plt
+
+        if mode in {"decision", "all"}:
+            plt.figure()
+            plt.scatter(self.rho, self.delta, c=self.labels_, cmap="viridis")
+            plt.xlabel("Local Density (rho)")
+            plt.ylabel("Delta")
+            plt.title(title + " Decision Graph")
+            plt.colorbar()
+            plt.show()
+
+        if mode in {"label", "all"}:
+            plt.figure()
+            if self.data.ndim == 2 and self.data.shape[1] >= 2:
+                plt.scatter(
+                    self.data[:, 0], self.data[:, 1], c=self.labels_, cmap="viridis"
+                )
+                plt.xlabel("Feature 1")
+                plt.ylabel("Feature 2")
+            else:
+                plt.plot(self.data.T)
+            plt.title(title + " Cluster Labels")
+            plt.colorbar()
+            plt.show()
diff --git a/aeon/clustering/density_based/tests/__init__.py b/aeon/clustering/density_based/tests/__init__.py
new file mode 100644
index 0000000000..44d07e76f6
--- /dev/null
+++ b/aeon/clustering/density_based/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for density based clustering algorithms."""
diff --git a/aeon/clustering/density_based/tests/test_density_peak.py b/aeon/clustering/density_based/tests/test_density_peak.py
new file mode 100644
index 0000000000..ba6a14a0fe
--- /dev/null
+++ b/aeon/clustering/density_based/tests/test_density_peak.py
@@ -0,0 +1,382 @@
+"""Test for the DensityPeakClusterer module."""
+
+import numpy as np
+import pytest
+
+from aeon.clustering.density_based._density_peak import DensityPeakClusterer
+
+# spiral
+DATA_STR = """
+31.95   7.95    3
+31.15   7.3     3
+30.45   6.65    3
+29.7    6.0     3
+28.9    5.55    3
+28.05   5.0     3
+27.2    4.55    3
+26.35   4.15    3
+25.4    3.85    3
+24.6    3.6     3
+23.6    3.3     3
+22.75   3.15    3
+21.85   3.05    3
+20.9    3.0     3
+20.0    2.9     3
+19.1    3.0     3
+18.2    3.2     3
+17.3    3.25    3
+16.55   3.5     3
+15.7    3.7     3
+14.85   4.1     3
+14.15   4.4     3
+13.4    4.75    3
+12.7    5.2     3
+12.05   5.65    3
+11.45   6.15    3
+10.9    6.65    3
+10.3    7.25    3
+9.7     7.85    3
+9.35    8.35    3
+8.9     9.05    3
+8.55    9.65    3
+8.15    10.35   3
+7.95    10.95   3
+7.75    11.7    3
+7.55    12.35   3
+7.45    13.0    3
+7.35    13.75   3
+7.3     14.35   3
+7.35    14.95   3
+7.35    15.75   3
+7.55    16.35   3
+7.7     16.95   3
+7.8     17.55   3
+8.05    18.15   3
+8.3     18.75   3
+8.65    19.3    3
+8.9     19.85   3
+9.3     20.3    3
+9.65    20.8    3
+10.2    21.25   3
+10.6    21.65   3
+11.1    22.15   3
+11.55   22.45   3
+11.95   22.7    3
+12.55   23.0    3
+13.05   23.2    3
+13.45   23.4    3
+14.0    23.55   3
+14.55   23.6    3
+15.1    23.75   3
+15.7    23.75   3
+16.15   23.85   3
+16.7    23.8    3
+17.15   23.75   3
+17.75   23.75   3
+18.2    23.6    3
+18.65   23.5    3
+19.1    23.35   3
+19.6    23.15   3
+20.0    22.95   3
+20.4    22.7    3
+20.7    22.55   3
+21.0    22.15   3
+21.45   21.95   3
+21.75   21.55   3
+22.0    21.25   3
+22.25   21.0    3
+22.5    20.7    3
+22.65   20.35   3
+22.75   20.05   3
+22.9    19.65   3
+23.0    19.35   3
+23.1    19.0    3
+23.15   18.65   3
+23.2    18.25   3
+23.2    18.05   3
+23.2    17.8    3
+23.1    17.45   3
+23.05   17.15   3
+22.9    16.9    3
+22.85   16.6    3
+22.7    16.4    3
+22.6    16.2    3
+22.55   16.05   3
+22.4    15.95   3
+22.35   15.8    3
+22.2    15.65   3
+22.15   15.55   3
+22.0    15.4    3
+21.9    15.3    3
+21.85   15.25   3
+21.75   15.15   3
+21.65   15.05   3
+21.55   15.0    3
+21.5    14.9    3
+19.35   31.65   1
+20.35   31.45   1
+21.35   31.1    1
+22.25   30.9    1
+23.2    30.45   1
+23.95   30.05   1
+24.9    29.65   1
+25.6    29.05   1
+26.35   28.5    1
+27.15   27.9    1
+27.75   27.35   1
+28.3    26.6    1
+28.95   25.85   1
+29.5    25.15   1
+29.95   24.45   1
+30.4    23.7    1
+30.6    22.9    1
+30.9    22.1    1
+31.25   21.3    1
+31.35   20.55   1
+31.5    19.7    1
+31.55   18.9    1
+31.65   18.15   1
+31.6    17.35   1
+31.45   16.55   1
+31.3    15.8    1
+31.15   15.05   1
+30.9    14.35   1
+30.6    13.65   1
+30.3    13.0    1
+29.9    12.3    1
+29.5    11.75   1
+29.0    11.15   1
+28.5    10.6    1
+28.0    10.1    1
+27.55   9.65    1
+26.9    9.1     1
+26.25   8.8     1
+25.7    8.4     1
+25.15   8.05    1
+24.5    7.75    1
+23.9    7.65    1
+23.15   7.4     1
+22.5    7.3     1
+21.9    7.1     1
+21.25   7.05    1
+20.5    7.0     1
+19.9    6.95    1
+19.25   7.05    1
+18.75   7.1     1
+18.05   7.25    1
+17.5    7.35    1
+16.9    7.6     1
+16.35   7.8     1
+15.8    8.05    1
+15.4    8.35    1
+14.9    8.7     1
+14.45   8.9     1
+13.95   9.3     1
+13.6    9.65    1
+13.25   10.1    1
+12.95   10.55   1
+12.65   10.9    1
+12.35   11.4    1
+12.2    11.75   1
+11.95   12.2    1
+11.8    12.65   1
+11.75   13.05   1
+11.55   13.6    1
+11.55   14.0    1
+11.55   14.35   1
+11.55   14.7    1
+11.6    15.25   1
+11.65   15.7    1
+11.8    16.05   1
+11.85   16.5    1
+12.0    16.75   1
+12.15   17.2    1
+12.3    17.6    1
+12.55   17.85   1
+12.8    18.05   1
+13.1    18.4    1
+13.3    18.6    1
+13.55   18.85   1
+13.8    19.05   1
+14.15   19.25   1
+14.45   19.5    1
+14.85   19.55   1
+15.0    19.7    1
+15.25   19.7    1
+15.55   19.85   1
+15.95   19.9    1
+16.2    19.9    1
+16.55   19.9    1
+16.85   19.9    1
+17.2    19.9    1
+17.4    19.8    1
+17.65   19.75   1
+17.8    19.7    1
+18.0    19.6    1
+18.2    19.55   1
+3.9     9.6     2
+3.55    10.65   2
+3.35    11.4    2
+3.1     12.35   2
+3.1     13.25   2
+3.05    14.15   2
+3.0     15.1    2
+3.1     16.0    2
+3.2     16.85   2
+3.45    17.75   2
+3.7     18.7    2
+3.95    19.55   2
+4.35    20.25   2
+4.7     21.1    2
+5.15    21.8    2
+5.6     22.5    2
+6.2     23.3    2
+6.8     23.85   2
+7.35    24.45   2
+8.05    24.95   2
+8.8     25.45   2
+9.5     26.0    2
+10.2    26.35   2
+10.9    26.75   2
+11.7    27.0    2
+12.45   27.25   2
+13.3    27.6    2
+14.05   27.6    2
+14.7    27.75   2
+15.55   27.75   2
+16.4    27.75   2
+17.1    27.75   2
+17.9    27.75   2
+18.55   27.7    2
+19.35   27.6    2
+20.1    27.35   2
+20.7    27.1    2
+21.45   26.8    2
+22.05   26.5    2
+22.7    26.15   2
+23.35   25.65   2
+23.8    25.3    2
+24.3    24.85   2
+24.75   24.35   2
+25.25   23.95   2
+25.65   23.45   2
+26.05   23.0    2
+26.2    22.3    2
+26.6    21.8    2
+26.75   21.25   2
+27.0    20.7    2
+27.15   20.15   2
+27.15   19.6    2
+27.35   19.1    2
+27.35   18.45   2
+27.4    18.0    2
+27.3    17.4    2
+27.15   16.9    2
+27.0    16.4    2
+27.0    15.9    2
+26.75   15.35   2
+26.55   14.85   2
+26.3    14.45   2
+25.95   14.1    2
+25.75   13.7    2
+25.35   13.3    2
+25.05   12.95   2
+24.8    12.7    2
+24.4    12.45   2
+24.05   12.2    2
+23.55   11.85   2
+23.2    11.65   2
+22.75   11.4    2
+22.3    11.3    2
+21.9    11.1    2
+21.45   11.05   2
+21.1    11.0    2
+20.7    10.95   2
+20.35   10.95   2
+19.95   11.0    2
+19.55   11.0    2
+19.15   11.05   2
+18.85   11.1    2
+18.45   11.25   2
+18.15   11.35   2
+17.85   11.5    2
+17.5    11.7    2
+17.2    11.95   2
+17.0    12.05   2
+16.75   12.2    2
+16.65   12.35   2
+16.5    12.5    2
+16.35   12.7    2
+16.2    12.8    2
+16.15   12.95   2
+16.0    13.1    2
+15.95   13.25   2
+15.9    13.4    2
+15.8    13.5    2
+15.8    13.65   2
+15.75   13.85   2
+15.65   14.05   2
+15.65   14.25   2
+15.65   14.5    2
+15.65   14.6    2
+"""
+
+
+def load_dataset():
+    """
+    Load the provided dataset from the multiline string.
+
+    Each row has three columns (x, y, and an unused label).
+    Returns the x and y coordinates as (n_samples, 2).
+    """
+    lines = DATA_STR.strip().splitlines()
+    data_list = []
+    for line in lines:
+        parts = line.split()
+        if len(parts) >= 2:
+            x = float(parts[0])
+            y = float(parts[1])
+            data_list.append([x, y])
+    return np.array(data_list)
+
+
+@pytest.fixture
+def dataset():
+    """Load the dataset."""
+    return load_dataset()
+
+
+def test_density_peak_clusterer(dataset):
+    """Test the DensityPeakClusterer with the provided dataset."""
+    clusterer = DensityPeakClusterer(
+        gauss_cutoff=True,
+        cutoff_distance="auto",
+        distance_metric="euclidean",
+        density_threshold=8,
+        distance_threshold=5,
+    )
+
+    # Fit the clusterer on the dataset
+    clusterer.fit(dataset)
+
+    # Print cluster labels and cluster centers
+    print("Cluster Labels:")  # noqa
+    print(clusterer.labels_)  # noqa
+    print("\nCluster Centers:")  # noqa
+    print(clusterer.cluster_centers)  # noqa
+
+    # Assertions to verify the clustering results
+    assert clusterer.labels_ is not None, "Cluster labels should not be None"
+    assert len(clusterer.labels_) == len(
+        dataset
+    ), "Number of labels should match number of data points"
+    assert clusterer.cluster_centers is not None, "Cluster centers should not be None"
+    assert (
+        len(clusterer.cluster_centers) > 0
+    ), "There should be at least one cluster center"
+
+    # Optional: Plot decision graph and clustering results
+    clusterer.plot(mode="all", title="Density Peak Clustering")
+
+
+# use pytest -s

From b00f466edae889500481ea4b2df8804bea812e5a Mon Sep 17 00:00:00 2001
From: Aryan <aryanpola1603@gmail.com>
Date: Wed, 19 Mar 2025 15:13:29 +0530
Subject: [PATCH 3/4] 98 cluster center instead of 97 (auto)

---
 .../clustering/density_based/_density_peak.py | 163 ++++++++++++------
 .../density_based/tests/test_density_peak.py  |   5 +-
 2 files changed, 109 insertions(+), 59 deletions(-)

diff --git a/aeon/clustering/density_based/_density_peak.py b/aeon/clustering/density_based/_density_peak.py
index 034679ea10..d35a703278 100644
--- a/aeon/clustering/density_based/_density_peak.py
+++ b/aeon/clustering/density_based/_density_peak.py
@@ -3,6 +3,7 @@
 __maintainer__ = []
 __all__ = ["DensityPeakClusterer"]
 
+import matplotlib.pyplot as plt
 import numpy as np
 
 from aeon.distances import get_distance_function, pairwise_distance
@@ -22,17 +23,23 @@ class DensityPeakClusterer:
     delta : np.ndarray
         For each point, the minimum distance to any point with higher density.
     gauss_cutoff : bool, default=True
-        Whether to use Gaussian cutoff for density estimation.
-    cutoff_distance : float, optional
-        Distance cutoff for Gaussian kernel.
+        Whether to use a Gaussian kernel for density estimation.
+    cutoff_distance : float or str, optional
+        Distance cutoff for the Gaussian kernel. If set to "auto", the cutoff is
+        automatically selected.
     distance_metric : str, default="euclidean"
-        Distance metric to use for clustering.
+        The distance metric to use for clustering.
     n_jobs : int, default=1
         Number of parallel jobs to run.
     density_threshold : float, optional
-        Density threshold to select cluster centers. If None, will use midpoint.
+        Density threshold for selecting cluster centers. If None, the midpoint of min
+        and max density is used.
     distance_threshold : float, optional
-        Distance threshold to select cluster centers. If None, will use midpoint.
+        Distance threshold for selecting cluster centers. If None, the midpoint of min
+        and max delta is used.
+    anormal : bool, default=False
+        If True, points in the halo region (border points) are marked with a label
+        of -1.
     """
 
     def __init__(
@@ -44,6 +51,7 @@ def __init__(
         n_jobs: int = 1,
         density_threshold: float = None,
         distance_threshold: float = None,
+        anormal: bool = False,
     ):
         self.rho = rho
         self.gauss_cutoff = gauss_cutoff
@@ -52,6 +60,7 @@ def __init__(
         self.n_jobs = n_jobs
         self.density_threshold = density_threshold
         self.distance_threshold = distance_threshold
+        self.anormal = anormal
 
     def _build_distance(self):
         """
@@ -60,36 +69,37 @@ def _build_distance(self):
         Returns
         -------
         distance_matrix : np.ndarray
-            A matrix of symmetric pairwise distances.
+            A symmetric matrix of pairwise distances.
         """
         dist_func = get_distance_function(self.distance_metric)
         distance_matrix = pairwise_distance(self.data, method=dist_func)
         return distance_matrix
 
     def _auto_select_dc(self):
-        """Auto-select cutoff distance (dc) so that the fraction of pairwise distances less than dc is within a target range (e.g: 1-2%)."""  # noqa
-        tri_indices = np.triu_indices(
-            self.n, k=1
-        )  # k=1 to ignore diagonal as it has 0's
+        """
+        Auto-select cutoff distance (dc).
+
+        The fraction of pairwise distances less than dc
+        is within a target range (approximately 0.2% to 1%).
+        Intended target range: 0.01 <= nneighs <= 0.002.
+        """
+        tri_indices = np.triu_indices(self.n, k=1)  # ignore diagonal (self-distances)
         distances = self.distance_matrix[tri_indices]
         max_distance = np.max(distances)
         min_distance = np.min(distances)
         dc = (max_distance + min_distance) / 2
 
-        lower_bound = 0.002
-        upper_bound = 0.01  # 1-2% range
+        lower_bound = 0.01  # 0.002
+        upper_bound = 0.002  # 0.01  # target range: 0.2% to 1%
 
-        # recursively setting the value of dc
         while True:
-            nneighs = np.sum(distances < dc) / (self.n**2)  # nearest neighbors
-
-            if lower_bound < nneighs < upper_bound:  # case 1 : dc is in range
+            nneighs = np.sum(distances < dc) / (self.n**2)
+            if lower_bound <= nneighs <= upper_bound:
                 break
-            if nneighs < lower_bound:  # case 2 : increase dc (too few neighbors)
+            if nneighs < lower_bound:  # too few neighbors => increase dc
                 min_distance = dc
-            if nneighs > upper_bound:  # case 3 : decrease dc (too many neighbors)
+            elif nneighs > upper_bound:  # too many neighbors => decrease dc
                 max_distance = dc
-
             dc = (max_distance + min_distance) / 2
             if max_distance - min_distance < 1e-6:
                 break
@@ -103,20 +113,33 @@ def select_dc(self):
         Returns
         -------
         dc : float
-            The cutoff distance.
+            The chosen cutoff distance.
         """
         if self.cutoff_distance == "auto":
             return self._auto_select_dc()
+        elif self.cutoff_distance is None:
+            n = self.data.shape[0]
+            self.distances = {}
+            for i in range(n):
+                for j in range(i + 1, n):
+                    self.distances[(i, j)] = self.distance_matrix[i, j]
+                    self.distances[(j, i)] = self.distance_matrix[i, j]
+            percent = 2.0
+            position = int(self.n * (self.n + 1) / 2 * percent / 100)
+            sorted_vals = np.sort(list(self.distances.values()))
+            dc = sorted_vals[position * 2 + self.n]
+            return dc
         else:
             return self.cutoff_distance
 
     def _fit(self, X: np.ndarray, y: np.ndarray = None):
-        """Fit time series clusterer to training data.
+        """
+        Fit the density peak clusterer to the training data.
 
         Parameters
         ----------
         X : array-like
-            Time series data to cluster.
+            Time series (or 2D) data to cluster.
         y : array-like, optional
             Labels for the data (unused in clustering).
 
@@ -126,36 +149,32 @@ def _fit(self, X: np.ndarray, y: np.ndarray = None):
             The fitted clusterer.
         """
         self.data = X
-        self.n = X.shape[0]  # total no. of time points
-
-        self.distance_matrix = self._build_distance()  # pairwise distance matrix
+        self.n = X.shape[0]  # total number of data points
 
-        self.dc = self.select_dc()  # selecting cutoff distance
-
-        self.rho = np.zeros(self.n)  # local density
+        self.distance_matrix = self._build_distance()
+        self.dc = self.select_dc()
 
+        # Compute local density, excluding self-distance
+        self.rho = np.zeros(self.n)
         if self.gauss_cutoff:
-            # Gaussian kernel: weight = exp(- (d / self.dc) ** 2)
             for i in range(self.n):
-                self.rho[i] = np.sum(
-                    np.exp(-((self.distance_matrix[i] / self.dc) ** 2))
-                )
+                self.rho[i] = (
+                    np.sum(np.exp(-((self.distance_matrix[i] / self.dc) ** 2))) - 1
+                )  # -1 to exclude self (diagonal)
         else:
-            # Hard cutoff: weight = 1 if d < dc, 0 otherwise
+            # Count neighbors using a hard cutoff, subtracting the self-count
             for i in range(self.n):
-                self.rho[i] = np.sum(self.distance_matrix[i] < self.dc)
+                self.rho[i] = np.sum(self.distance_matrix[i] < self.dc) - 1
 
+        # computing delta and nearest higher-density neighbor
         self.delta = np.full(self.n, np.inf)
         self.nneigh = np.zeros(self.n, dtype=int)
-        sorted_indices = np.argsort(
-            -self.rho
-        )  # decending order of local density indices
+        sorted_indices = np.argsort(-self.rho)  # indices in descending order of density
         self.sorted_indices = sorted_indices
 
         highest_index = sorted_indices[0]
-        self.delta[highest_index] = np.max(
-            self.distance_matrix
-        )  # distance to highest density point
+        # For the highest-density point, set delta to the maximum distance in the matrix
+        self.delta[highest_index] = np.max(self.distance_matrix)
         self.nneigh[highest_index] = highest_index
 
         for i in range(1, self.n):
@@ -167,7 +186,7 @@ def _fit(self, X: np.ndarray, y: np.ndarray = None):
                     self.delta[current_index] = d
                     self.nneigh[current_index] = higher_index
 
-        # If thresholds are not provided, use the midpoint rule
+        # midpoint rule if thresholds are not provided
         if self.density_threshold is None:
             rho_threshold = 0.5 * (self.rho.min() + self.rho.max())
         else:
@@ -178,33 +197,69 @@ def _fit(self, X: np.ndarray, y: np.ndarray = None):
         else:
             delta_threshold = self.distance_threshold
 
-        # Initialize cluster labels to -1 and assign centers based on the thresholds
+        # Initial cluster assignment:
+        # marking points as centers,they exceed both density and delta thresholds
         self.labels_ = -np.ones(self.n, dtype=int)
         for idx in range(self.n):
             if (self.rho[idx] >= rho_threshold) and (
                 self.delta[idx] >= delta_threshold
             ):
-                self.labels_[idx] = (
-                    idx  # mark as cluster center (label equals its own index)
-                )
+                self.labels_[idx] = idx
 
-        # descending-density assignment for non-center points
+        # labels for non-center points based on the nearest higher-density neighbor.
         for idx in sorted_indices:
             if self.labels_[idx] == -1:
                 self.labels_[idx] = self.labels_[self.nneigh[idx]]
 
-        # store the final center indices for reference.
+        # Create a copy for halo assignment.
+        halo = self.labels_.copy()
+        # Identify unique cluster centers.
+        unique_centers = [i for i in range(self.n) if self.labels_[i] == i]
+        # Initialize border density for each cluster.
+        bord_rho = {center: 0.0 for center in unique_centers}
+
+        # For every pair of points in clusters within dc, update border density
+        for i in range(self.n):
+            for j in range(i + 1, self.n):
+                if (
+                    self.labels_[i] != self.labels_[j]
+                    and self.distance_matrix[i, j] <= self.dc
+                ):
+                    rho_avg = (self.rho[i] + self.rho[j]) / 2.0
+                    if (
+                        self.labels_[i] in bord_rho
+                        and rho_avg > bord_rho[self.labels_[i]]
+                    ):
+                        bord_rho[self.labels_[i]] = rho_avg
+                    if (
+                        self.labels_[j] in bord_rho
+                        and rho_avg > bord_rho[self.labels_[j]]
+                    ):
+                        bord_rho[self.labels_[j]] = rho_avg
+
+        # points falling in halo region (density lower than cluster's border density)
+        for i in range(self.n):
+            if self.labels_[i] in bord_rho and self.rho[i] < bord_rho[self.labels_[i]]:
+                halo[i] = 0
+
+        # if anormal flag is True, assign halo points a label of -1
+        if self.anormal:
+            for i in range(self.n):
+                if halo[i] == 0:
+                    self.labels_[i] = -1
+
+        # Final cluster centers: points whose label equals their own index.
         self.cluster_centers = [i for i in range(self.n) if self.labels_[i] == i]
 
     def fit(self, X: np.ndarray, y: np.ndarray = None):
         """
-        Fit time series clusterer to training data.
+        Fit the clusterer to the training data.
 
         Parameters
         ----------
         X : array-like, shape=(n_samples, n_timepoints) or
             (n_samples, n_channels, n_timepoints)
-            Time series data to cluster.
+            Data to cluster.
         y : array-like, optional
             Labels for the data (unused in clustering).
 
@@ -219,20 +274,18 @@ def fit(self, X: np.ndarray, y: np.ndarray = None):
 
     def plot(self, mode="all", title="", **kwargs):
         """
-        Plot clustering results and/or the decision graph.
+        Plot the clustering results and/or the decision graph.
 
         Parameters
         ----------
         mode : str, default="all"
-            One of "decision" (plot decision graph), "label" (plot clustered data),
-            or "all" (plot both).
+            One of "decision" (to plot the decision graph),
+            "label" (to plot cluster labels), or "all" (to plot both).
         title : str, optional
             Title for the plots.
         kwargs : dict
             Additional keyword arguments passed to plotting functions.
         """
-        import matplotlib.pyplot as plt
-
         if mode in {"decision", "all"}:
             plt.figure()
             plt.scatter(self.rho, self.delta, c=self.labels_, cmap="viridis")
diff --git a/aeon/clustering/density_based/tests/test_density_peak.py b/aeon/clustering/density_based/tests/test_density_peak.py
index ba6a14a0fe..434108b216 100644
--- a/aeon/clustering/density_based/tests/test_density_peak.py
+++ b/aeon/clustering/density_based/tests/test_density_peak.py
@@ -354,18 +354,16 @@ def test_density_peak_clusterer(dataset):
         distance_metric="euclidean",
         density_threshold=8,
         distance_threshold=5,
+        anormal=False,
     )
 
-    # Fit the clusterer on the dataset
     clusterer.fit(dataset)
 
-    # Print cluster labels and cluster centers
     print("Cluster Labels:")  # noqa
     print(clusterer.labels_)  # noqa
     print("\nCluster Centers:")  # noqa
     print(clusterer.cluster_centers)  # noqa
 
-    # Assertions to verify the clustering results
     assert clusterer.labels_ is not None, "Cluster labels should not be None"
     assert len(clusterer.labels_) == len(
         dataset
@@ -375,7 +373,6 @@ def test_density_peak_clusterer(dataset):
         len(clusterer.cluster_centers) > 0
     ), "There should be at least one cluster center"
 
-    # Optional: Plot decision graph and clustering results
     clusterer.plot(mode="all", title="Density Peak Clustering")
 
 

From e9daf14e35fd2b33363dac153c28797137680211 Mon Sep 17 00:00:00 2001
From: Aryan <aryanpola1603@gmail.com>
Date: Sun, 25 May 2025 06:31:53 +0530
Subject: [PATCH 4/4] ari=0.915552

---
 aeon/clustering/density_based/_density_peak.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aeon/clustering/density_based/_density_peak.py b/aeon/clustering/density_based/_density_peak.py
index d35a703278..2950fecdab 100644
--- a/aeon/clustering/density_based/_density_peak.py
+++ b/aeon/clustering/density_based/_density_peak.py
@@ -89,8 +89,8 @@ def _auto_select_dc(self):
         min_distance = np.min(distances)
         dc = (max_distance + min_distance) / 2
 
-        lower_bound = 0.01  # 0.002
-        upper_bound = 0.002  # 0.01  # target range: 0.2% to 1%
+        lower_bound = 0.01  # 0.2
+        upper_bound = 0.002  # 1
 
         while True:
             nneighs = np.sum(distances < dc) / (self.n**2)