
Commit 6089a99

Authored by yarikoptic, MatthewMiddlehurst and baraline

[MNT] Add codespell support (config, workflow to detect/not fix) and make it fix a "few" typos (#2653)

* Add github action to codespell main on push and PRs
* Add rudimentary codespell config
* Add pre-commit definition for codespell
* Some skips for codespell -- lots of work todo
* [DATALAD RUNCMD] run codespell throughout fixing typos automagically (but ignoring overall fail due to ambiguous ones)

  === Do not change lines below ===
  { "chain": [], "cmd": "codespell -w || :", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." }
  ^^^ Do not change lines above ^^^

* codespell notebook and testing
* incorrect typos and notebooks
* more typos
* config and fixes
* Update similarity_search.ipynb: fix notebook typos
* update skip
* temp typo and annotation workflow
* annotations
* fix typos

Co-authored-by: Matthew Middlehurst <pfm15hbu@gmail.com>
Co-authored-by: Antoine Guillaume <antoine.guillaume45@gmail.com>
1 parent 76abbdd commit 6089a99

File tree

167 files changed: +2579 −1768 lines


.github/actions/numba_cache/action.yml

Lines changed: 1 addition & 1 deletion
@@ -62,6 +62,6 @@ runs:
         path: ${{ github.workspace }}/.numba_cache
         # Try restore using today's date
         key: numba-${{ inputs.cache_name }}-${{ inputs.runner_os }}-${{ inputs.python_version }}-${{ env.CURRENT_DATE }}
-        # If cant restore with today's date try another cache (without date)
+        # If can't restore with today's date try another cache (without date)
         restore-keys: |
           numba-${{ inputs.cache_name }}-${{ inputs.runner_os }}-${{ inputs.python_version }}-
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+fpr
+mape
+recuse
+strat

.github/workflows/pr_precommit.yml

Lines changed: 13 additions & 0 deletions
@@ -60,3 +60,16 @@ jobs:
         with:
           commit_message: Automatic `pre-commit` fixes
           commit_user_name: aeon-actions-bot[bot]
+
+  codespell-annotations:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2

.pre-commit-config.yaml

Lines changed: 8 additions & 0 deletions
@@ -78,3 +78,11 @@ repos:
     hooks:
       - id: check-manifest
         stages: [ manual ]
+
+  - repo: https://github.com/codespell-project/codespell
+    # Configuration for codespell is in pyproject.toml
+    rev: v2.4.1
+    hooks:
+      - id: codespell
+        additional_dependencies:
+          - tomli # for python_version < '3.11'
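The new hook's comment points at pyproject.toml for codespell's settings, but that file's contents are not part of this excerpt. As a hypothetical sketch only, a `[tool.codespell]` table typically looks like the following (the keys are real codespell options; the values here are illustrative, not aeon's actual configuration):

```toml
# Illustrative only -- aeon's real skip/ignore values are not shown in this commit.
[tool.codespell]
# Words codespell flags but the project uses legitimately (e.g. variable names).
ignore-words-list = "fpr,mape,recuse,strat"
# Globs/paths codespell should not scan.
skip = "*.ipynb,./.git,./build"
check-hidden = true
```

The `tomli` dependency in the hook above exists precisely so codespell can read this table on Python versions before 3.11, where `tomllib` is not in the standard library.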

aeon/anomaly_detection/series/distance_based/_kmeans.py

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ class KMeansAD(BaseSeriesAnomalyDetector):

     stride : int, default=1
         The stride of the sliding window. The stride determines how many time points
-        the windows are spaced appart. A stride of 1 means that the window is moved one
+        the windows are spaced apart. A stride of 1 means that the window is moved one
         time point forward compared to the previous window. The larger the stride, the
         fewer windows are created, which leads to noisier anomaly scores.
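The corrected docstring describes how stride spaces consecutive windows. A minimal sketch of that behaviour (illustrative only, not aeon's internal implementation):

```python
import numpy as np

def sliding_windows(x, window_size, stride=1):
    """Extract sliding windows: consecutive windows are spaced `stride`
    time points apart, so a larger stride yields fewer windows."""
    starts = range(0, len(x) - window_size + 1, stride)
    return np.array([x[s:s + window_size] for s in starts])

x = np.arange(10)
print(sliding_windows(x, window_size=4, stride=1).shape)  # (7, 4): window moves one point at a time
print(sliding_windows(x, window_size=4, stride=3).shape)  # (3, 4): fewer windows, spaced 3 apart
```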

aeon/anomaly_detection/series/distance_based/_merlin.py

Lines changed: 5 additions & 5 deletions
@@ -102,26 +102,26 @@ def _predict(self, X):

         r = 2 * np.sqrt(self.min_length)
         distances = np.full(len(lengths), -1.0)
-        indicies = np.full(len(lengths), -1)
+        indices = np.full(len(lengths), -1)

-        indicies[0], distances[0] = self._find_index(X, lengths[0], r, np.multiply, 0.5)
+        indices[0], distances[0] = self._find_index(X, lengths[0], r, np.multiply, 0.5)

         for i in range(1, min(5, len(lengths))):
             r = distances[i - 1] * 0.99
-            indicies[i], distances[i] = self._find_index(
+            indices[i], distances[i] = self._find_index(
                 X, lengths[i], r, np.multiply, 0.99
             )

         for i in range(min(5, len(lengths)), len(lengths)):
             m = mean(distances[i - 5 : i])
             s = std(distances[i - 5 : i])
             r = m - 2 * s
-            indicies[i], distances[i] = self._find_index(
+            indices[i], distances[i] = self._find_index(
                 X, lengths[i], r, np.subtract, s
             )

         anomalies = np.zeros(X.shape[0], dtype=bool)
-        for i in indicies:
+        for i in indices:
             if i > -1:
                 anomalies[i] = True
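The second loop in the diff above seeds each search radius `r` from the mean and standard deviation of the previous five discord distances. That update can be isolated in a standalone sketch (illustrative only; the function name is made up and this is not the aeon implementation):

```python
import numpy as np

def next_radius(prev_distances):
    """Adaptive radius as in the loop above: mean minus two (population)
    standard deviations over the last five discord distances."""
    window = np.asarray(prev_distances[-5:], dtype=float)
    return window.mean() - 2 * window.std()

print(next_radius([4.0, 4.0, 4.0, 4.0, 4.0]))  # 4.0: zero spread keeps r at the mean
```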

aeon/anomaly_detection/series/outlier_detection/_stray.py

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@ class STRAY(BaseSeriesAnomalyDetector):
     ability to detect clusters of outliers in multidimensional data without
     requiring a model of the typical behavior of the system. However, it suffers
     from some limitations that affect its accuracy. STRAY is an extension of
-    HDoutliers that uses extreme value theory for the anomolous threshold
+    HDoutliers that uses extreme value theory for the anomalous threshold
     calculation, to deal with data streams that exhibit non-stationary behavior.

     Parameters
@@ -39,7 +39,7 @@ class STRAY(BaseSeriesAnomalyDetector):
         Proportion of possible candidates for outliers. This defines the starting point
         for the bottom up searching algorithm.
     size_threshold : int, default=50
-        Sample size to calculate an emperical threshold.
+        Sample size to calculate an empirical threshold.
     outlier_tail : str {"min", "max"}, default="max"
         Direction of the outlier tail.

aeon/base/_estimators/compose/collection_ensemble.py

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ class BaseCollectionEnsemble(ComposableEstimatorMixin, BaseCollectionEstimator):
         Only used if weights is a float. The method used to generate a performance
         estimation from the training data set i.e. cross-validation.
         If None, predictions are made using that estimators fit_predict or
-        fit_predict_proba methods. These are somtimes overridden for efficient
+        fit_predict_proba methods. These are sometimes overridden for efficient
         performance evaluations, i.e. out-of-bag predictions.
         If int or sklearn object input, the parameter is passed directly to the cv
         parameter of the cross_val_predict function from sklearn.

aeon/benchmarking/results_loaders.py

Lines changed: 2 additions & 2 deletions
@@ -201,7 +201,7 @@ def estimator_alias(name: str) -> str:
 def get_available_estimators(
     task: str = "classification", as_list: bool = False
 ) -> Union[pd.DataFrame, list]:
-    """Get a DataFrame of estimators avialable for a specific learning task.
+    """Get a DataFrame of estimators available for a specific learning task.

     Parameters
     ----------
@@ -251,7 +251,7 @@ def get_estimator_results(

     Parameters
     ----------
-    estimators : str ot list of str
+    estimators : str or list of str
         Estimator name or list of estimator names to search for. See
         get_available_estimators, aeon.benchmarking.results_loading.NAME_ALIASES or
         the directory at path for valid options.

aeon/benchmarking/stats.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ def nemenyi_test(ordered_avg_ranks, n_datasets, alpha):
     ordered_avg_ranks : np.array
         Average ranks of estimators.
     n_datasets : int
-        Mumber of datasets.
+        Number of datasets.
     alpha : float
         alpha level for Nemenyi test.
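The corrected docstring lists average ranks, the number of datasets and alpha, which are the inputs to the standard Nemenyi critical difference. A sketch of that standard formula (not aeon's code; here the studentized-range quantile `q_alpha` is passed in by the caller rather than looked up from a table):

```python
import math

def nemenyi_critical_difference(n_estimators, n_datasets, q_alpha):
    """Standard Nemenyi critical difference: two estimators differ
    significantly if their average ranks differ by more than this."""
    return q_alpha * math.sqrt(n_estimators * (n_estimators + 1) / (6 * n_datasets))

# q_alpha is roughly 2.343 for k=3 estimators at alpha=0.05.
print(nemenyi_critical_difference(3, 8, 2.343))
```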
