From ac185e23774d9482c949bb0c29e406f7068b47b8 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Mon, 17 Feb 2025 23:55:00 +0000 Subject: [PATCH 01/19] added numba prange to all pairwise distances --- aeon/distances/_distance.py | 25 +++++++++++++++++--- aeon/distances/_mpdist.py | 14 ++++++++--- aeon/distances/_sbd.py | 29 +++++++++++++++++++---- aeon/distances/_shift_scale_invariant.py | 15 +++++++++--- aeon/distances/elastic/_adtw.py | 19 +++++++++++---- aeon/distances/elastic/_ddtw.py | 19 +++++++++++---- aeon/distances/elastic/_dtw.py | 19 +++++++++++---- aeon/distances/elastic/_edr.py | 19 +++++++++++---- aeon/distances/elastic/_erp.py | 19 +++++++++++---- aeon/distances/elastic/_lcss.py | 19 +++++++++++---- aeon/distances/elastic/_msm.py | 19 +++++++++++---- aeon/distances/elastic/_shape_dtw.py | 19 +++++++++++---- aeon/distances/elastic/_soft_dtw.py | 19 +++++++++++---- aeon/distances/elastic/_twe.py | 19 +++++++++++---- aeon/distances/elastic/_wddtw.py | 19 +++++++++++---- aeon/distances/elastic/_wdtw.py | 19 +++++++++++---- aeon/distances/mindist/_dft_sfa.py | 15 ++++++++---- aeon/distances/mindist/_paa_sax.py | 20 ++++++++++++---- aeon/distances/mindist/_sax.py | 20 ++++++++++++---- aeon/distances/mindist/_sfa.py | 11 +++++++-- aeon/distances/pointwise/_euclidean.py | 30 ++++++++++++++++++++---- aeon/distances/pointwise/_manhattan.py | 30 ++++++++++++++++++++---- aeon/distances/pointwise/_minkowski.py | 19 +++++++++++---- aeon/distances/pointwise/_squared.py | 29 +++++++++++++++++++---- 24 files changed, 377 insertions(+), 108 deletions(-) diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py index 1263e11cb4..1cf5c35dc0 100644 --- a/aeon/distances/_distance.py +++ b/aeon/distances/_distance.py @@ -1,5 +1,6 @@ __maintainer__ = [] +import warnings from enum import Enum from typing import Any, Callable, Optional, TypedDict, Union @@ -173,6 +174,7 @@ def pairwise_distance( y: Optional[np.ndarray] = None, method: Union[str, DistanceFunction, None] = None, symmetric: bool = True, + n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: """Compute the pairwise distance matrix between two time series. @@ -197,6 +199,10 @@ def pairwise_distance( function is provided as the "method" parameter, then it will compute an asymmetric distance matrix, and the entire matrix (including both upper and lower triangles) is returned. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. kwargs : Any Extra arguments for distance. Refer to each distance documentation for a list of possible arguments. @@ -240,11 +246,13 @@ def pairwise_distance( [ 48.]]) """ if method in PAIRWISE_DISTANCE: - return DISTANCES_DICT[method]["pairwise_distance"](x, y, **kwargs) + return DISTANCES_DICT[method]["pairwise_distance"]( + x, y, n_jobs=n_jobs, **kwargs + ) elif isinstance(method, Callable): if y is None and not symmetric: - return _custom_func_pairwise(x, x, method, **kwargs) - return _custom_func_pairwise(x, y, method, **kwargs) + return _custom_func_pairwise(x, x, method, n_jobs=n_jobs, **kwargs) + return _custom_func_pairwise(x, y, method, n_jobs=n_jobs, **kwargs) else: raise ValueError("Method must be one of the supported strings or a callable") @@ -253,11 +261,22 @@ def _custom_func_pairwise( X: Optional[Union[np.ndarray, list[np.ndarray]]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, dist_func: Union[DistanceFunction, None] = None, + n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: if dist_func is None: raise ValueError("dist_func must be a callable") + if n_jobs != 1: + warnings.warn( + "You are using a custom distance function with n_jobs > 1. " + "Aeon does not support parallelization for custom distance " + "functions. If it is an existing aeon distance try using the " + "string name instead.", + UserWarning, + stacklevel=2, + ) + multivariate_conversion = _is_numpy_list_multivariate(X, y) X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) if y is None: diff --git a/aeon/distances/_mpdist.py b/aeon/distances/_mpdist.py index c679daef5c..c9f195cba9 100644 --- a/aeon/distances/_mpdist.py +++ b/aeon/distances/_mpdist.py @@ -1,5 +1,6 @@ """Matrix Profile Distances.""" +import warnings from typing import Optional, Union import numpy as np @@ -287,6 +288,7 @@ def mp_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, m: int = 0, + **kwargs, ) -> np.ndarray: """Compute the mpdist pairwise distance between a set of time series. @@ -339,14 +341,20 @@ def mp_pairwise_distance( [2.82842712], [2.82842712]]) """ + if "n_jobs" in kwargs: + warnings.warn( + "n_jobs is not supported for the mpdist distance method and will be " + "ignored.", + UserWarning, + stacklevel=2, + ) + if m == 0: + m = int(X.shape[2] / 4) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion ) - if m == 0: - m = int(_X.shape[2] / 4) - if y is None: return _mpdist_pairwise_distance_single(_X, m) diff --git a/aeon/distances/_sbd.py b/aeon/distances/_sbd.py index 1097f27b5a..b07652ad13 100644 --- a/aeon/distances/_sbd.py +++ b/aeon/distances/_sbd.py @@ -2,14 +2,16 @@ __maintainer__ = ["SebastianSchmidl"] +import warnings from typing import Optional, Union import numpy as np -from numba import njit, objmode +from numba import njit, objmode, prange, set_num_threads from numba.typed import List as NumbaList from scipy.signal import correlate from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -117,6 +119,8 @@ def sbd_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, standardize: bool = True, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """ Compute the shape-based distance (SBD) between all pairs of time series. @@ -138,6 +142,10 @@ def sbd_pairwise_distance( standardize : bool, default=True Apply z-score to both input time series for standardization before computing the distance. This makes SBD scaling invariant. Default is True. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -188,6 +196,17 @@ def sbd_pairwise_distance( [0.36754447, 0. , 0.29289322], [0.5527864 , 0.29289322, 0. ]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) + if n_jobs > 1: + warnings.warn( + "You have set n_jobs > 1. For this distance function " + "unless your data is very large (> 10000 time series), it is " + "recommended to use n_jobs=1. If this function is slower than " + "expected try setting n_jobs=1.", + UserWarning, + stacklevel=2, + ) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, _ = _convert_collection_to_numba_list(X, "", multivariate_conversion) @@ -199,14 +218,14 @@ def sbd_pairwise_distance( return _sbd_pairwise_distance(_X, _y, standardize) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _sbd_pairwise_distance_single( x: NumbaList[np.ndarray], standardize: bool ) -> np.ndarray: n_cases = len(x) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): distances[i, j] = sbd_distance(x[i], x[j], standardize) distances[j, i] = distances[i, j] @@ -214,7 +233,7 @@ def _sbd_pairwise_distance_single( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _sbd_pairwise_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], standardize: bool ) -> np.ndarray: @@ -222,7 +241,7 @@ def _sbd_pairwise_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = sbd_distance(x[i], y[j], standardize) return distances diff --git a/aeon/distances/_shift_scale_invariant.py b/aeon/distances/_shift_scale_invariant.py index 951b7ac560..66caa282fd 100644 --- a/aeon/distances/_shift_scale_invariant.py +++ b/aeon/distances/_shift_scale_invariant.py @@ -3,10 +3,11 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -160,6 +161,8 @@ def shift_scale_invariant_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, max_shift: Optional[int] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: r"""Compute the shift-scale invariant pairwise distance between time series. @@ -193,6 +196,10 @@ def shift_scale_invariant_pairwise_distance( Maximum shift allowed in the alignment path. If None, then max_shift is set to min(X.shape[-1], y.shape[-1]) or if y is None, max_shift is set to X.shape[-1]. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -223,6 +230,8 @@ def shift_scale_invariant_pairwise_distance( >>> y_univariate = np.array([11., 12., 13.]) >>> single_pw =shift_scale_invariant_pairwise_distance(X, y_univariate) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) if max_shift is None: if y is None: max_shift = X.shape[-1] @@ -308,7 +317,7 @@ def shift_scale_invariant_best_shift( raise ValueError("x and y must be 1D or 2D") -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _shift_invariant_pairwise_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], max_shift: int ) -> np.ndarray: @@ -316,7 +325,7 @@ def _shift_invariant_pairwise_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = shift_scale_invariant_distance(x[i], y[j], max_shift) return distances diff --git a/aeon/distances/elastic/_adtw.py b/aeon/distances/elastic/_adtw.py index feab2b4c18..7ab182f394 100644 --- a/aeon/distances/elastic/_adtw.py +++ b/aeon/distances/elastic/_adtw.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -203,6 +204,8 @@ def adtw_pairwise_distance( window: Optional[float] = None, itakura_max_slope: Optional[float] = None, warp_penalty: float = 1.0, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: r"""Compute the ADTW pairwise distance between a set of time series. @@ -226,6 +229,10 @@ def adtw_pairwise_distance( Penalty for warping. A high value will mean less warping. warp less and if value is low then will encourage algorithm to warp more. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -272,6 +279,8 @@ def adtw_pairwise_distance( [ 44., 0., 87.], [294., 87., 0.]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -290,7 +299,7 @@ def adtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _adtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -306,7 +315,7 @@ def _adtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -319,7 +328,7 @@ def _adtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _adtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -336,7 +345,7 @@ def _adtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_ddtw.py b/aeon/distances/elastic/_ddtw.py index 50007185fd..dffd3f71e2 100644 --- a/aeon/distances/elastic/_ddtw.py +++ b/aeon/distances/elastic/_ddtw.py @@ -5,7 +5,7 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path @@ -15,6 +15,7 @@ create_bounding_matrix, ) from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -172,6 +173,8 @@ def ddtw_pairwise_distance( y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, window: Optional[float] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the DDTW pairwise distance between a set of time series. @@ -191,6 +194,10 @@ def ddtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -237,6 +244,8 @@ def ddtw_pairwise_distance( [0., 0., 0.], [0., 0., 0.]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -254,7 +263,7 @@ def ddtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _ddtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -274,7 +283,7 @@ def _ddtw_pairwise_distance( for i in range(n_cases): X_average_of_slope.append(average_of_slope(X[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X_average_of_slope[i], X_average_of_slope[j] if unequal_length: @@ -287,7 +296,7 @@ def _ddtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _ddtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -313,7 +322,7 @@ def _ddtw_from_multiple_to_multiple_distance( for i in range(m_cases): y_average_of_slope.append(average_of_slope(y[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x_average_of_slope[i], y_average_of_slope[j] if unequal_length: diff --git a/aeon/distances/elastic/_dtw.py b/aeon/distances/elastic/_dtw.py index 73cce697ab..0f15892477 100644 --- a/aeon/distances/elastic/_dtw.py +++ b/aeon/distances/elastic/_dtw.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -233,6 +234,8 @@ def dtw_pairwise_distance( y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, window: Optional[float] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: r"""Compute the DTW pairwise distance between a set of time series. @@ -268,6 +271,10 @@ def dtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -314,6 +321,8 @@ def dtw_pairwise_distance( [ 42., 0., 83.], [292., 83., 0.]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -330,7 +339,7 @@ def dtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _dtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -345,7 +354,7 @@ def _dtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -358,7 +367,7 @@ def _dtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _dtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -374,7 +383,7 @@ def _dtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_edr.py b/aeon/distances/elastic/_edr.py index e14996ef7a..e3a2bf3bf1 100644 --- a/aeon/distances/elastic/_edr.py +++ b/aeon/distances/elastic/_edr.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -235,6 +236,8 @@ def edr_pairwise_distance( window: Optional[float] = None, epsilon: Optional[float] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the pairwise EDR distance between a set of time series. @@ -258,6 +261,10 @@ def edr_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -303,6 +310,8 @@ def edr_pairwise_distance( [0.75, 0. , 0.8 ], [0.6 , 0.8 , 0. ]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -322,7 +331,7 @@ def edr_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _edr_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -338,7 +347,7 @@ def _edr_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -351,7 +360,7 @@ def _edr_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _edr_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -368,7 +377,7 @@ def _edr_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_erp.py b/aeon/distances/elastic/_erp.py index 179b2f24f4..12ea5313ce 100644 --- a/aeon/distances/elastic/_erp.py +++ b/aeon/distances/elastic/_erp.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -255,6 +256,8 @@ def erp_pairwise_distance( g: float = 0.0, g_arr: Optional[np.ndarray] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the ERP pairwise distance between a set of time series. @@ -283,6 +286,10 @@ def erp_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -327,6 +334,8 @@ def erp_pairwise_distance( [16., 0., 28.], [44., 28., 0.]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -343,7 +352,7 @@ def erp_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _erp_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -361,7 +370,7 @@ def _erp_pairwise_distance( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -374,7 +383,7 @@ def _erp_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _erp_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -392,7 +401,7 @@ def _erp_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_lcss.py b/aeon/distances/elastic/_lcss.py index 23e1eb9fe2..6191335799 100644 --- a/aeon/distances/elastic/_lcss.py +++ b/aeon/distances/elastic/_lcss.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_lcss_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -228,6 +229,8 @@ def lcss_pairwise_distance( window: Optional[float] = None, epsilon: float = 1.0, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the LCSS pairwise distance between a set of time series. @@ -250,6 +253,10 @@ def lcss_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -295,6 +302,8 @@ def lcss_pairwise_distance( [0.66666667, 0. , 0.75 ], [1. , 0.75 , 0. ]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -312,7 +321,7 @@ def lcss_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _lcss_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -327,7 +336,7 @@ def _lcss_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -340,7 +349,7 @@ def _lcss_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _lcss_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -357,7 +366,7 @@ def _lcss_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_msm.py b/aeon/distances/elastic/_msm.py index c51eca3ab6..24f87b66ab 100644 --- a/aeon/distances/elastic/_msm.py +++ b/aeon/distances/elastic/_msm.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -350,6 +351,8 @@ def msm_pairwise_distance( independent: bool = True, c: float = 1.0, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the msm pairwise distance between a set of time series. @@ -374,6 +377,10 @@ def msm_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -419,6 +426,8 @@ def msm_pairwise_distance( [10., 0., 14.], [17., 14., 0.]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -438,7 +447,7 @@ def msm_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _msm_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -455,7 +464,7 @@ def _msm_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -468,7 +477,7 @@ def _msm_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _msm_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -486,7 +495,7 @@ def _msm_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_shape_dtw.py b/aeon/distances/elastic/_shape_dtw.py index 25a72cef10..c4933f0ff1 100644 --- a/aeon/distances/elastic/_shape_dtw.py +++ b/aeon/distances/elastic/_shape_dtw.py @@ -5,7 +5,7 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path @@ -13,6 +13,7 @@ from aeon.distances.elastic._dtw import _dtw_cost_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -525,6 +526,8 @@ def shape_dtw_pairwise_distance( transformation_precomputed: bool = False, transformed_x: Optional[np.ndarray] = None, transformed_y: Optional[np.ndarray] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the ShapeDTW pairwise distance among a set of series. @@ -563,6 +566,10 @@ def shape_dtw_pairwise_distance( The transformation of X, ignored if transformation_precomputed is False. transformed_y : np.ndarray, default = None The transformation of y, ignored if transformation_precomputed is False. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -609,6 +616,8 @@ def shape_dtw_pairwise_distance( [ 43., 0., 89.], [292., 89., 0.]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -644,7 +653,7 @@ def shape_dtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _shape_dtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -663,7 +672,7 @@ def _shape_dtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(len(X)): + for i in prange(len(X)): for j in range(i + 1, n_cases): x1_, x2_ = X[i], X[j] x1 = _pad_ts_edges(x=x1_, reach=reach) @@ -695,7 +704,7 @@ def _shape_dtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _shape_dtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -716,7 +725,7 @@ def _shape_dtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1_, y1_ = x[i], y[j] x1 = _pad_ts_edges(x=x1_, reach=reach) diff --git a/aeon/distances/elastic/_soft_dtw.py b/aeon/distances/elastic/_soft_dtw.py index 31b8743599..006498bbfd 100644 --- a/aeon/distances/elastic/_soft_dtw.py +++ b/aeon/distances/elastic/_soft_dtw.py @@ -5,7 +5,7 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path @@ -13,6 +13,7 @@ from aeon.distances.elastic._dtw import _dtw_cost_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -249,6 +250,8 @@ def soft_dtw_pairwise_distance( gamma: float = 1.0, window: Optional[float] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: r"""Compute the soft-DTW pairwise distance between a set of time series. @@ -270,6 +273,10 @@ def soft_dtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -316,6 +323,8 @@ def soft_dtw_pairwise_distance( [ 41.44055555, 0. , 82.43894439], [291.99999969, 82.43894439, 0. ]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -334,7 +343,7 @@ def soft_dtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _soft_dtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -350,7 +359,7 @@ def _soft_dtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -363,7 +372,7 @@ def _soft_dtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _soft_dtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -380,7 +389,7 @@ def _soft_dtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_twe.py b/aeon/distances/elastic/_twe.py index fa4a3f4dea..69003dd54c 100644 --- a/aeon/distances/elastic/_twe.py +++ b/aeon/distances/elastic/_twe.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -250,6 +251,8 @@ def twe_pairwise_distance( nu: float = 0.001, lmbda: float = 1.0, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the TWE pairwise distance between a set of time series. @@ -274,6 +277,10 @@ def twe_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -319,6 +326,8 @@ def twe_pairwise_distance( [13.005, 0. , 18.007], [19.006, 18.007, 0. ]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -336,7 +345,7 @@ def twe_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _twe_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -359,7 +368,7 @@ def _twe_pairwise_distance( for i in range(n_cases): padded_X.append(_pad_arrs(X[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = padded_X[i], padded_X[j] if unequal_length: @@ -372,7 +381,7 @@ def _twe_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _twe_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -399,7 +408,7 @@ def _twe_from_multiple_to_multiple_distance( for i in range(m_cases): padded_y.append(_pad_arrs(y[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = padded_x[i], padded_y[j] if unequal_length: diff --git a/aeon/distances/elastic/_wddtw.py b/aeon/distances/elastic/_wddtw.py index 9a49728c30..0987cf7555 100644 --- a/aeon/distances/elastic/_wddtw.py +++ b/aeon/distances/elastic/_wddtw.py @@ -5,7 +5,7 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path @@ -13,6 +13,7 @@ from aeon.distances.elastic._ddtw import average_of_slope from aeon.distances.elastic._wdtw import _wdtw_cost_matrix, _wdtw_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -177,6 +178,8 @@ def wddtw_pairwise_distance( window: Optional[float] = None, g: float = 0.05, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the WDDTW pairwise distance between a set of time series. @@ -199,6 +202,10 @@ def wddtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Raises ------ @@ -240,6 +247,8 @@ def wddtw_pairwise_distance( [0., 0., 0.], [0., 0., 0.]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -258,7 +267,7 @@ def wddtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _wddtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -278,7 +287,7 @@ def _wddtw_pairwise_distance( for i in range(n_cases): X_average_of_slope.append(average_of_slope(X[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X_average_of_slope[i], X_average_of_slope[j] if unequal_length: @@ -291,7 +300,7 @@ def _wddtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _wddtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -318,7 +327,7 @@ def _wddtw_from_multiple_to_multiple_distance( for i in range(m_cases): y_average_of_slope.append(average_of_slope(y[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x_average_of_slope[i], y_average_of_slope[j] if unequal_length: diff --git a/aeon/distances/elastic/_wdtw.py b/aeon/distances/elastic/_wdtw.py index 3ad1767c9e..d3659bbd32 100644 --- a/aeon/distances/elastic/_wdtw.py +++ b/aeon/distances/elastic/_wdtw.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -241,6 +242,8 @@ def wdtw_pairwise_distance( window: Optional[float] = None, g: float = 0.05, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the WDTW pairwise distance between a set of time series. @@ -263,6 +266,10 @@ def wdtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -308,6 +315,8 @@ def wdtw_pairwise_distance( [ 20.25043711, 0. , 39.64543037], [139.70656066, 39.64543037, 0. ]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -324,7 +333,7 @@ def wdtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _wdtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -340,7 +349,7 @@ def _wdtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -353,7 +362,7 @@ def _wdtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _wdtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -370,7 +379,7 @@ def _wdtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/mindist/_dft_sfa.py b/aeon/distances/mindist/_dft_sfa.py index 5f6e856260..7d29cf8e9b 100644 --- a/aeon/distances/mindist/_dft_sfa.py +++ b/aeon/distances/mindist/_dft_sfa.py @@ -3,9 +3,10 @@ from typing import Union import numpy as np -from numba import njit, prange +from numba import njit, prange, set_num_threads from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -86,7 +87,7 @@ def _univariate_dft_sfa_distance( def mindist_dft_sfa_pairwise_distance( - X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray + X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n_jobs: int = 1, **kwargs ) -> np.ndarray: """Compute the DFT SFA pairwise distance between a set of SFA representations. @@ -98,6 +99,10 @@ def mindist_dft_sfa_pairwise_distance( A collection of SFA instances of shape ``(n_instances, n_timepoints)``. breakpoints: np.ndarray The breakpoints of the SAX transformation + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -110,6 +115,8 @@ def mindist_dft_sfa_pairwise_distance( If X is not 2D array when only passing X. If X and y are not 1D, 2D arrays when passing both X and y. """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -132,7 +139,7 @@ def _dft_sfa_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, n_instances)) for i in prange(n_instances): - for j in prange(i + 1, n_instances): + for j in range(i + 1, n_instances): distances[i, j] = _univariate_dft_sfa_distance(X[i], X[j], breakpoints) distances[j, i] = distances[i, j] else: @@ -141,7 +148,7 @@ def _dft_sfa_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, m_instances)) for i in prange(n_instances): - for j in prange(m_instances): + for j in range(m_instances): distances[i, j] = _univariate_dft_sfa_distance(X[i], y[j], breakpoints) return distances diff --git a/aeon/distances/mindist/_paa_sax.py b/aeon/distances/mindist/_paa_sax.py index a53a8b35aa..e8950f94f7 100644 --- a/aeon/distances/mindist/_paa_sax.py +++ b/aeon/distances/mindist/_paa_sax.py @@ -1,9 +1,10 @@ __maintainer__ = [] import numpy as np -from numba import njit, prange +from numba import njit, prange, set_num_threads from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -91,7 +92,12 @@ def _univariate_paa_sax_distance( def mindist_paa_sax_pairwise_distance( - X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n: int + X: np.ndarray, + y: np.ndarray, + breakpoints: np.ndarray, + n: int, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the PAA SAX pairwise distance between a set of SAX representations. @@ -105,6 +111,10 @@ def mindist_paa_sax_pairwise_distance( The breakpoints of the SAX transformation n : int The original size of the time series + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -118,6 +128,8 @@ def mindist_paa_sax_pairwise_distance( If X and y are not 1D, 2D arrays when passing both X and y. """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -139,7 +151,7 @@ def _paa_sax_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, n_instances)) for i in prange(n_instances): - for j in prange(i + 1, n_instances): + for j in range(i + 1, n_instances): distances[i, j] = _univariate_paa_sax_distance( X[i], X[j], breakpoints, n ) @@ -150,7 +162,7 @@ def _paa_sax_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, m_instances)) for i in prange(n_instances): - for j in prange(m_instances): + for j in range(m_instances): distances[i, j] = _univariate_paa_sax_distance( X[i], y[j], breakpoints, n ) diff --git a/aeon/distances/mindist/_sax.py b/aeon/distances/mindist/_sax.py index cdecfb2ebc..dec2582d86 100644 --- a/aeon/distances/mindist/_sax.py +++ b/aeon/distances/mindist/_sax.py @@ -3,9 +3,10 @@ from typing import Union import numpy as np -from numba import njit, prange +from numba import njit, prange, set_num_threads from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -85,7 +86,12 @@ def _univariate_sax_distance( def mindist_sax_pairwise_distance( - X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n: int + X: np.ndarray, + y: np.ndarray, + breakpoints: np.ndarray, + n: int, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the SAX pairwise distance between a set of SAX representations. @@ -99,6 +105,10 @@ def mindist_sax_pairwise_distance( The breakpoints of the SAX transformation n : int The original size of the time series + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -112,6 +122,8 @@ def mindist_sax_pairwise_distance( If X and y are not 1D, 2D arrays when passing both X and y. """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion @@ -134,7 +146,7 @@ def _sax_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, n_instances)) for i in prange(n_instances): - for j in prange(i + 1, n_instances): + for j in range(i + 1, n_instances): distances[i, j] = _univariate_sax_distance(X[i], X[j], breakpoints, n) distances[j, i] = distances[i, j] else: @@ -143,7 +155,7 @@ def _sax_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, m_instances)) for i in prange(n_instances): - for j in prange(m_instances): + for j in range(m_instances): distances[i, j] = _univariate_sax_distance(X[i], y[j], breakpoints, n) return distances diff --git a/aeon/distances/mindist/_sfa.py b/aeon/distances/mindist/_sfa.py index e9c6cf8638..152c679471 100644 --- a/aeon/distances/mindist/_sfa.py +++ b/aeon/distances/mindist/_sfa.py @@ -3,9 +3,10 @@ from typing import Union import numpy as np -from numba import njit, prange +from numba import njit, prange, set_num_threads from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -78,7 +79,7 @@ def _univariate_sfa_distance( def mindist_sfa_pairwise_distance( - X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray + X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n_jobs: int = 1, **kwargs ) -> np.ndarray: """Compute the SFA mindist pairwise distance between a set of SFA representations. @@ -90,6 +91,10 @@ def mindist_sfa_pairwise_distance( A collection of SFA instances of shape ``(n_instances, n_timepoints)``. breakpoints: np.ndarray The breakpoints of the SAX transformation + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -103,6 +108,8 @@ def mindist_sfa_pairwise_distance( If X and y are not 1D, 2D arrays when passing both X and y. """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/pointwise/_euclidean.py b/aeon/distances/pointwise/_euclidean.py index f7f0a640d4..c1e9794d60 100644 --- a/aeon/distances/pointwise/_euclidean.py +++ b/aeon/distances/pointwise/_euclidean.py @@ -1,9 +1,10 @@ __maintainer__ = [] +import warnings from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.distances.pointwise._squared import ( @@ -11,6 +12,7 @@ squared_distance, ) from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -72,6 +74,8 @@ def _univariate_euclidean_distance(x: np.ndarray, y: np.ndarray) -> float: def euclidean_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the Euclidean pairwise distance between a set of time series. @@ -85,6 +89,10 @@ def euclidean_pairwise_distance( ``(m_cases, m_timepoints)`` or ``(m_cases, m_channels, m_timepoints)``. If None, then the euclidean pairwise distance between the instances of X is calculated. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -128,6 +136,18 @@ def euclidean_pairwise_distance( [ 5.19615242, 0. , 8. ], [12.12435565, 8. , 0. ]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) + if n_jobs > 1: + warnings.warn( + "You have set n_jobs > 1. For this distance function " + "unless your data is very large (> 10000 time series), it is " + "recommended to use n_jobs=1. If this function is slower than " + "expected try setting n_jobs=1.", + UserWarning, + stacklevel=2, + ) + multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) if y is None: @@ -138,12 +158,12 @@ def euclidean_pairwise_distance( return _euclidean_from_multiple_to_multiple_distance(_X, _y) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _euclidean_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): distances[i, j] = euclidean_distance(X[i], X[j]) distances[j, i] = distances[i, j] @@ -151,7 +171,7 @@ def _euclidean_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _euclidean_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray] ) -> np.ndarray: @@ -159,7 +179,7 @@ def _euclidean_from_multiple_to_multiple_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = euclidean_distance(x[i], y[j]) return distances diff --git a/aeon/distances/pointwise/_manhattan.py b/aeon/distances/pointwise/_manhattan.py index 5c4a80e7a2..1b034a360d 100644 --- a/aeon/distances/pointwise/_manhattan.py +++ b/aeon/distances/pointwise/_manhattan.py @@ -1,12 +1,14 @@ __maintainer__ = [] +import warnings from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -77,6 +79,8 @@ def _univariate_manhattan_distance(x: np.ndarray, y: np.ndarray) -> float: def manhattan_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the manhattan pairwise distance between a set of time series. @@ -90,6 +94,10 @@ def manhattan_pairwise_distance( ``(m_cases, m_timepoints)`` or ``(m_cases, m_channels, m_timepoints)``. If None, then the manhattan pairwise distance between the instances of X is calculated. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -133,6 +141,18 @@ def manhattan_pairwise_distance( [ 9., 0., 16.], [21., 16., 0.]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) + if n_jobs > 1: + warnings.warn( + "You have set n_jobs > 1. For this distance function " + "unless your data is very large (> 10000 time series), it is " + "recommended to use n_jobs=1. If this function is slower than " + "expected try setting n_jobs=1.", + UserWarning, + stacklevel=2, + ) + multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) if y is None: @@ -142,12 +162,12 @@ def manhattan_pairwise_distance( return _manhattan_from_multiple_to_multiple_distance(_X, _y) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _manhattan_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): distances[i, j] = manhattan_distance(X[i], X[j]) distances[j, i] = distances[i, j] @@ -155,7 +175,7 @@ def _manhattan_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _manhattan_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray] ) -> np.ndarray: @@ -163,7 +183,7 @@ def _manhattan_from_multiple_to_multiple_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = manhattan_distance(x[i], y[j]) return distances diff --git a/aeon/distances/pointwise/_minkowski.py b/aeon/distances/pointwise/_minkowski.py index d25b504403..f441727aa0 100644 --- a/aeon/distances/pointwise/_minkowski.py +++ b/aeon/distances/pointwise/_minkowski.py @@ -3,10 +3,11 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -131,6 +132,8 @@ def minkowski_pairwise_distance( y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, p: float = 2.0, w: Optional[np.ndarray] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the Minkowski pairwise distance between a set of time series. @@ -150,6 +153,10 @@ def minkowski_pairwise_distance( w : np.ndarray, default=None An array of weights, applied to each pairwise calculation. The weights should match the shape of the time series in X and y. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -202,6 +209,8 @@ def minkowski_pairwise_distance( [ 5.19615242, 0. , 8. ], [12.12435565, 8. , 0. ]]) """ + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) if y is None: @@ -211,14 +220,14 @@ def minkowski_pairwise_distance( return _minkowski_from_multiple_to_multiple_distance(_X, _y, p, w) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _minkowski_pairwise_distance( X: NumbaList[np.ndarray], p: float, w: Optional[np.ndarray] = None ) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): if w is None: distances[i, j] = minkowski_distance(X[i], X[j], p) @@ -232,7 +241,7 @@ def _minkowski_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _minkowski_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -243,7 +252,7 @@ def _minkowski_from_multiple_to_multiple_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): if w is None: distances[i, j] = minkowski_distance(x[i], y[j], p) diff --git a/aeon/distances/pointwise/_squared.py b/aeon/distances/pointwise/_squared.py index 045466ef51..a4de9cd131 100644 --- a/aeon/distances/pointwise/_squared.py +++ b/aeon/distances/pointwise/_squared.py @@ -1,12 +1,14 @@ __maintainer__ = [] +import warnings from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange, set_num_threads from numba.typed import List as NumbaList from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -76,6 +78,8 @@ def _univariate_squared_distance(x: np.ndarray, y: np.ndarray) -> float: def squared_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, + n_jobs: int = 1, + **kwargs, ) -> np.ndarray: """Compute the squared pairwise distance between a set of time series. @@ -89,6 +93,10 @@ def squared_pairwise_distance( ``(m_cases, m_timepoints)`` or ``(m_cases, m_channels, m_timepoints)``. If None, then the squared pairwise distance between the instances of X is calculated. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -132,6 +140,17 @@ def squared_pairwise_distance( [ 27., 0., 64.], [147., 64., 0.]]) """ + n_jobs = check_n_jobs(n_jobs) + if n_jobs > 1: + warnings.warn( + "You have set n_jobs > 1. For this distance function " + "unless your data is very large (> 10000 time series), it is " + "recommended to use n_jobs=1. If this function is slower than " + "expected try setting n_jobs=1.", + UserWarning, + stacklevel=2, + ) + set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) @@ -143,12 +162,12 @@ def squared_pairwise_distance( return _squared_from_multiple_to_multiple_distance(_X, _y) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _squared_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): distances[i, j] = squared_distance(X[i], X[j]) distances[j, i] = distances[i, j] @@ -156,7 +175,7 @@ def _squared_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _squared_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray] ) -> np.ndarray: @@ -164,7 +183,7 @@ def _squared_from_multiple_to_multiple_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = squared_distance(x[i], y[j]) return distances From c4066233b51354768c87e68aa6ff84fd1b22c9f1 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 18 Feb 2025 00:02:50 +0000 Subject: [PATCH 02/19] sfa update --- aeon/distances/mindist/_sfa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/distances/mindist/_sfa.py b/aeon/distances/mindist/_sfa.py index 152c679471..82effbfc4d 100644 --- a/aeon/distances/mindist/_sfa.py +++ b/aeon/distances/mindist/_sfa.py @@ -132,7 +132,7 @@ def _sfa_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, n_instances)) for i in prange(n_instances): - for j in prange(i + 1, n_instances): + for j in range(i + 1, n_instances): distances[i, j] = _univariate_sfa_distance(X[i], X[j], breakpoints) distances[j, i] = distances[i, j] else: @@ -141,7 +141,7 @@ def _sfa_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, m_instances)) for i in prange(n_instances): - for j in prange(m_instances): + for j in range(m_instances): distances[i, j] = _univariate_sfa_distance(X[i], y[j], breakpoints) return distances From cbf5e191f23500b05aed0649de929dfe4e92e208 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 18 Feb 2025 12:04:52 +0000 Subject: [PATCH 03/19] changed warning --- aeon/distances/_sbd.py | 2 +- aeon/distances/pointwise/_euclidean.py | 2 +- aeon/distances/pointwise/_manhattan.py | 2 +- aeon/distances/pointwise/_squared.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/aeon/distances/_sbd.py b/aeon/distances/_sbd.py index b07652ad13..f96515b8bc 100644 --- a/aeon/distances/_sbd.py +++ b/aeon/distances/_sbd.py @@ -201,7 +201,7 @@ def sbd_pairwise_distance( if n_jobs > 1: warnings.warn( "You have set n_jobs > 1. For this distance function " - "unless your data is very large (> 10000 time series), it is " + "unless your data has a large number of time points, it is " "recommended to use n_jobs=1. If this function is slower than " "expected try setting n_jobs=1.", UserWarning, diff --git a/aeon/distances/pointwise/_euclidean.py b/aeon/distances/pointwise/_euclidean.py index c1e9794d60..2b5f05b5fe 100644 --- a/aeon/distances/pointwise/_euclidean.py +++ b/aeon/distances/pointwise/_euclidean.py @@ -141,7 +141,7 @@ def euclidean_pairwise_distance( if n_jobs > 1: warnings.warn( "You have set n_jobs > 1. For this distance function " - "unless your data is very large (> 10000 time series), it is " + "unless your data has a large number of time points, it is " "recommended to use n_jobs=1. If this function is slower than " "expected try setting n_jobs=1.", UserWarning, diff --git a/aeon/distances/pointwise/_manhattan.py b/aeon/distances/pointwise/_manhattan.py index 1b034a360d..fe58f8e772 100644 --- a/aeon/distances/pointwise/_manhattan.py +++ b/aeon/distances/pointwise/_manhattan.py @@ -146,7 +146,7 @@ def manhattan_pairwise_distance( if n_jobs > 1: warnings.warn( "You have set n_jobs > 1. For this distance function " - "unless your data is very large (> 10000 time series), it is " + "unless your data has a large number of time points, it is " "recommended to use n_jobs=1. If this function is slower than " "expected try setting n_jobs=1.", UserWarning, diff --git a/aeon/distances/pointwise/_squared.py b/aeon/distances/pointwise/_squared.py index a4de9cd131..5c3405bc50 100644 --- a/aeon/distances/pointwise/_squared.py +++ b/aeon/distances/pointwise/_squared.py @@ -144,7 +144,7 @@ def squared_pairwise_distance( if n_jobs > 1: warnings.warn( "You have set n_jobs > 1. For this distance function " - "unless your data is very large (> 10000 time series), it is " + "unless your data has a large number of time points, it is " "recommended to use n_jobs=1. If this function is slower than " "expected try setting n_jobs=1.", UserWarning, From 5f74e4e67b322aa9a6c20283c2aa6aaa986d2df1 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 28 Feb 2025 15:25:06 +0100 Subject: [PATCH 04/19] thread kneighbors --- .../distance_based/_time_series_neighbors.py | 109 +++++++++++------- .../tests/test_time_series_neighbors.py | 4 +- 2 files changed, 66 insertions(+), 47 deletions(-) diff --git a/aeon/classification/distance_based/_time_series_neighbors.py b/aeon/classification/distance_based/_time_series_neighbors.py index f89b1be636..4bde22da44 100644 --- a/aeon/classification/distance_based/_time_series_neighbors.py +++ b/aeon/classification/distance_based/_time_series_neighbors.py @@ -5,6 +5,7 @@ distances in aeon.distances. """ +import numbers from typing import Optional __maintainer__ = [] @@ -15,7 +16,8 @@ import numpy as np from aeon.classification.base import BaseClassifier -from aeon.distances import get_distance_function +from aeon.distances import pairwise_distance +from aeon.utils.validation import check_n_jobs WEIGHTS_SUPPORTED = ["uniform", "distance"] @@ -111,7 +113,6 @@ def _fit(self, X, y): y : array-like, shape = (n_cases) The class labels. """ - self.metric_ = get_distance_function(method=self.distance) self.X_ = X self.classes_, self.y_ = np.unique(y, return_inverse=True) return self @@ -163,61 +164,81 @@ def _predict(self, X): """ self._check_is_fitted() - preds = np.empty(len(X), dtype=self.classes_.dtype) - for i in range(len(X)): - scores = np.zeros(len(self.classes_)) - idx, weights = self._kneighbors(X[i]) - for id, w in zip(idx, weights): - predicted_class = self.y_[id] - scores[predicted_class] += w + indexes = self.kneighbors(X, return_distance=False)[:, 0] + return self.classes_[self.y_[indexes]] - preds[i] = self.classes_[np.argmax(scores)] + def kneighbors(self, X=None, n_neighbors=None, return_distance=True): + """Find the K-neighbors of a point. - return preds - - def _kneighbors(self, X): - """ - Find the K-neighbors of a point. - - Returns indices and weights of each point. + Returns indices of and distances to the neighbors of each point. Parameters ---------- - X : np.ndarray - A single time series instance if shape = (n_channels, n_timepoints) + X : 3D np.ndarray of shape = (n_cases, n_channels, n_timepoints) or list of + shape [n_cases] of 2D arrays shape (n_channels,n_timepoints_i) + The query point or points. + If not provided, neighbors of each indexed point are returned. + In this case, the query point is not considered its own neighbor. + n_neighbors : int, default=None + Number of neighbors required for each sample. The default is the value + passed to the constructor. + return_distance : bool, default=True + Whether or not to return the distances. Returns ------- - ind : array + neigh_dist : ndarray of shape (n_queries, n_neighbors) + Array representing the distances to points, only present if + return_distance=True. + neigh_ind : ndarray of shape (n_queries, n_neighbors) Indices of the nearest points in the population matrix. - ws : array - Array representing the weights of each neighbor. """ - distances = np.array( - [ - self.metric_(X, self.X_[j], **self._distance_params) - for j in range(len(self.X_)) - ] - ) + self._check_is_fitted() + n_jobs = check_n_jobs(self.n_jobs) + + if n_neighbors is None: + n_neighbors = self.n_neighbors + elif n_neighbors <= 0: + raise ValueError(f"Expected n_neighbors > 0. Got {n_neighbors}") + elif not isinstance(n_neighbors, numbers.Integral): + raise TypeError( + f"n_neighbors does not take {type(n_neighbors)} value, " + "enter integer value" + ) - # Find indices of k nearest neighbors using partitioning: - # [0..k-1], [k], [k+1..n-1] - # They might not be ordered within themselves, - # but it is not necessary and partitioning is - # O(n) while sorting is O(nlogn) - closest_idx = np.argpartition(distances, self.n_neighbors) - closest_idx = closest_idx[: self.n_neighbors] - - if self.weights == "distance": - ws = distances[closest_idx] - # Using epsilon ~= 0 to avoid division by zero - ws = 1 / (ws + np.finfo(float).eps) - elif self.weights == "uniform": - ws = np.repeat(1.0, self.n_neighbors) + query_is_train = X is None + if query_is_train: + X = self.X_ + n_neighbors += 1 else: - raise Exception(f"Invalid kNN weights: {self.weights}") + X = self._preprocess_collection(X, store_metadata=False) + self._check_shape(X) + + distances = pairwise_distance( + X, + self.X_ if not query_is_train else None, + method=self.distance, + n_jobs=n_jobs, + **self._distance_params, + ) + + sample_range = np.arange(distances.shape[0])[:, None] + neigh_ind = np.argpartition(distances, n_neighbors - 1, axis=1) + neigh_ind = neigh_ind[:, :n_neighbors] + neigh_ind = neigh_ind[ + sample_range, np.argsort(distances[sample_range, neigh_ind]) + ] + + if query_is_train: + neigh_ind = neigh_ind[:, 1:] + + if return_distance: + if query_is_train: + neigh_dist = distances[sample_range, neigh_ind] + return neigh_dist, neigh_ind + return distances[sample_range, neigh_ind], neigh_ind - return closest_idx, ws + return neigh_ind @classmethod def _get_test_params( diff --git a/aeon/classification/distance_based/tests/test_time_series_neighbors.py b/aeon/classification/distance_based/tests/test_time_series_neighbors.py index 7746439d94..d62ba3308a 100644 --- a/aeon/classification/distance_based/tests/test_time_series_neighbors.py +++ b/aeon/classification/distance_based/tests/test_time_series_neighbors.py @@ -45,9 +45,7 @@ def test_knn_on_unit_test(distance_key): # load arrowhead data for unit tests X_train, y_train = load_unit_test(split="train") X_test, y_test = load_unit_test(split="test") - knn = KNeighborsTimeSeriesClassifier( - distance=distance_key, - ) + knn = KNeighborsTimeSeriesClassifier(distance=distance_key, n_neighbors=3) knn.fit(X_train, y_train) pred = knn.predict(X_test) correct = 0 From 4c95abbd5fa1fc6a624b678c89255ff9a58da660 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 28 Feb 2025 15:28:51 +0100 Subject: [PATCH 05/19] fix test --- .../distance_based/tests/test_time_series_neighbors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/classification/distance_based/tests/test_time_series_neighbors.py b/aeon/classification/distance_based/tests/test_time_series_neighbors.py index d62ba3308a..917b2628d4 100644 --- a/aeon/classification/distance_based/tests/test_time_series_neighbors.py +++ b/aeon/classification/distance_based/tests/test_time_series_neighbors.py @@ -45,7 +45,7 @@ def test_knn_on_unit_test(distance_key): # load arrowhead data for unit tests X_train, y_train = load_unit_test(split="train") X_test, y_test = load_unit_test(split="test") - knn = KNeighborsTimeSeriesClassifier(distance=distance_key, n_neighbors=3) + knn = KNeighborsTimeSeriesClassifier(distance=distance_key) knn.fit(X_train, y_train) pred = knn.predict(X_test) correct = 0 From 3ff96b102437b0f84e5bda4a711c816e2dc364ad Mon Sep 17 00:00:00 2001 From: chrisholder Date: Mon, 3 Mar 2025 16:52:44 +0100 Subject: [PATCH 06/19] custom pairwise threaded --- aeon/distances/_distance.py | 66 +++++++++++++++++++++++++++---------- aeon/utils/numba/general.py | 26 +++++++++++++++ 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py index 1cf5c35dc0..25e9a13211 100644 --- a/aeon/distances/_distance.py +++ b/aeon/distances/_distance.py @@ -1,10 +1,11 @@ __maintainer__ = [] -import warnings from enum import Enum from typing import Any, Callable, Optional, TypedDict, Union import numpy as np +from joblib import Parallel, delayed +from numba import set_num_threads from typing_extensions import Unpack from aeon.distances._mpdist import mp_distance, mp_pairwise_distance @@ -84,6 +85,7 @@ squared_pairwise_distance, ) from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -264,40 +266,54 @@ def _custom_func_pairwise( n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: + n_jobs = check_n_jobs(n_jobs) + set_num_threads(n_jobs) if dist_func is None: raise ValueError("dist_func must be a callable") - if n_jobs != 1: - warnings.warn( - "You are using a custom distance function with n_jobs > 1. " - "Aeon does not support parallelization for custom distance " - "functions. If it is an existing aeon distance try using the " - "string name instead.", - UserWarning, - stacklevel=2, - ) - multivariate_conversion = _is_numpy_list_multivariate(X, y) X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) + + if n_jobs > 1: + X = np.array(X) + if y is None: # To self - return _custom_pairwise_distance(X, dist_func, **kwargs) + return _custom_pairwise_distance(X, dist_func, n_jobs=n_jobs, **kwargs) y, _ = _convert_collection_to_numba_list(y, "y", multivariate_conversion) - return _custom_from_multiple_to_multiple_distance(X, y, dist_func, **kwargs) + if n_jobs > 1: + y = np.array(y) + return _custom_from_multiple_to_multiple_distance( + X, y, dist_func, n_jobs=n_jobs, **kwargs + ) def _custom_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], dist_func: DistanceFunction, + n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): - for j in range(i + 1, n_cases): + def compute_single_distance(i, j): + return i, j, dist_func(X[i], X[j], **kwargs) + + indices = [(i, j) for i in range(n_cases) for j in range(i + 1, n_cases)] + + if n_jobs == 1: + for i, j in indices: distances[i, j] = dist_func(X[i], X[j], **kwargs) - distances[j, i] = distances[i, j] + distances[j, i] = distances[i, j] # Mirror for symmetry + else: + results = Parallel(n_jobs=n_jobs)( + delayed(compute_single_distance)(i, j) for i, j in indices + ) + + for i, j, dist in results: + distances[i, j] = dist + distances[j, i] = dist # Mirror for symmetry return distances @@ -306,15 +322,29 @@ def _custom_from_multiple_to_multiple_distance( x: Union[np.ndarray, list[np.ndarray]], y: Union[np.ndarray, list[np.ndarray]], dist_func: DistanceFunction, + n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: n_cases = len(x) m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): - for j in range(m_cases): + def compute_single_distance(i, j): + return i, j, dist_func(x[i], y[j], **kwargs) + + indices = [(i, j) for i in range(n_cases) for j in range(m_cases)] + + if n_jobs == 1: + for i, j in indices: distances[i, j] = dist_func(x[i], y[j], **kwargs) + else: + results = Parallel(n_jobs=n_jobs)( + delayed(compute_single_distance)(i, j) for i, j in indices + ) + + for i, j, dist in results: + distances[i, j] = dist + return distances diff --git a/aeon/utils/numba/general.py b/aeon/utils/numba/general.py index 10e96abde6..6fefa0e1d5 100644 --- a/aeon/utils/numba/general.py +++ b/aeon/utils/numba/general.py @@ -772,3 +772,29 @@ def get_all_subsequences(X: np.ndarray, length: int, dilation: int) -> np.ndarra out_shape = (n_timestamps - (length - 1) * dilation, n_features, np.int64(length)) strides = (s1, s0, s1 * dilation) return np.lib.stride_tricks.as_strided(X, shape=out_shape, strides=strides) + + +def is_numba_function(func) -> bool: + """Determine if a function is compiled with Numba. + + Parameters + ---------- + func : callable + The function to check. + + Returns + ------- + bool + True if the function is compiled with Numba. + """ + if hasattr(func, "nopython_signatures") or hasattr(func, "__numba__"): + return True + + if hasattr(func, "_numba_type_"): + return True + + module_name = getattr(func, "__module__", "") + if module_name and module_name.startswith("numba."): + return True + + return False From 2a74496c72d1e0c32b2d621345fe1e7d038320cc Mon Sep 17 00:00:00 2001 From: chrisholder Date: Mon, 3 Mar 2025 17:08:36 +0100 Subject: [PATCH 07/19] fixed --- aeon/utils/numba/general.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/aeon/utils/numba/general.py b/aeon/utils/numba/general.py index 6fefa0e1d5..10e96abde6 100644 --- a/aeon/utils/numba/general.py +++ b/aeon/utils/numba/general.py @@ -772,29 +772,3 @@ def get_all_subsequences(X: np.ndarray, length: int, dilation: int) -> np.ndarra out_shape = (n_timestamps - (length - 1) * dilation, n_features, np.int64(length)) strides = (s1, s0, s1 * dilation) return np.lib.stride_tricks.as_strided(X, shape=out_shape, strides=strides) - - -def is_numba_function(func) -> bool: - """Determine if a function is compiled with Numba. - - Parameters - ---------- - func : callable - The function to check. - - Returns - ------- - bool - True if the function is compiled with Numba. - """ - if hasattr(func, "nopython_signatures") or hasattr(func, "__numba__"): - return True - - if hasattr(func, "_numba_type_"): - return True - - module_name = getattr(func, "__module__", "") - if module_name and module_name.startswith("numba."): - return True - - return False From 4afc8dfafdd6a6b870340d4efba5148ff78b5951 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 4 Mar 2025 15:27:26 +0100 Subject: [PATCH 08/19] added threaded decorator --- aeon/distances/_distance.py | 6 +- aeon/distances/_mpdist.py | 53 +----- aeon/distances/_sbd.py | 7 +- aeon/distances/_shift_scale_invariant.py | 7 +- aeon/distances/elastic/_adtw.py | 7 +- aeon/distances/elastic/_ddtw.py | 7 +- aeon/distances/elastic/_dtw.py | 7 +- aeon/distances/elastic/_edr.py | 7 +- aeon/distances/elastic/_erp.py | 7 +- aeon/distances/elastic/_lcss.py | 7 +- aeon/distances/elastic/_msm.py | 7 +- aeon/distances/elastic/_shape_dtw.py | 7 +- aeon/distances/elastic/_soft_dtw.py | 7 +- aeon/distances/elastic/_twe.py | 7 +- aeon/distances/elastic/_wddtw.py | 7 +- aeon/distances/elastic/_wdtw.py | 7 +- aeon/distances/mindist/_dft_sfa.py | 7 +- aeon/distances/mindist/_paa_sax.py | 7 +- aeon/distances/mindist/_sax.py | 7 +- aeon/distances/mindist/_sfa.py | 7 +- aeon/distances/pointwise/_euclidean.py | 7 +- aeon/distances/pointwise/_manhattan.py | 7 +- aeon/distances/pointwise/_minkowski.py | 7 +- aeon/distances/pointwise/_squared.py | 7 +- aeon/utils/_threading.py | 52 +++++ aeon/utils/tests/test_threading_decorator.py | 189 +++++++++++++++++++ 26 files changed, 312 insertions(+), 142 deletions(-) create mode 100644 aeon/utils/_threading.py create mode 100644 aeon/utils/tests/test_threading_decorator.py diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py index 25e9a13211..8a5a2fb369 100644 --- a/aeon/distances/_distance.py +++ b/aeon/distances/_distance.py @@ -5,7 +5,6 @@ import numpy as np from joblib import Parallel, delayed -from numba import set_num_threads from typing_extensions import Unpack from aeon.distances._mpdist import mp_distance, mp_pairwise_distance @@ -84,8 +83,8 @@ squared_distance, squared_pairwise_distance, ) +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -259,6 +258,7 @@ def pairwise_distance( raise ValueError("Method must be one of the supported strings or a callable") +@threaded def _custom_func_pairwise( X: Optional[Union[np.ndarray, list[np.ndarray]]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -266,8 +266,6 @@ def _custom_func_pairwise( n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) if dist_func is None: raise ValueError("dist_func must be a callable") diff --git a/aeon/distances/_mpdist.py b/aeon/distances/_mpdist.py index c9f195cba9..7bfab4526c 100644 --- a/aeon/distances/_mpdist.py +++ b/aeon/distances/_mpdist.py @@ -1,14 +1,9 @@ """Matrix Profile Distances.""" -import warnings from typing import Optional, Union import numpy as np from numba import njit -from numba.typed import List as NumbaList - -from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation.collection import _is_numpy_list_multivariate def mp_distance(x: np.ndarray, y: np.ndarray, m: int = 0) -> float: @@ -288,6 +283,7 @@ def mp_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, m: int = 0, + n_jobs: int = 1, **kwargs, ) -> np.ndarray: """Compute the mpdist pairwise distance between a set of time series. @@ -341,51 +337,8 @@ def mp_pairwise_distance( [2.82842712], [2.82842712]]) """ - if "n_jobs" in kwargs: - warnings.warn( - "n_jobs is not supported for the mpdist distance method and will be " - "ignored.", - UserWarning, - stacklevel=2, - ) if m == 0: m = int(X.shape[2] / 4) - multivariate_conversion = _is_numpy_list_multivariate(X, y) - _X, unequal_length = _convert_collection_to_numba_list( - X, "X", multivariate_conversion - ) - - if y is None: - return _mpdist_pairwise_distance_single(_X, m) - - _y, unequal_length = _convert_collection_to_numba_list( - y, "y", multivariate_conversion - ) - - return _mpdist_pairwise_distance(_X, _y, m) - - -def _mpdist_pairwise_distance_single(x: NumbaList[np.ndarray], m: int) -> np.ndarray: - n_cases = len(x) - distances = np.zeros((n_cases, n_cases)) - - for i in range(n_cases): - for j in range(i + 1, n_cases): - distances[i, j] = mp_distance(x[i], x[j], m) - distances[j, i] = distances[i, j] - - return distances - - -def _mpdist_pairwise_distance( - x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], m: int -) -> np.ndarray: - n_cases = len(x) - m_cases = len(y) - - distances = np.zeros((n_cases, m_cases)) + from aeon.distances._distance import pairwise_distance - for i in range(n_cases): - for j in range(m_cases): - distances[i, j] = mp_distance(x[i], y[j], m) - return distances + return pairwise_distance(X, y, method=mp_distance, m=m, n_jobs=n_jobs, **kwargs) diff --git a/aeon/distances/_sbd.py b/aeon/distances/_sbd.py index f96515b8bc..1e72d4eca6 100644 --- a/aeon/distances/_sbd.py +++ b/aeon/distances/_sbd.py @@ -6,12 +6,12 @@ from typing import Optional, Union import numpy as np -from numba import njit, objmode, prange, set_num_threads +from numba import njit, objmode, prange from numba.typed import List as NumbaList from scipy.signal import correlate +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -115,6 +115,7 @@ def sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool = True) -> floa raise ValueError("x and y must be 1D or 2D") +@threaded def sbd_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -196,8 +197,6 @@ def sbd_pairwise_distance( [0.36754447, 0. , 0.29289322], [0.5527864 , 0.29289322, 0. ]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) if n_jobs > 1: warnings.warn( "You have set n_jobs > 1. For this distance function " diff --git a/aeon/distances/_shift_scale_invariant.py b/aeon/distances/_shift_scale_invariant.py index 66caa282fd..e425ca702a 100644 --- a/aeon/distances/_shift_scale_invariant.py +++ b/aeon/distances/_shift_scale_invariant.py @@ -3,11 +3,11 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -157,6 +157,7 @@ def _univariate_shift_scale_invariant_distance( return min_dist, best_shifted_y +@threaded def shift_scale_invariant_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -230,8 +231,6 @@ def shift_scale_invariant_pairwise_distance( >>> y_univariate = np.array([11., 12., 13.]) >>> single_pw =shift_scale_invariant_pairwise_distance(X, y_univariate) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) if max_shift is None: if y is None: max_shift = X.shape[-1] diff --git a/aeon/distances/elastic/_adtw.py b/aeon/distances/elastic/_adtw.py index 7ab182f394..f061f479ff 100644 --- a/aeon/distances/elastic/_adtw.py +++ b/aeon/distances/elastic/_adtw.py @@ -5,14 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -198,6 +198,7 @@ def _adtw_cost_matrix( return cost_matrix[1:, 1:] +@threaded def adtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -279,8 +280,6 @@ def adtw_pairwise_distance( [ 44., 0., 87.], [294., 87., 0.]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_ddtw.py b/aeon/distances/elastic/_ddtw.py index dffd3f71e2..a31745f2f0 100644 --- a/aeon/distances/elastic/_ddtw.py +++ b/aeon/distances/elastic/_ddtw.py @@ -5,7 +5,7 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path @@ -14,8 +14,8 @@ _dtw_distance, create_bounding_matrix, ) +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -168,6 +168,7 @@ def ddtw_cost_matrix( raise ValueError("x and y must be 1D or 2D") +@threaded def ddtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -244,8 +245,6 @@ def ddtw_pairwise_distance( [0., 0., 0.], [0., 0., 0.]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_dtw.py b/aeon/distances/elastic/_dtw.py index 0f15892477..26add187dd 100644 --- a/aeon/distances/elastic/_dtw.py +++ b/aeon/distances/elastic/_dtw.py @@ -5,14 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -229,6 +229,7 @@ def _dtw_cost_matrix( return cost_matrix[1:, 1:] +@threaded def dtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -321,8 +322,6 @@ def dtw_pairwise_distance( [ 42., 0., 83.], [292., 83., 0.]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_edr.py b/aeon/distances/elastic/_edr.py index e3a2bf3bf1..19a4ec483b 100644 --- a/aeon/distances/elastic/_edr.py +++ b/aeon/distances/elastic/_edr.py @@ -5,14 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -230,6 +230,7 @@ def _edr_cost_matrix( return cost_matrix[1:, 1:] +@threaded def edr_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -310,8 +311,6 @@ def edr_pairwise_distance( [0.75, 0. , 0.8 ], [0.6 , 0.8 , 0. ]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_erp.py b/aeon/distances/elastic/_erp.py index 12ea5313ce..b1a7071a7b 100644 --- a/aeon/distances/elastic/_erp.py +++ b/aeon/distances/elastic/_erp.py @@ -5,14 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -249,6 +249,7 @@ def _precompute_g( return gx_distance, x_sum +@threaded def erp_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -334,8 +335,6 @@ def erp_pairwise_distance( [16., 0., 28.], [44., 28., 0.]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_lcss.py b/aeon/distances/elastic/_lcss.py index 6191335799..0cddbd9b0f 100644 --- a/aeon/distances/elastic/_lcss.py +++ b/aeon/distances/elastic/_lcss.py @@ -5,14 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_lcss_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -223,6 +223,7 @@ def _lcss_cost_matrix( return cost_matrix +@threaded def lcss_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -302,8 +303,6 @@ def lcss_pairwise_distance( [0.66666667, 0. , 0.75 ], [1. , 0.75 , 0. ]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_msm.py b/aeon/distances/elastic/_msm.py index 24f87b66ab..f75ab5daf8 100644 --- a/aeon/distances/elastic/_msm.py +++ b/aeon/distances/elastic/_msm.py @@ -5,14 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -344,6 +344,7 @@ def _cost_independent(x: float, y: float, z: float, c: float) -> float: return c + min(abs(x - y), abs(x - z)) +@threaded def msm_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -426,8 +427,6 @@ def msm_pairwise_distance( [10., 0., 14.], [17., 14., 0.]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_shape_dtw.py b/aeon/distances/elastic/_shape_dtw.py index c4933f0ff1..894db087c3 100644 --- a/aeon/distances/elastic/_shape_dtw.py +++ b/aeon/distances/elastic/_shape_dtw.py @@ -5,15 +5,15 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.elastic._dtw import _dtw_cost_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -516,6 +516,7 @@ def shape_dtw_alignment_path( return (compute_min_return_path(cost_matrix), shapedtw_dist) +@threaded def shape_dtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -616,8 +617,6 @@ def shape_dtw_pairwise_distance( [ 43., 0., 89.], [292., 89., 0.]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_soft_dtw.py b/aeon/distances/elastic/_soft_dtw.py index 006498bbfd..861b9cf428 100644 --- a/aeon/distances/elastic/_soft_dtw.py +++ b/aeon/distances/elastic/_soft_dtw.py @@ -5,15 +5,15 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.elastic._dtw import _dtw_cost_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -244,6 +244,7 @@ def _soft_dtw_cost_matrix( return cost_matrix[1:, 1:] +@threaded def soft_dtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -323,8 +324,6 @@ def soft_dtw_pairwise_distance( [ 41.44055555, 0. , 82.43894439], [291.99999969, 82.43894439, 0. ]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_twe.py b/aeon/distances/elastic/_twe.py index 69003dd54c..077172e0a2 100644 --- a/aeon/distances/elastic/_twe.py +++ b/aeon/distances/elastic/_twe.py @@ -5,14 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -244,6 +244,7 @@ def _pad_arrs(x: np.ndarray) -> np.ndarray: return padded_x +@threaded def twe_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -326,8 +327,6 @@ def twe_pairwise_distance( [13.005, 0. , 18.007], [19.006, 18.007, 0. ]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_wddtw.py b/aeon/distances/elastic/_wddtw.py index 0987cf7555..5e064a8162 100644 --- a/aeon/distances/elastic/_wddtw.py +++ b/aeon/distances/elastic/_wddtw.py @@ -5,15 +5,15 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.elastic._ddtw import average_of_slope from aeon.distances.elastic._wdtw import _wdtw_cost_matrix, _wdtw_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -172,6 +172,7 @@ def wddtw_cost_matrix( raise ValueError("x and y must be 1D or 2D") +@threaded def wddtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -247,8 +248,6 @@ def wddtw_pairwise_distance( [0., 0., 0.], [0., 0., 0.]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/elastic/_wdtw.py b/aeon/distances/elastic/_wdtw.py index d3659bbd32..3573cedfc3 100644 --- a/aeon/distances/elastic/_wdtw.py +++ b/aeon/distances/elastic/_wdtw.py @@ -5,14 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -236,6 +236,7 @@ def _wdtw_cost_matrix( return cost_matrix[1:, 1:] +@threaded def wdtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -315,8 +316,6 @@ def wdtw_pairwise_distance( [ 20.25043711, 0. , 39.64543037], [139.70656066, 39.64543037, 0. ]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/mindist/_dft_sfa.py b/aeon/distances/mindist/_dft_sfa.py index 7d29cf8e9b..9d8bd80794 100644 --- a/aeon/distances/mindist/_dft_sfa.py +++ b/aeon/distances/mindist/_dft_sfa.py @@ -3,10 +3,10 @@ from typing import Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -86,6 +86,7 @@ def _univariate_dft_sfa_distance( return np.sqrt(2 * dist) +@threaded def mindist_dft_sfa_pairwise_distance( X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n_jobs: int = 1, **kwargs ) -> np.ndarray: @@ -115,8 +116,6 @@ def mindist_dft_sfa_pairwise_distance( If X is not 2D array when only passing X. If X and y are not 1D, 2D arrays when passing both X and y. """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/mindist/_paa_sax.py b/aeon/distances/mindist/_paa_sax.py index e8950f94f7..8d7fb42350 100644 --- a/aeon/distances/mindist/_paa_sax.py +++ b/aeon/distances/mindist/_paa_sax.py @@ -1,10 +1,10 @@ __maintainer__ = [] import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -91,6 +91,7 @@ def _univariate_paa_sax_distance( return np.sqrt(dist) +@threaded def mindist_paa_sax_pairwise_distance( X: np.ndarray, y: np.ndarray, @@ -128,8 +129,6 @@ def mindist_paa_sax_pairwise_distance( If X and y are not 1D, 2D arrays when passing both X and y. """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/mindist/_sax.py b/aeon/distances/mindist/_sax.py index dec2582d86..8313de26bd 100644 --- a/aeon/distances/mindist/_sax.py +++ b/aeon/distances/mindist/_sax.py @@ -3,10 +3,10 @@ from typing import Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -85,6 +85,7 @@ def _univariate_sax_distance( return np.sqrt(dist) +@threaded def mindist_sax_pairwise_distance( X: np.ndarray, y: np.ndarray, @@ -122,8 +123,6 @@ def mindist_sax_pairwise_distance( If X and y are not 1D, 2D arrays when passing both X and y. """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/mindist/_sfa.py b/aeon/distances/mindist/_sfa.py index 82effbfc4d..95dabd9cb6 100644 --- a/aeon/distances/mindist/_sfa.py +++ b/aeon/distances/mindist/_sfa.py @@ -3,10 +3,10 @@ from typing import Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -78,6 +78,7 @@ def _univariate_sfa_distance( return np.sqrt(2 * dist) +@threaded def mindist_sfa_pairwise_distance( X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n_jobs: int = 1, **kwargs ) -> np.ndarray: @@ -108,8 +109,6 @@ def mindist_sfa_pairwise_distance( If X and y are not 1D, 2D arrays when passing both X and y. """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, unequal_length = _convert_collection_to_numba_list( X, "X", multivariate_conversion diff --git a/aeon/distances/pointwise/_euclidean.py b/aeon/distances/pointwise/_euclidean.py index 2b5f05b5fe..607032b887 100644 --- a/aeon/distances/pointwise/_euclidean.py +++ b/aeon/distances/pointwise/_euclidean.py @@ -4,15 +4,15 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.pointwise._squared import ( _univariate_squared_distance, squared_distance, ) +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -71,6 +71,7 @@ def _univariate_euclidean_distance(x: np.ndarray, y: np.ndarray) -> float: return np.sqrt(_univariate_squared_distance(x, y)) +@threaded def euclidean_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -136,8 +137,6 @@ def euclidean_pairwise_distance( [ 5.19615242, 0. , 8. ], [12.12435565, 8. , 0. ]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) if n_jobs > 1: warnings.warn( "You have set n_jobs > 1. For this distance function " diff --git a/aeon/distances/pointwise/_manhattan.py b/aeon/distances/pointwise/_manhattan.py index fe58f8e772..4d3892aed4 100644 --- a/aeon/distances/pointwise/_manhattan.py +++ b/aeon/distances/pointwise/_manhattan.py @@ -4,11 +4,11 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -76,6 +76,7 @@ def _univariate_manhattan_distance(x: np.ndarray, y: np.ndarray) -> float: return distance +@threaded def manhattan_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -141,8 +142,6 @@ def manhattan_pairwise_distance( [ 9., 0., 16.], [21., 16., 0.]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) if n_jobs > 1: warnings.warn( "You have set n_jobs > 1. For this distance function " diff --git a/aeon/distances/pointwise/_minkowski.py b/aeon/distances/pointwise/_minkowski.py index f441727aa0..232e383eb1 100644 --- a/aeon/distances/pointwise/_minkowski.py +++ b/aeon/distances/pointwise/_minkowski.py @@ -3,11 +3,11 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -127,6 +127,7 @@ def _multivariate_minkowski_distance( return dist ** (1.0 / p) +@threaded def minkowski_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -209,8 +210,6 @@ def minkowski_pairwise_distance( [ 5.19615242, 0. , 8. ], [12.12435565, 8. , 0. ]]) """ - n_jobs = check_n_jobs(n_jobs) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) if y is None: diff --git a/aeon/distances/pointwise/_squared.py b/aeon/distances/pointwise/_squared.py index 5c3405bc50..f7391c8565 100644 --- a/aeon/distances/pointwise/_squared.py +++ b/aeon/distances/pointwise/_squared.py @@ -4,11 +4,11 @@ from typing import Optional, Union import numpy as np -from numba import njit, prange, set_num_threads +from numba import njit, prange from numba.typed import List as NumbaList +from aeon.utils._threading import threaded from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation import check_n_jobs from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -75,6 +75,7 @@ def _univariate_squared_distance(x: np.ndarray, y: np.ndarray) -> float: return distance +@threaded def squared_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -140,7 +141,6 @@ def squared_pairwise_distance( [ 27., 0., 64.], [147., 64., 0.]]) """ - n_jobs = check_n_jobs(n_jobs) if n_jobs > 1: warnings.warn( "You have set n_jobs > 1. For this distance function " @@ -150,7 +150,6 @@ def squared_pairwise_distance( UserWarning, stacklevel=2, ) - set_num_threads(n_jobs) multivariate_conversion = _is_numpy_list_multivariate(X, y) _X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) diff --git a/aeon/utils/_threading.py b/aeon/utils/_threading.py new file mode 100644 index 0000000000..9f33cd0d2a --- /dev/null +++ b/aeon/utils/_threading.py @@ -0,0 +1,52 @@ +import functools +import inspect +import os +import threading +from typing import Any, Callable + +from numba import set_num_threads + +from aeon.utils.validation import check_n_jobs + + +def threaded(func: Callable) -> Callable: + """Set thread count based on n_jobs parameter and restore it afterward. + + A decorator that sets the number of threads based on the n_jobs parameter + passed to the function, and restores the original thread count afterward. + + The decorated function is expected to have a 'n_jobs' parameter. + """ + + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + numba_env_threads = os.environ.get("NUMBA_NUM_THREADS") + + if numba_env_threads is not None and numba_env_threads.isdigit(): + original_thread_count = int(numba_env_threads) + else: + original_thread_count = threading.active_count() + + if "n_jobs" in kwargs: + n_jobs = kwargs["n_jobs"] + else: + sig = inspect.signature(func) + param_names = list(sig.parameters.keys()) + + n_jobs_index = param_names.index("n_jobs") + if n_jobs_index < len(args): + n_jobs = args[n_jobs_index] + else: + default = sig.parameters["n_jobs"].default + n_jobs = default if default is not inspect.Parameter.empty else None + + adjusted_n_jobs = check_n_jobs(n_jobs) + set_num_threads(adjusted_n_jobs) + + try: + result = func(*args, **kwargs) + return result + finally: + set_num_threads(original_thread_count) + + return wrapper diff --git a/aeon/utils/tests/test_threading_decorator.py b/aeon/utils/tests/test_threading_decorator.py new file mode 100644 index 0000000000..a147c646a7 --- /dev/null +++ b/aeon/utils/tests/test_threading_decorator.py @@ -0,0 +1,189 @@ +"""Test threading decorator.""" + +import os +from unittest.mock import MagicMock, patch + +import pytest + +from aeon.utils._threading import threaded + + +def check_n_jobs(n_jobs): + """Mock implementation of check_n_jobs.""" + return n_jobs if n_jobs is not None else 1 + + +def set_num_threads(n_threads): + """Mock implementation of set_num_threads.""" + pass + + +@pytest.fixture +def clean_env(): + """Save and restore environment variables between tests.""" + original_env = os.environ.copy() + yield + os.environ.clear() + os.environ.update(original_env) + + +def test_basic_functionality(): + """Test that the decorator correctly sets and restores thread count.""" + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + + @threaded + def sample_func(n_jobs=None): + return "executed" + + result = sample_func(n_jobs=4) + + assert result == "executed" + check_jobs_mock.assert_called_once_with(4) + assert set_threads_mock.call_count == 2 + + +def test_numba_env_variable(clean_env): + """Test that the decorator respects NUMBA_NUM_THREADS environment variable.""" + os.environ["NUMBA_NUM_THREADS"] = "8" + + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + + @threaded + def sample_func(n_jobs=None): + return "executed" + + sample_func(n_jobs=4) + + assert set_threads_mock.call_args_list[0][0][0] == 4 + assert set_threads_mock.call_args_list[1][0][0] == 8 + + +def test_fallback_to_threading_count(clean_env): + """ + Test the fallback mechanism to the system's active thread count. + + When the NUMBA_NUM_THREADS environment variable is not set or is invalid, + the decorator should use the system's active thread count as the baseline. + This ensures proper thread management even when no explicit configuration is + provided. + """ + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + thread_count_mock = MagicMock(return_value=3) + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + with patch("threading.active_count", thread_count_mock): + + @threaded + def sample_func(n_jobs=None): + return "executed" + + sample_func(n_jobs=4) + + assert set_threads_mock.call_args_list[1][0][0] == 3 + + +def test_positional_argument(): + """ + Test the extraction of n_jobs when passed as a positional argument. + + The threaded decorator needs to correctly identify the n_jobs parameter + regardless of how it's passed to the function. This test verifies that + when n_jobs is passed as a positional argument, the decorator correctly + extracts its value and uses it to configure the thread count. + """ + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + + @threaded + def sample_func(data, n_jobs=None): + return data + + sample_func("test_data", 4) + + check_jobs_mock.assert_called_once_with(4) + + +def test_keyword_argument(): + """ + Test the extraction of n_jobs when passed as a keyword argument. + + Functions decorated with the threaded decorator can receive the n_jobs + parameter as a keyword argument. This test ensures that the decorator + correctly identifies and extracts the n_jobs value when passed this way, + demonstrating the decorator's flexibility in handling different calling styles. + """ + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + + @threaded + def sample_func(data, n_jobs=None): + return data + + sample_func(data="test_data", n_jobs=4) + + check_jobs_mock.assert_called_once_with(4) + + +def test_default_value(): + """ + Test the use of default n_jobs value when not explicitly provided. + + When a function has a default value for the n_jobs parameter and is called + without specifying this parameter, the threaded decorator should use the + function's default value. This test verifies this behavior, ensuring that + default function parameters are properly respected by the decorator. + """ + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + + @threaded + def sample_func(data, n_jobs=2): + return data + + sample_func("test_data") + + check_jobs_mock.assert_called_once_with(2) + + +def test_exception_handling(): + """ + Test resource cleanup when exceptions occur in the decorated function. + + A robust decorator must ensure resources are properly managed even when + the decorated function raises an exception. This test verifies that the + threaded decorator correctly restores the original thread count even when + the function execution fails with an exception, preventing resource leaks. + """ + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + + @threaded + def sample_func(n_jobs=None): + raise ValueError("Test exception") + + with pytest.raises(ValueError, match="Test exception"): + sample_func(n_jobs=4) + + assert set_threads_mock.call_count == 2 From 3b6233ed8736df4dadb23da141c4174078455436 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 4 Mar 2025 15:39:31 +0100 Subject: [PATCH 09/19] merge changes and fixed call --- aeon/classification/distance_based/_time_series_neighbors.py | 2 +- aeon/regression/distance_based/_time_series_neighbors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/classification/distance_based/_time_series_neighbors.py b/aeon/classification/distance_based/_time_series_neighbors.py index 4bde22da44..b7f68e482c 100644 --- a/aeon/classification/distance_based/_time_series_neighbors.py +++ b/aeon/classification/distance_based/_time_series_neighbors.py @@ -137,7 +137,7 @@ def _predict_proba(self, X): """ preds = np.zeros((len(X), len(self.classes_))) for i in range(len(X)): - idx, weights = self._kneighbors(X[i]) + idx, weights = self.kneighbors(X[i]) for id, w in zip(idx, weights): predicted_class = self.y_[id] preds[i, predicted_class] += w diff --git a/aeon/regression/distance_based/_time_series_neighbors.py b/aeon/regression/distance_based/_time_series_neighbors.py index 9981e2dc12..1a2eff0163 100644 --- a/aeon/regression/distance_based/_time_series_neighbors.py +++ b/aeon/regression/distance_based/_time_series_neighbors.py @@ -134,7 +134,7 @@ def _predict(self, X): """ preds = np.empty(len(X)) for i in range(len(X)): - idx, weights = self._kneighbors(X[i]) + idx, weights = self.kneighbors(X[i]) preds[i] = np.average(self.y_[idx], weights=weights) return preds From b603ddb98b1dcf5bc1ef8d623f348fd450c5ca92 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 4 Mar 2025 17:58:30 +0100 Subject: [PATCH 10/19] fix --- aeon/utils/_threading.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aeon/utils/_threading.py b/aeon/utils/_threading.py index 9f33cd0d2a..0c1a798020 100644 --- a/aeon/utils/_threading.py +++ b/aeon/utils/_threading.py @@ -25,7 +25,11 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: if numba_env_threads is not None and numba_env_threads.isdigit(): original_thread_count = int(numba_env_threads) else: - original_thread_count = threading.active_count() + active_count = threading.active_count() + if isinstance(active_count, int): + original_thread_count = threading.active_count() + else: + original_thread_count = 1 if "n_jobs" in kwargs: n_jobs = kwargs["n_jobs"] From 24b46b0116869bf49b66df2374f4c217f4c46436 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 4 Mar 2025 19:25:38 +0100 Subject: [PATCH 11/19] fix --- aeon/utils/tests/test_threading_decorator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/utils/tests/test_threading_decorator.py b/aeon/utils/tests/test_threading_decorator.py index a147c646a7..0224985707 100644 --- a/aeon/utils/tests/test_threading_decorator.py +++ b/aeon/utils/tests/test_threading_decorator.py @@ -1,4 +1,4 @@ -"""Test threading decorator.""" +"""Test threading util decorator.""" import os from unittest.mock import MagicMock, patch From 9f2e0106c5ddb02c56e54e3446fe8257011dd7cf Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 7 Mar 2025 15:54:14 +0100 Subject: [PATCH 12/19] fixed --- .../distance_based/_time_series_neighbors.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/aeon/classification/distance_based/_time_series_neighbors.py b/aeon/classification/distance_based/_time_series_neighbors.py index b7f68e482c..cf3f0faab2 100644 --- a/aeon/classification/distance_based/_time_series_neighbors.py +++ b/aeon/classification/distance_based/_time_series_neighbors.py @@ -17,7 +17,7 @@ from aeon.classification.base import BaseClassifier from aeon.distances import pairwise_distance -from aeon.utils.validation import check_n_jobs +from aeon.utils._threading import threaded WEIGHTS_SUPPORTED = ["uniform", "distance"] @@ -48,11 +48,10 @@ class KNeighborsTimeSeriesClassifier(BaseClassifier): n_timepoints)`` as input and returns a float. distance_params : dict, default = None Dictionary for metric parameters for the case that distance is a str. - n_jobs : int, default = None - The number of parallel jobs to run for neighbors search. - ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. - ``-1`` means using all processors. - for more details. Parameter for compatibility purposes, still unimplemented. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Examples -------- @@ -164,10 +163,11 @@ def _predict(self, X): """ self._check_is_fitted() - indexes = self.kneighbors(X, return_distance=False)[:, 0] + indexes = self.kneighbors(X, return_distance=False, n_jobs=self.n_jobs)[:, 0] return self.classes_[self.y_[indexes]] - def kneighbors(self, X=None, n_neighbors=None, return_distance=True): + @threaded + def kneighbors(self, X=None, n_neighbors=None, return_distance=True, n_jobs=1): """Find the K-neighbors of a point. Returns indices of and distances to the neighbors of each point. @@ -184,6 +184,10 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True): passed to the constructor. return_distance : bool, default=True Whether or not to return the distances. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -194,8 +198,6 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True): Indices of the nearest points in the population matrix. """ self._check_is_fitted() - n_jobs = check_n_jobs(self.n_jobs) - if n_neighbors is None: n_neighbors = self.n_neighbors elif n_neighbors <= 0: From 541e7fd6bf5a3606af9708ab034f4ad4c868b3bc Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 7 Mar 2025 16:08:37 +0100 Subject: [PATCH 13/19] expanded threaded decorator to work with classes --- aeon/utils/_threading.py | 18 ++-- aeon/utils/tests/test_threading_decorator.py | 93 ++++++++++++++++++++ 2 files changed, 105 insertions(+), 6 deletions(-) diff --git a/aeon/utils/_threading.py b/aeon/utils/_threading.py index 0c1a798020..2a6efac7af 100644 --- a/aeon/utils/_threading.py +++ b/aeon/utils/_threading.py @@ -31,18 +31,24 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: else: original_thread_count = 1 + n_jobs = None if "n_jobs" in kwargs: n_jobs = kwargs["n_jobs"] else: sig = inspect.signature(func) param_names = list(sig.parameters.keys()) - n_jobs_index = param_names.index("n_jobs") - if n_jobs_index < len(args): - n_jobs = args[n_jobs_index] - else: - default = sig.parameters["n_jobs"].default - n_jobs = default if default is not inspect.Parameter.empty else None + if "n_jobs" in param_names: + n_jobs_index = param_names.index("n_jobs") + if n_jobs_index < len(args): + n_jobs = args[n_jobs_index] + else: + default = sig.parameters["n_jobs"].default + n_jobs = default if default is not inspect.Parameter.empty else None + + if n_jobs is None and args and hasattr(args[0], "n_jobs"): + # This gets n_jobs if it belongs to a object (i.e. self.n_jobs) + n_jobs = args[0]["n_jobs"] adjusted_n_jobs = check_n_jobs(n_jobs) set_num_threads(adjusted_n_jobs) diff --git a/aeon/utils/tests/test_threading_decorator.py b/aeon/utils/tests/test_threading_decorator.py index 0224985707..76c0d80503 100644 --- a/aeon/utils/tests/test_threading_decorator.py +++ b/aeon/utils/tests/test_threading_decorator.py @@ -187,3 +187,96 @@ def sample_func(n_jobs=None): sample_func(n_jobs=4) assert set_threads_mock.call_count == 2 + + +def test_class_attribute(): + """ + Test the extraction of n_jobs from a class attribute. + + The threaded decorator should be able to extract the n_jobs value from + the first argument (typically 'self' in class methods) when it has an + n_jobs attribute. This test verifies that the decorator correctly identifies + and uses this attribute when the n_jobs parameter is not explicitly passed. + """ + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + + class TestClass: + def __init__(self, n_jobs): + self.n_jobs = n_jobs + + @threaded + def process_data(self, data): + return data + + test_instance = TestClass(n_jobs=5) + + test_instance.process_data("test_data") + + check_jobs_mock.assert_called_once_with(5) + assert set_threads_mock.call_count == 2 + + +def test_parameter_precedence_over_attribute(): + """ + Test that n_jobs parameter takes precedence over class attribute. + + When both a class attribute and a method parameter for n_jobs exist, + the parameter value should take precedence. This test verifies this + precedence rule, ensuring that explicit parameter values override + attribute values. + """ + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + + class TestClass: + def __init__(self, n_jobs): + self.n_jobs = n_jobs + + @threaded + def process_data(self, data, n_jobs=None): + return data + + test_instance = TestClass(n_jobs=5) + + test_instance.process_data("test_data", n_jobs=7) + + check_jobs_mock.assert_called_once_with(7) + assert set_threads_mock.call_count == 2 + + +def test_fallback_when_no_attribute(): + """ + Test fallback behavior when neither parameter nor attribute is available. + + When a class doesn't have an n_jobs attribute and the method doesn't + have an n_jobs parameter, the decorator should fall back to using None, + which will be converted to 1 by check_n_jobs. This test verifies this + fallback behavior. + """ + check_jobs_mock = MagicMock(side_effect=lambda x: x if x is not None else 1) + set_threads_mock = MagicMock() + + with patch("aeon.utils._threading.check_n_jobs", check_jobs_mock): + with patch("aeon.utils._threading.set_num_threads", set_threads_mock): + + class TestClass: + # No n_jobs attribute + pass + + @threaded + def process_data(self, data): + return data + + test_instance = TestClass() + + test_instance.process_data("test_data") + + check_jobs_mock.assert_called_once_with(None) + assert set_threads_mock.call_count == 2 From 32e93eb0a65d4d591c7e70f7ee6a9411bcf8ca5e Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 7 Mar 2025 16:12:33 +0100 Subject: [PATCH 14/19] fixed --- .../distance_based/_time_series_neighbors.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/aeon/classification/distance_based/_time_series_neighbors.py b/aeon/classification/distance_based/_time_series_neighbors.py index cf3f0faab2..6792ea0423 100644 --- a/aeon/classification/distance_based/_time_series_neighbors.py +++ b/aeon/classification/distance_based/_time_series_neighbors.py @@ -163,11 +163,11 @@ def _predict(self, X): """ self._check_is_fitted() - indexes = self.kneighbors(X, return_distance=False, n_jobs=self.n_jobs)[:, 0] + indexes = self.kneighbors(X, return_distance=False)[:, 0] return self.classes_[self.y_[indexes]] @threaded - def kneighbors(self, X=None, n_neighbors=None, return_distance=True, n_jobs=1): + def kneighbors(self, X=None, n_neighbors=None, return_distance=True): """Find the K-neighbors of a point. Returns indices of and distances to the neighbors of each point. @@ -184,10 +184,6 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, n_jobs=1): passed to the constructor. return_distance : bool, default=True Whether or not to return the distances. - n_jobs : int, default=1 - The number of jobs to run in parallel. If -1, then the number of jobs is set - to the number of CPU cores. If 1, then the function is executed in a single - thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -220,7 +216,7 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, n_jobs=1): X, self.X_ if not query_is_train else None, method=self.distance, - n_jobs=n_jobs, + n_jobs=self.n_jobs, **self._distance_params, ) From add714e3c9f05be35e4a73cf2c35607698310ff0 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 7 Mar 2025 16:13:15 +0100 Subject: [PATCH 15/19] fix --- aeon/utils/_threading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/utils/_threading.py b/aeon/utils/_threading.py index 2a6efac7af..adf32df0a6 100644 --- a/aeon/utils/_threading.py +++ b/aeon/utils/_threading.py @@ -48,7 +48,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: if n_jobs is None and args and hasattr(args[0], "n_jobs"): # This gets n_jobs if it belongs to a object (i.e. self.n_jobs) - n_jobs = args[0]["n_jobs"] + n_jobs = args[0].n_jobs adjusted_n_jobs = check_n_jobs(n_jobs) set_num_threads(adjusted_n_jobs) From 7b026838412d90f8157969f176fcb2580bbcec02 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 7 Mar 2025 16:14:48 +0100 Subject: [PATCH 16/19] merge --- .../distance_based/tests/test_time_series_neighbors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/classification/distance_based/tests/test_time_series_neighbors.py b/aeon/classification/distance_based/tests/test_time_series_neighbors.py index 917b2628d4..0209d7de1c 100644 --- a/aeon/classification/distance_based/tests/test_time_series_neighbors.py +++ b/aeon/classification/distance_based/tests/test_time_series_neighbors.py @@ -45,7 +45,7 @@ def test_knn_on_unit_test(distance_key): # load arrowhead data for unit tests X_train, y_train = load_unit_test(split="train") X_test, y_test = load_unit_test(split="test") - knn = KNeighborsTimeSeriesClassifier(distance=distance_key) + knn = KNeighborsTimeSeriesClassifier(distance=distance_key, n_jobs=7) knn.fit(X_train, y_train) pred = knn.predict(X_test) correct = 0 From a94cc8def731bcf47881c68e20bcb8585596e9f5 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 7 Mar 2025 16:58:09 +0100 Subject: [PATCH 17/19] added test for kneighbors --- .../tests/test_time_series_neighbors.py | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/aeon/classification/distance_based/tests/test_time_series_neighbors.py b/aeon/classification/distance_based/tests/test_time_series_neighbors.py index 0209d7de1c..bc5aea2eed 100644 --- a/aeon/classification/distance_based/tests/test_time_series_neighbors.py +++ b/aeon/classification/distance_based/tests/test_time_series_neighbors.py @@ -1,5 +1,6 @@ """Tests for KNeighborsTimeSeriesClassifier.""" +import numpy as np import pytest from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier @@ -45,7 +46,7 @@ def test_knn_on_unit_test(distance_key): # load arrowhead data for unit tests X_train, y_train = load_unit_test(split="train") X_test, y_test = load_unit_test(split="test") - knn = KNeighborsTimeSeriesClassifier(distance=distance_key, n_jobs=7) + knn = KNeighborsTimeSeriesClassifier(distance=distance_key) knn.fit(X_train, y_train) pred = knn.predict(X_test) correct = 0 @@ -75,3 +76,26 @@ def test_knn_bounding_matrix(distance_key): if pred[j] == y_test[j]: correct = correct + 1 assert correct == expected_correct_window[distance_key] + + +@pytest.mark.parametrize("distance_key", distance_functions) +def test_knn_kneighbors(distance_key): + """Test knn kneighbors.""" + X_train, y_train = load_unit_test(split="train") + X_test, y_test = load_unit_test(split="test") + + knn = KNeighborsTimeSeriesClassifier(distance=distance_key) + knn.fit(X_train, y_train) + dists, ind = knn.kneighbors(X_test, n_neighbors=3) + assert isinstance(dists, np.ndarray) + assert isinstance(ind, np.ndarray) + assert dists.shape == (X_test.shape[0], 3) + assert ind.shape == (X_test.shape[0], 3) + indexes = ind[:, 0] + classes, y = np.unique(y_train, return_inverse=True) + pred = classes[y[indexes]] + correct = 0 + for j in range(0, len(pred)): + if pred[j] == y_test[j]: + correct = correct + 1 + assert correct == expected_correct_window[distance_key] From 447098955f44e9bbb9b4e0fc0447324b77d5266b Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 7 Mar 2025 16:58:13 +0100 Subject: [PATCH 18/19] added test for kneighbors --- .../distance_based/tests/test_time_series_neighbors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/classification/distance_based/tests/test_time_series_neighbors.py b/aeon/classification/distance_based/tests/test_time_series_neighbors.py index bc5aea2eed..e68cad0019 100644 --- a/aeon/classification/distance_based/tests/test_time_series_neighbors.py +++ b/aeon/classification/distance_based/tests/test_time_series_neighbors.py @@ -43,7 +43,6 @@ @pytest.mark.parametrize("distance_key", distance_functions) def test_knn_on_unit_test(distance_key): """Test function for elastic knn, to be reinstated soon.""" - # load arrowhead data for unit tests X_train, y_train = load_unit_test(split="train") X_test, y_test = load_unit_test(split="test") knn = KNeighborsTimeSeriesClassifier(distance=distance_key) From d9d594ce1ee511a59220e95dbc20a5daf888f523 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 2 Apr 2025 17:14:03 +0100 Subject: [PATCH 19/19] fix --- aeon/classification/distance_based/_time_series_neighbors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/classification/distance_based/_time_series_neighbors.py b/aeon/classification/distance_based/_time_series_neighbors.py index 6792ea0423..36afc755ae 100644 --- a/aeon/classification/distance_based/_time_series_neighbors.py +++ b/aeon/classification/distance_based/_time_series_neighbors.py @@ -136,7 +136,7 @@ def _predict_proba(self, X): """ preds = np.zeros((len(X), len(self.classes_))) for i in range(len(X)): - idx, weights = self.kneighbors(X[i]) + weights, idx = self.kneighbors(X[i]) for id, w in zip(idx, weights): predicted_class = self.y_[id] preds[i, predicted_class] += w