diff --git a/aeon/distances/__init__.py b/aeon/distances/__init__.py index d6ff3f776a..e218cc3fb7 100644 --- a/aeon/distances/__init__.py +++ b/aeon/distances/__init__.py @@ -82,6 +82,10 @@ "soft_dtw_pairwise_distance", "soft_dtw_alignment_path", "soft_dtw_cost_matrix", + "kdtw_distance", + "kdtw_alignment_path", + "kdtw_cost_matrix", + "kdtw_pairwise_distance", ] from aeon.distances._distance import ( @@ -157,6 +161,12 @@ wdtw_distance, wdtw_pairwise_distance, ) +from aeon.distances.kernel import ( + kdtw_alignment_path, + kdtw_cost_matrix, + kdtw_distance, + kdtw_pairwise_distance, +) from aeon.distances.mindist._dft_sfa import mindist_dft_sfa_distance from aeon.distances.mindist._paa_sax import mindist_paa_sax_distance from aeon.distances.mindist._sax import mindist_sax_distance diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py index 33f9141440..cbde1a6f05 100644 --- a/aeon/distances/_distance.py +++ b/aeon/distances/_distance.py @@ -66,6 +66,12 @@ wdtw_distance, wdtw_pairwise_distance, ) +from aeon.distances.kernel import ( + kdtw_alignment_path, + kdtw_cost_matrix, + kdtw_distance, + kdtw_pairwise_distance, +) from aeon.distances.mindist import ( mindist_dft_sfa_distance, mindist_dft_sfa_pairwise_distance, @@ -109,6 +115,9 @@ class DistanceKwargs(TypedDict, total=False): m: int max_shift: Optional[int] gamma: float + sigma: float + normalize_input: bool + normalize_dist: bool DistanceFunction = Callable[[np.ndarray, np.ndarray, Any], float] @@ -469,6 +478,7 @@ def get_distance_function(method: Union[str, DistanceFunction]) -> DistanceFunct 'sbd' distances.sbd_distance 'shift_scale' distances.shift_scale_invariant_distance 'soft_dtw' distances.soft_dtw_distance + 'kdtw' distances.kdtw_distance =============== ======================================== Parameters @@ -528,6 +538,7 @@ def get_pairwise_distance_function( 'sbd' distances.sbd_pairwise_distance 'shift_scale' distances.shift_scale_invariant_pairwise_distance 'soft_dtw' distances.soft_dtw_pairwise_distance + 'kdtw' distances.kdtw_pairwise_distance =============== ======================================== Parameters @@ -582,6 +593,7 @@ def get_alignment_path_function(method: str) -> AlignmentPathFunction: 'twe' distances.twe_alignment_path 'lcss' distances.lcss_alignment_path 'soft_dtw' distances.soft_dtw_alignment_path + 'kdtw' distances.kdtw_alignment_path =============== ======================================== Parameters @@ -631,6 +643,7 @@ def get_cost_matrix_function(method: str) -> CostMatrixFunction: 'twe' distances.twe_cost_matrix 'lcss' distances.lcss_cost_matrix 'soft_dtw' distances.soft_dtw_cost_matrix + 'kdtw' distances.kdtw_cost_matrix =============== ======================================== Parameters @@ -685,6 +698,7 @@ class DistanceType(Enum): POINTWISE = "pointwise" ELASTIC = "elastic" + KERNEL = "kernel" CROSS_CORRELATION = "cross-correlation" MIN_DISTANCE = "min-dist" MATRIX_PROFILE = "matrix-profile" @@ -909,6 +923,16 @@ class DistanceType(Enum): "symmetric": True, "unequal_support": True, }, + { + "name": "kdtw", + "distance": kdtw_distance, + "pairwise_distance": kdtw_pairwise_distance, + "cost_matrix": kdtw_cost_matrix, + "alignment_path": kdtw_alignment_path, + "type": DistanceType.KERNEL, + "symmetric": True, + "unequal_support": True, + }, ] DISTANCES_DICT = {d["name"]: d for d in DISTANCES} @@ -922,6 +946,7 @@ class DistanceType(Enum): ] ELASTIC_DISTANCES = [d["name"] for d in DISTANCES if d["type"] == DistanceType.ELASTIC] +KERNEL_DISTANCES = [d["name"] for d in DISTANCES if d["type"] == DistanceType.KERNEL] POINTWISE_DISTANCES = [ d["name"] for d in DISTANCES if d["type"] == DistanceType.POINTWISE ] diff --git a/aeon/distances/elastic/_alignment_paths.py b/aeon/distances/elastic/_alignment_paths.py index f70a374cb2..7c4b6822a3 100644 --- a/aeon/distances/elastic/_alignment_paths.py +++ b/aeon/distances/elastic/_alignment_paths.py @@ -7,13 +7,17 @@ @njit(cache=True, fastmath=True) -def compute_min_return_path(cost_matrix: np.ndarray) -> list[tuple]: +def compute_min_return_path( + cost_matrix: np.ndarray, larger_is_better: bool = False +) -> list[tuple]: """Compute the minimum return path through a cost matrix. Parameters ---------- cost_matrix : np.ndarray, of shape (n_timepoints_x, n_timepoints_y) Cost matrix. + larger_is_better : bool, default=False + If True, the path will be computed for the maximum cost instead of the minimum. Returns ------- @@ -32,15 +36,17 @@ def compute_min_return_path(cost_matrix: np.ndarray) -> list[tuple]: elif j == 0: i -= 1 else: - min_index = np.argmin( - np.array( - [ - cost_matrix[i - 1, j - 1], - cost_matrix[i - 1, j], - cost_matrix[i, j - 1], - ] - ) + costs = np.array( + [ + cost_matrix[i - 1, j - 1], + cost_matrix[i - 1, j], + cost_matrix[i, j - 1], + ] ) + if larger_is_better: + min_index = np.argmax(costs) + else: + min_index = np.argmin(costs) if min_index == 0: i, j = i - 1, j - 1 diff --git a/aeon/distances/elastic/tests/test_cost_matrix.py b/aeon/distances/elastic/tests/test_cost_matrix.py index 36c75d0c02..3c91faac43 100644 --- a/aeon/distances/elastic/tests/test_cost_matrix.py +++ b/aeon/distances/elastic/tests/test_cost_matrix.py @@ -66,6 +66,26 @@ def _validate_cost_matrix_result( assert_almost_equal(curr_distance, distance_result) elif name == "soft_dtw": assert_almost_equal(abs(cost_matrix_result[-1, -1]), distance_result) + elif name == "kdtw": + # distance is normalized by default, so we need to do this here as well: + from aeon.distances.kernel._kdtw import ( + _kdtw_cost_to_distance, + _normalize_time_series, + ) + + _x = x + _y = y + if x.ndim == 1: + _x = x.reshape((1, x.shape[0])) + if y.ndim == 1: + _y = y.reshape((1, y.shape[0])) + + _x = _normalize_time_series(_x) + _y = _normalize_time_series(_y) + d = _kdtw_cost_to_distance( + cost_matrix_result, _x, _y, gamma=0.125, epsilon=1e-20, normalize_dist=True + ) + assert_almost_equal(d, distance_result) else: assert_almost_equal(cost_matrix_result[-1, -1], distance_result) diff --git a/aeon/distances/kernel/__init__.py b/aeon/distances/kernel/__init__.py new file mode 100644 index 0000000000..717ba943c0 --- /dev/null +++ b/aeon/distances/kernel/__init__.py @@ -0,0 +1,15 @@ +"""Kernel distances.""" + +__all__ = [ + "kdtw_distance", + "kdtw_alignment_path", + "kdtw_cost_matrix", + "kdtw_pairwise_distance", +] + +from aeon.distances.kernel._kdtw import ( + kdtw_alignment_path, + kdtw_cost_matrix, + kdtw_distance, + kdtw_pairwise_distance, +) diff --git a/aeon/distances/kernel/_kdtw.py b/aeon/distances/kernel/_kdtw.py new file mode 100644 index 0000000000..ef4268af30 --- /dev/null +++ b/aeon/distances/kernel/_kdtw.py @@ -0,0 +1,518 @@ +"""Dynamic time warping kernel (KDTW) distance between two time series.""" + +__maintainer__ = ["SebastianSchmidl"] +__all__ = [ + "kdtw_distance", + "kdtw_cost_matrix", + "kdtw_pairwise_distance", + "kdtw_alignment_path", +] + +from typing import Optional, Union + +import numpy as np +from numba import njit +from numba.typed import List as NumbaList + +from aeon.distances.elastic._alignment_paths import compute_min_return_path +from aeon.distances.pointwise import squared_distance +from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.validation.collection import _is_numpy_list_multivariate + +_eps = np.finfo(np.float64).eps + + +@njit(cache=True, fastmath=True) +def _normalize_time_series(x: np.ndarray) -> np.ndarray: + """Normalize the time series to zero mean and unit variance. + + Parameters + ---------- + x : np.ndarray + Time series of shape ``(n_channels, n_timepoints)``. + + Returns + ------- + np.ndarray + Normalized time series of shape ``(n_channels, n_timepoints)``. + """ + _x = np.empty_like(x) + + # Numba mean and std do not support axis parameters + for i in range(x.shape[0]): + _x[i] = (x[i] - np.mean(x[i])) / (np.std(x[i]) + _eps) + return _x + + +@njit(cache=True, fastmath=True) +def kdtw_distance( + x: np.ndarray, + y: np.ndarray, + gamma: float = 0.125, + epsilon: float = 1e-20, + normalize_input: bool = True, + normalize_dist: bool = True, +) -> float: + r"""Compute the DTW kernel (KDTW) between two time series as a distance. + + KDTW is a similarity measure constructed from DTW and was introduced in [1]_. It has + the property that it is invariant to shifts in the time series. The kernel is + positive definite. This implementation provides a normalized distance [2]_ and + takes the default values from [2]_. Details can be found online: + https://people.irisa.fr/Pierre-Francois.Marteau/REDK/KDTW/KDTW.html + + Intuition of constructing a DTW kernel from DTW: + Instead of keeping only one of the best alignment paths, the new kernel will try to + sum up the costs of all the existing sub-sequence alignment paths with some + weighting factor that will favor good alignments while penalizing bad alignments. + + The current implementation performs no bounding on the dynamic programming matrix + (cost matrix) (uses no corridors). + + Parameters + ---------- + x : np.ndarray + First time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + y : np.ndarray + Second time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + gamma : float, default=0.125 + bandwidth parameter which weights the local contributions, i.e. the distances + between locally aligned positions. Must fulfill 0 < gamma < 1! + epsilon : float, default=1e-20 + Small value to avoid zero. The default is 1e-20. + normalize_input : bool, default=True + Whether to normalize the time series' channels to zero mean and unit variance + before computing the distance. Highly recommended! + normalize_dist : bool, default=True + Whether to normalize the distance by the product of the self distances of x and + y to avoid scaling effects and put the distance value between 0 and 1. + + Returns + ------- + float + KDTW distance between x and y. + + Raises + ------ + ValueError + If x and y are not 1D or 2D arrays. + + References + ---------- + .. [1] Pierre-François Marteau and Sylvie Gibet: On recursive edit distance kernels + with application to time series classification. IEEE Transactions on Neural + Networks and Learning Systems 26(6), 2014, pages 1121 - 1133. + + .. [2] Paparrizos, John, Chunwei Liu, Aaron J. Elmore, and Michael J. Franklin: + Debunking Four Long-Standing Misconceptions of Time-Series Distance Measures. In + Proceedings of the International Conference on Management of Data (SIGMOD), + 1887-1905, 2020. https://doi.org/10.1145/3318464.3389760. + + Examples + -------- + >>> import numpy as np + >>> from aeon.distances import kdtw_distance + >>> x = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) + >>> y = np.array([[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]) + >>> kdtw_distance(x, y, normalize_dist=False) + 0.9860651016277014 + >>> kdtw_distance(x, y) + 0.0 + """ + if x.ndim == 1 and y.ndim == 1: + _x = x.reshape((1, x.shape[0])) + _y = y.reshape((1, y.shape[0])) + return _kdtw_distance(_x, _y, gamma, epsilon, normalize_input, normalize_dist) + if x.ndim == 2 and y.ndim == 2: + return _kdtw_distance(x, y, gamma, epsilon, normalize_input, normalize_dist) + raise ValueError("x and y must be 1D or 2D") + + +@njit(cache=True, fastmath=True) +def kdtw_cost_matrix( + x: np.ndarray, + y: np.ndarray, + gamma: float = 0.125, + epsilon: float = 1e-20, + normalize_input: bool = True, +) -> np.ndarray: + """Compute the cost matrix for KDTW between two time series. + + Parameters + ---------- + x : np.ndarray + First time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + y : np.ndarray + Second time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + gamma : float, default=0.125 + bandwidth parameter which weights the local contributions, i.e. the distances + between locally aligned positions. + epsilon : float, default=1e-20 + Small value to avoid zero. The default is 1e-20. + normalize_input : bool, default=True + Whether to normalize the time series' channels to zero mean and unit variance + before computing the distance. + + Returns + ------- + np.ndarray (n_timepoints_x, n_timepoints_y) + KDTW cost matrix between x and y. + + Raises + ------ + ValueError + If x and y are not 1D or 2D arrays. + + Examples + -------- + >>> import numpy as np + >>> from aeon.distances import kdtw_cost_matrix + >>> x = np.array([[1, 2, 3, 4, 5]]) + >>> y = np.array([[1, 2, 3, 4, 5]]) + >>> kdtw_cost_matrix(x, y) + array([[1.11111111, 0.22232162, 0.08641977, 0.03292181, 0.01234568], + [0.22232162, 0.51858479, 0.20170132, 0.07820771, 0.02332192], + [0.08641977, 0.20170132, 0.30732914, 0.14910683, 0.03689383], + [0.03292181, 0.07820771, 0.14910683, 0.2018476 , 0.05442779], + [0.01234568, 0.02332192, 0.03689383, 0.05442779, 0.10356772]]) + """ + _x = x + _y = y + if x.ndim == 1: + _x = x.reshape((1, x.shape[0])) + if y.ndim == 1: + _y = y.reshape((1, y.shape[0])) + if _x.ndim != 2 or _y.ndim != 2: + raise ValueError("x and y must be 1D or 2D") + + if normalize_input: + _x = _normalize_time_series(_x) + _y = _normalize_time_series(_y) + + return _kdtw_cost_matrix(_x, _y, gamma, epsilon) + + +@njit(cache=True, fastmath=True) +def _kdtw_distance( + x: np.ndarray, + y: np.ndarray, + gamma: float, + epsilon: float, + normalize_input: bool, + normalize_dist: bool, +) -> float: + if normalize_input: + _x = _normalize_time_series(x) + _y = _normalize_time_series(y) + else: + _x = x + _y = y + + cost_matrix = _kdtw_cost_matrix(_x, _y, gamma, epsilon) + return _kdtw_cost_to_distance(cost_matrix, _x, _y, gamma, epsilon, normalize_dist) + + +@njit(cache=True, fastmath=True) +def _kdtw_cost_to_distance( + cost_matrix: np.ndarray, + x: np.ndarray, + y: np.ndarray, + gamma: float, + epsilon: float, + normalize_dist: bool, +) -> float: + n = x.shape[-1] - 1 + m = y.shape[-1] - 1 + current_cost = cost_matrix[n, m] + if normalize_dist: + self_x = _kdtw_cost_matrix(x, x, gamma, epsilon)[n, n] + self_y = _kdtw_cost_matrix(y, y, gamma, epsilon)[m, m] + norm_factor = np.sqrt(self_x * self_y) + if norm_factor != 0.0: + current_cost /= norm_factor + return 1.0 - current_cost + + +@njit(cache=True, fastmath=True) +def _local_kernel( + x: np.ndarray, y: np.ndarray, gamma: float, epsilon: float +) -> np.ndarray: + # 1 / c in the paper; beta on the website + factor = 1.0 / 3.0 + # Incoming shape is (n_channels, n_timepoints) + # We want to calculate the multivariate squared distance between the two time series + # considering each point in the time series as a separate instance, thus we need to + # reshape to (m_cases, m_channels, 1), where m_cases = n_timepoints and + # m_channels = n_channels. + x = x.T.reshape(-1, x.shape[0], 1) + y = y.T.reshape(-1, y.shape[0], 1) + n_cases = x.shape[0] + m_cases = y.shape[0] + distances = np.zeros((n_cases, m_cases)) + + for i in range(n_cases): + for j in range(m_cases): + # expects each input to have shape (n_channels, n_timepoints = 1) + distances[i, j] = squared_distance(x[i], y[j]) + + return factor * (np.exp(-distances / gamma) + epsilon) + + +@njit(cache=True, fastmath=True) +def _kdtw_cost_matrix( + x: np.ndarray, y: np.ndarray, gamma: float, epsilon: float +) -> np.ndarray: + # deals with multivariate time series, afterward, we just work with the distances + # and do not handle the channels anymore + local_kernel = _local_kernel(x, y, gamma, epsilon) + + # For the initial values of the cost matrix, we add 1 + n = np.shape(x)[-1] + 1 + m = np.shape(y)[-1] + 1 + + cost_matrix = np.zeros((n, m)) + cumulative_dp_diag = np.zeros((n, m)) + diagonal_weights = np.zeros(max(n, m)) + + # Initialize the diagonal weights + min_timepoints = min(n, m) + diagonal_weights[0] = 1.0 + for i in range(1, min_timepoints): + diagonal_weights[i] = local_kernel[i - 1, i - 1] + + # Initialize the cost matrix and cumulative dp diagonal + cost_matrix[0, 0] = 1 + cumulative_dp_diag[0, 0] = 1 + + # - left column + for i in range(1, n): + cost_matrix[i, 0] = cost_matrix[i - 1, 0] * local_kernel[i - 1, 0] + cumulative_dp_diag[i, 0] = cumulative_dp_diag[i - 1, 0] * diagonal_weights[i] + + # - top row + for j in range(1, m): + cost_matrix[0, j] = cost_matrix[0, j - 1] * local_kernel[0, j - 1] + cumulative_dp_diag[0, j] = cumulative_dp_diag[0, j - 1] * diagonal_weights[j] + + # Perform the main dynamic programming loop + for i in range(1, n): + for j in range(1, m): + local_cost = local_kernel[i - 1, j - 1] + cost_matrix[i, j] = ( + cost_matrix[i - 1, j] + + cost_matrix[i, j - 1] + + cost_matrix[i - 1, j - 1] + ) * local_cost + cumulative_dp_diag[i, j] = ( + cumulative_dp_diag[i - 1, j] * diagonal_weights[i] + + cumulative_dp_diag[i, j - 1] * diagonal_weights[j] + ) + if i == j: + cumulative_dp_diag[i, j] += ( + cumulative_dp_diag[i - 1, j - 1] * local_cost + ) + + # Add the cumulative dp diagonal to the cost matrix + cost_matrix = cost_matrix + cumulative_dp_diag + return cost_matrix[1:, 1:] + + +def kdtw_pairwise_distance( + X: Union[np.ndarray, list[np.ndarray]], + y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, + gamma: float = 0.125, + epsilon: float = 1e-20, + normalize_input: bool = True, + normalize_dist: bool = True, +) -> np.ndarray: + """Compute the KDTW pairwise distance between a set of time series. + + Parameters + ---------- + X : np.ndarray + A collection of time series instances of shape ``(n_instances, n_timepoints)`` + or ``(n_instances, n_channels, n_timepoints)``. + y : np.ndarray or None, default=None + A single series or a collection of time series of shape ``(m_timepoints,)`` or + ``(m_instances, m_timepoints)`` or ``(m_instances, m_channels, m_timepoints)``. + If None, then the KDTW pairwise distance between the instances of X is + calculated. + gamma : float, default=0.125 + bandwidth parameter which weights the local contributions, i.e. the distances + between locally aligned positions. + epsilon : float, default=1e-20 + Small value to avoid zero. The default is 1e-20. + normalize_input : bool, default=True + Whether to normalize the time series' channels to zero mean and unit variance + before computing the distance. + normalize_dist : bool, default=True + Whether to normalize the distance by the product of the self distances of x and + y to avoid scaling effects and put the distance between 0 and 1. + + Returns + ------- + np.ndarray (n_instances, n_instances) + KDTW pairwise matrix between the instances of X. + + Raises + ------ + ValueError + If X is not 2D or 3D array when only passing X. + If X and y are not 1D, 2D or 3D arrays when passing both X and y. + + Examples + -------- + >>> import numpy as np + >>> from aeon.distances import kdtw_pairwise_distance + >>> # Distance between each time series in a collection of time series + >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) + >>> kdtw_pairwise_distance(X, normalize_dist=False) + array([[0. , 0.73384612, 0.73384612], + [0.73384612, 0. , 0.73384612], + [0.73384612, 0.73384612, 0. ]] + + >>> # Distance between two collections of time series + >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) + >>> y = np.array([[[11, 12, 13, 14]],[[15, 16, 17, 18]], [[19, 20, 21, 22]]]) + >>> kdtw_pairwise_distance(X, y) + array([[0.90035627, 0.90035627, 0.90035627], + [0.90035627, 0.90035627, 0.90035627], + [0.90035627, 0.90035627, 0.90035627]]) + + >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) + >>> y_univariate = np.array([[11, 12, 13],[14, 15, 16], [17, 18, 19]]) + >>> kdtw_pairwise_distance(X, y_univariate, normalize_dist=False) + array([[0.73384612, 0.73384612, 0.73384612], + [0.73384612, 0.73384612, 0.73384612], + [0.73384612, 0.73384612, 0.73384612]]) + """ + multivariate_conversion = _is_numpy_list_multivariate(X, y) + _X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) + if y is None: + # To self + return _kdtw_pairwise_distance( + _X, gamma, epsilon, normalize_input, normalize_dist + ) + + _y, _ = _convert_collection_to_numba_list(y, "y", multivariate_conversion) + return _kdtw_from_multiple_to_multiple_distance( + _X, _y, gamma, epsilon, normalize_input, normalize_dist + ) + + +@njit(cache=True, fastmath=True) +def _kdtw_pairwise_distance( + X: NumbaList[np.ndarray], + gamma: float, + epsilon: float, + normalize_input: bool, + normalize_dist: bool, +) -> np.ndarray: + n_instances = len(X) + distances = np.zeros((n_instances, n_instances)) + + for i in range(n_instances): + for j in range(i + 1, n_instances): + distances[i, j] = _kdtw_distance( + X[i], X[j], gamma, epsilon, normalize_input, normalize_dist + ) + distances[j, i] = distances[i, j] + + return distances + + +@njit(cache=True, fastmath=True) +def _kdtw_from_multiple_to_multiple_distance( + x: NumbaList[np.ndarray], + y: NumbaList[np.ndarray], + gamma: float, + epsilon: float, + normalize_input: bool, + normalize_dist: bool, +) -> np.ndarray: + n_instances = len(x) + m_instances = len(y) + distances = np.zeros((n_instances, m_instances)) + + for i in range(n_instances): + for j in range(m_instances): + distances[i, j] = _kdtw_distance( + x[i], y[j], gamma, epsilon, normalize_input, normalize_dist + ) + return distances + + +@njit(cache=True) +def kdtw_alignment_path( + x: np.ndarray, + y: np.ndarray, + gamma: float = 0.125, + epsilon: float = 1e-20, + normalize_input: bool = True, + normalize_dist: bool = True, +) -> tuple[list[tuple[int, int]], float]: + """Compute the kdtw alignment path between two time series. + + Parameters + ---------- + x : np.ndarray + First time series, shape ``(n_channels, n_timepoints)`` or ``(n_timepoints,)``. + y : np.ndarray + Second time series, shape ``(m_channels, m_timepoints)`` or ``(m_timepoints,)``. + gamma : float, default=0.125 + bandwidth parameter which weights the local contributions, i.e. the distances + between locally aligned positions. + epsilon : float, default=1e-20 + Small value to avoid zero. The default is 1e-20. + normalize_input : bool, default=True + Whether to normalize the time series' channels to zero mean and unit variance + before computing the distance. + normalize_dist : bool, default=True + Whether to normalize the distance by the product of the self distances of x and + y to avoid scaling effects and put the distance between 0 and 1. + + Returns + ------- + List[Tuple[int, int]] + The alignment path between the two time series where each element is a tuple + of the index in x and the index in y that have the best alignment according + to the cost matrix. + float + The unnormalized kdtw distance betweeen the two time series. + + Raises + ------ + ValueError + If x and y are not 1D or 2D arrays. + + Examples + -------- + >>> import numpy as np + >>> from aeon.distances import kdtw_alignment_path + >>> x = np.array([[1, 2, 3, 6]]) + >>> y = np.array([[1, 2, 3, 4]]) + >>> kdtw_alignment_path(x, y, normalize_dist=False) + ([(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)], 0.8393218410741822) + """ + _x = x + _y = y + if x.ndim == 1: + _x = x.reshape((1, x.shape[0])) + if y.ndim == 1: + _y = y.reshape((1, y.shape[0])) + if _x.ndim != 2 or _y.ndim != 2: + raise ValueError("x and y must be 1D or 2D") + + if normalize_input: + _x = _normalize_time_series(_x) + _y = _normalize_time_series(_y) + + cost_matrix = _kdtw_cost_matrix(_x, _y, gamma, epsilon) + return ( + compute_min_return_path(cost_matrix, larger_is_better=True), + _kdtw_cost_to_distance(cost_matrix, _x, _y, gamma, epsilon, normalize_dist), + ) diff --git a/aeon/distances/kernel/tests/__init__.py b/aeon/distances/kernel/tests/__init__.py new file mode 100644 index 0000000000..6ccdc73d52 --- /dev/null +++ b/aeon/distances/kernel/tests/__init__.py @@ -0,0 +1 @@ +"""Kernel distance tests.""" diff --git a/aeon/distances/kernel/tests/test_distance_correctness.py b/aeon/distances/kernel/tests/test_distance_correctness.py new file mode 100644 index 0000000000..efd2caea28 --- /dev/null +++ b/aeon/distances/kernel/tests/test_distance_correctness.py @@ -0,0 +1,70 @@ +"""Test the correctness of the KDTW distance calculations. + +Compare the distance calculations on the 1D and 2D (d,m) format input against the +results generated with the Matlab/Octave implementation from +https://people.irisa.fr/Pierre-Francois.Marteau/REDK/KDTW/kdtw.m (adapted to support +multivariate time series). +""" + +from numpy.testing import assert_array_almost_equal_nulp + +from aeon.datasets import load_basic_motions, load_unit_test +from aeon.distances import kdtw_distance + +distances = [ + "kdtw", +] + +distance_parameters = { + "kdtw": [1e-5, 1e-2, 1e-1], # gamma +} +basic_motions_distances = { + "kdtw": [1.0, 1.0, 1.0], +} +unit_test_distances = { + "kdtw": [1.0, 1.0, 0.9984255139339736], +} + + +def test_multivariate_correctness(): + """Test distance correctness on BasicMotions: multivariate, equal length.""" + trainX, _ = load_basic_motions(return_type="numpy3D") + case1 = trainX[0] + case2 = trainX[1] + + for j in range(0, 3): + d = kdtw_distance( + case1, + case2, + gamma=distance_parameters["kdtw"][j], + normalize_input=True, + normalize_dist=True, + ) + assert_array_almost_equal_nulp(d, basic_motions_distances["kdtw"][j]) + + +def test_univariate_correctness(): + """Test correctness on UnitTest: univariate, equal length.""" + trainX, _ = load_unit_test(return_type="numpy3D") + trainX2, _ = load_unit_test(return_type="numpy2D") + # Test 2D and 3D instances from UnitTest + cases1 = [trainX[0], trainX2[0]] + cases2 = [trainX[2], trainX2[2]] + + for j in range(0, 3): + d = kdtw_distance( + cases1[0], + cases2[0], + gamma=distance_parameters["kdtw"][j], + normalize_input=True, + normalize_dist=True, + ) + d2 = kdtw_distance( + cases1[1], + cases2[1], + gamma=distance_parameters["kdtw"][j], + normalize_input=True, + normalize_dist=True, + ) + assert_array_almost_equal_nulp(d, unit_test_distances["kdtw"][j]) + assert d == d2 diff --git a/aeon/distances/tests/test_numba_distance_parameters.py b/aeon/distances/tests/test_numba_distance_parameters.py index 2293a239bd..ad734b12de 100644 --- a/aeon/distances/tests/test_numba_distance_parameters.py +++ b/aeon/distances/tests/test_numba_distance_parameters.py @@ -132,6 +132,10 @@ def _test_distance_params( "shape_dtw": BASIC_BOUNDING_PARAMS + [{"reach": 4}], "shift_scale": [{"max_shift": 1}, {"max_shift": None}], "soft_dtw": BASIC_BOUNDING_PARAMS + [{"gamma": 0.2}], + "kdtw": [ + {"gamma": 0.125, "epsilon": 1e-3}, + {"gamma": 0.125, "epsilon": 1e-3, "normalize_dist": False}, + ], } diff --git a/aeon/testing/expected_results/expected_distance_results.py b/aeon/testing/expected_results/expected_distance_results.py index 7126c5c624..bc11369904 100644 --- a/aeon/testing/expected_results/expected_distance_results.py +++ b/aeon/testing/expected_results/expected_distance_results.py @@ -125,6 +125,13 @@ None, 0.0, ], + "kdtw": [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + ], } @@ -190,4 +197,8 @@ [0.8103639073457298, 5.535457073146429], [0.6519267432870345, 5.491208968546096], ], + "kdtw": [ + [1.0, 1.0], + [1.0, 1.0], + ], } diff --git a/docs/api_reference/distances.rst b/docs/api_reference/distances.rst index 05db50f45f..d6ede03c90 100644 --- a/docs/api_reference/distances.rst +++ b/docs/api_reference/distances.rst @@ -222,6 +222,20 @@ Weighted Dynamic Time Warping (DTW) wdtw_cost_matrix wdtw_alignment_path +Dynamic Time Warping kernel distance (KDTW) +------------------------------------------- + +.. currentmodule:: aeon.distances + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + kdtw_distance + kdtw_pairwise_distance + kdtw_cost_matrix + kdtw_alignment_path + General methods with distance argument --------------------------------------