From 21c014e93f382d665e3b0b9a24f6777b89fda9f5 Mon Sep 17 00:00:00 2001 From: Divya Tiwari Date: Wed, 31 Jul 2024 17:00:03 +0530 Subject: [PATCH 1/4] parameterise dtw --- aeon/distances/_dtw.py | 110 +++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 48 deletions(-) diff --git a/aeon/distances/_dtw.py b/aeon/distances/_dtw.py index 11e7399c41..a290f897df 100644 --- a/aeon/distances/_dtw.py +++ b/aeon/distances/_dtw.py @@ -10,7 +10,6 @@ from aeon.distances._alignment_paths import compute_min_return_path from aeon.distances._bounding_matrix import create_bounding_matrix -from aeon.distances._squared import _univariate_squared_distance from aeon.distances._utils import _convert_to_list, _is_multivariate @@ -19,6 +18,7 @@ def dtw_distance( x: np.ndarray, y: np.ndarray, window: Optional[float] = None, + p: Optional[float] = 2.0, itakura_max_slope: Optional[float] = None, ) -> float: r"""Compute the DTW distance between two time series. @@ -31,7 +31,7 @@ def dtw_distance( :math:`\mathbf{y}=\{y_1,y_2, \ldots,y_m\}` DTW first calculates :math:`M(\mathbf{x},\mathbf{y})`, the :math:`n \times m` pointwise distance matrix between series :math:`\mathbf{x}` and :math:`\mathbf{y}`, - where :math:`M_{i,j}= (x_i-y_j)^2`. + where :math:`M_{i,j}= (x_i-y_j)^p`. A warping path @@ -76,6 +76,8 @@ def dtw_distance( is used. window is a percentage deviation, so if ``window = 0.1`` then 10% of the series length is the max warping allowed. is used. + p : float, default=2.0 + The order of the Minkowski distance to use. itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. @@ -106,11 +108,11 @@ def dtw_distance( >>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) >>> y = np.array([11, 12, 13, 14, 15, 16, 17, 18, 19, 20]) >>> dtw_distance(x, y) # 1D series - 768.0 + 100.0 >>> x = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [0, 1, 0, 2, 0]]) >>> y = np.array([[11, 12, 13, 14],[7, 8, 9, 20],[1, 3, 4, 5]] ) - >>> dtw_distance(x, y) # 2D series with 3 channels, unequal length - 564.0 + >>> dtw_distance(x, y, p = 1) # 2D series with 3 channels, unequal length + 52.158582470567424 """ if x.ndim == 1 and y.ndim == 1: _x = x.reshape((1, x.shape[0])) @@ -118,12 +120,12 @@ def dtw_distance( bounding_matrix = create_bounding_matrix( _x.shape[1], _y.shape[1], window, itakura_max_slope ) - return _dtw_distance(_x, _y, bounding_matrix) + return _dtw_distance(_x, _y, bounding_matrix, p) if x.ndim == 2 and y.ndim == 2: bounding_matrix = create_bounding_matrix( x.shape[1], y.shape[1], window, itakura_max_slope ) - return _dtw_distance(x, y, bounding_matrix) + return _dtw_distance(x, y, bounding_matrix, p) raise ValueError("x and y must be 1D or 2D") @@ -132,12 +134,13 @@ def dtw_cost_matrix( x: np.ndarray, y: np.ndarray, window: Optional[float] = None, + p: Optional[float] = 2.0, itakura_max_slope: Optional[float] = None, ) -> np.ndarray: r"""Compute the DTW cost matrix between two time series. - The cost matrix is the pairwise Euclidean distance between all points - :math:`M_{i,j}=(x_i-x_j)^2`. It is used in the DTW path calculations. + The cost matrix is the pairwise Minkowski distance between all points + :math:`M_{i,j}=(x_i-x_j)^p`. It is used in the DTW path calculations. Parameters ---------- @@ -152,6 +155,8 @@ def dtw_cost_matrix( is used. window is a percentage deviation, so if ``window = 0.1``, 10% of the series length is the max warping allowed. is used. + p : float, default=2.0 + The order of the Minkowski distance to use. itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. @@ -173,16 +178,16 @@ def dtw_cost_matrix( >>> x = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) >>> y = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) >>> dtw_cost_matrix(x, y) - array([[ 0., 1., 5., 14., 30., 55., 91., 140., 204., 285.], - [ 1., 0., 1., 5., 14., 30., 55., 91., 140., 204.], - [ 5., 1., 0., 1., 5., 14., 30., 55., 91., 140.], - [ 14., 5., 1., 0., 1., 5., 14., 30., 55., 91.], - [ 30., 14., 5., 1., 0., 1., 5., 14., 30., 55.], - [ 55., 30., 14., 5., 1., 0., 1., 5., 14., 30.], - [ 91., 55., 30., 14., 5., 1., 0., 1., 5., 14.], - [140., 91., 55., 30., 14., 5., 1., 0., 1., 5.], - [204., 140., 91., 55., 30., 14., 5., 1., 0., 1.], - [285., 204., 140., 91., 55., 30., 14., 5., 1., 0.]]) + array([[ 0., 1., 3., 6., 10., 15., 21., 28., 36., 45.], + [ 1., 0., 1., 3., 6., 10., 15., 21., 28., 36.], + [ 3., 1., 0., 1., 3., 6., 10., 15., 21., 28.], + [ 6., 3., 1., 0., 1., 3., 6., 10., 15., 21.], + [10., 6., 3., 1., 0., 1., 3., 6., 10., 15.], + [15., 10., 6., 3., 1., 0., 1., 3., 6., 10.], + [21., 15., 10., 6., 3., 1., 0., 1., 3., 6.], + [28., 21., 15., 10., 6., 3., 1., 0., 1., 3.], + [36., 28., 21., 15., 10., 6., 3., 1., 0., 1.], + [45., 36., 28., 21., 15., 10., 6., 3., 1., 0.]]) """ if x.ndim == 1 and y.ndim == 1: _x = x.reshape((1, x.shape[0])) @@ -190,23 +195,25 @@ def dtw_cost_matrix( bounding_matrix = create_bounding_matrix( _x.shape[1], _y.shape[1], window, itakura_max_slope ) - return _dtw_cost_matrix(_x, _y, bounding_matrix) + return _dtw_cost_matrix(_x, _y, bounding_matrix, p) if x.ndim == 2 and y.ndim == 2: bounding_matrix = create_bounding_matrix( x.shape[1], y.shape[1], window, itakura_max_slope ) - return _dtw_cost_matrix(x, y, bounding_matrix) + return _dtw_cost_matrix(x, y, bounding_matrix, p) raise ValueError("x and y must be 1D or 2D") @njit(cache=True, fastmath=True) -def _dtw_distance(x: np.ndarray, y: np.ndarray, bounding_matrix: np.ndarray) -> float: - return _dtw_cost_matrix(x, y, bounding_matrix)[x.shape[1] - 1, y.shape[1] - 1] +def _dtw_distance( + x: np.ndarray, y: np.ndarray, bounding_matrix: np.ndarray, p: float +) -> float: + return _dtw_cost_matrix(x, y, bounding_matrix, p)[x.shape[1] - 1, y.shape[1] - 1] @njit(cache=True, fastmath=True) def _dtw_cost_matrix( - x: np.ndarray, y: np.ndarray, bounding_matrix: np.ndarray + x: np.ndarray, y: np.ndarray, bounding_matrix: np.ndarray, p: float ) -> np.ndarray: x_size = x.shape[1] y_size = y.shape[1] @@ -216,9 +223,8 @@ def _dtw_cost_matrix( for i in range(x_size): for j in range(y_size): if bounding_matrix[i, j]: - cost_matrix[i + 1, j + 1] = _univariate_squared_distance( - x[:, i], y[:, j] - ) + min( + dist = np.sum(np.abs(x[:, i] - y[:, j]) ** p) ** (1.0 / p) + cost_matrix[i + 1, j + 1] = dist + min( cost_matrix[i, j + 1], cost_matrix[i + 1, j], cost_matrix[i, j], @@ -231,6 +237,7 @@ def dtw_pairwise_distance( X: Union[np.ndarray, List[np.ndarray]], y: Optional[Union[np.ndarray, List[np.ndarray]]] = None, window: Optional[float] = None, + p: Optional[float] = 2.0, itakura_max_slope: Optional[float] = None, ) -> np.ndarray: r"""Compute the DTW pairwise distance between a set of time series. @@ -264,6 +271,8 @@ def dtw_pairwise_distance( window : float or None, default=None The window to use for the bounding matrix. If None, no bounding matrix is used. + p : float, default=2.0 + The order of the Minkowski distance to use. itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. @@ -287,41 +296,41 @@ def dtw_pairwise_distance( >>> # Distance between each time series in a collection of time series >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) >>> dtw_pairwise_distance(X) - array([[ 0., 26., 108.], - [ 26., 0., 26.], - [108., 26., 0.]]) + array([[ 0., 9., 18.], + [ 9., 0., 9.], + [18., 9., 0.]]) >>> # Distance between two collections of time series >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) >>> y = np.array([[[11, 12, 13]],[[14, 15, 16]], [[17, 18, 19]]]) - >>> dtw_pairwise_distance(X, y) - array([[300., 507., 768.], - [147., 300., 507.], - [ 48., 147., 300.]]) + >>> dtw_pairwise_distance(X, y, p=1) + array([[30., 39., 48.], + [21., 30., 39.], + [12., 21., 30.]]) >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) >>> y_univariate = np.array([11, 12, 13]) - >>> dtw_pairwise_distance(X, y_univariate) - array([[300.], - [147.], - [ 48.]]) + >>> dtw_pairwise_distance(X, y_univariate, p=1.5) + array([[30.], + [21.], + [12.]]) >>> # Distance between each TS in a collection of unequal-length time series >>> X = [np.array([1, 2, 3]), np.array([4, 5, 6, 7]), np.array([8, 9, 10, 11, 12])] - >>> dtw_pairwise_distance(X) - array([[ 0., 42., 292.], - [ 42., 0., 83.], - [292., 83., 0.]]) + >>> dtw_pairwise_distance(X, p=1.2) + array([[ 0., 13., 38.], + [13., 0., 21.], + [38., 21., 0.]]) """ multivariate_conversion = _is_multivariate(X, y) _X, unequal_length = _convert_to_list(X, "X", multivariate_conversion) if y is None: # To self - return _dtw_pairwise_distance(_X, window, itakura_max_slope, unequal_length) + return _dtw_pairwise_distance(_X, window, p, itakura_max_slope, unequal_length) _y, unequal_length = _convert_to_list(y, "y", multivariate_conversion) return _dtw_from_multiple_to_multiple_distance( - _X, _y, window, itakura_max_slope, unequal_length + _X, _y, window, p, itakura_max_slope, unequal_length ) @@ -329,6 +338,7 @@ def dtw_pairwise_distance( def _dtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], + p: Optional[float], itakura_max_slope: Optional[float], unequal_length: bool, ) -> np.ndarray: @@ -347,7 +357,7 @@ def _dtw_pairwise_distance( bounding_matrix = create_bounding_matrix( x1.shape[1], x2.shape[1], window, itakura_max_slope ) - distances[i, j] = _dtw_distance(x1, x2, bounding_matrix) + distances[i, j] = _dtw_distance(x1, x2, bounding_matrix, p) distances[j, i] = distances[i, j] return distances @@ -358,6 +368,7 @@ def _dtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], window: Optional[float], + p: Optional[float], itakura_max_slope: Optional[float], unequal_length: bool, ) -> np.ndarray: @@ -376,7 +387,7 @@ def _dtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x1.shape[1], y1.shape[1], window, itakura_max_slope ) - distances[i, j] = _dtw_distance(x1, y1, bounding_matrix) + distances[i, j] = _dtw_distance(x1, y1, bounding_matrix, p) return distances @@ -385,6 +396,7 @@ def dtw_alignment_path( x: np.ndarray, y: np.ndarray, window: Optional[float] = None, + p: Optional[float] = 2.0, itakura_max_slope: Optional[float] = None, ) -> Tuple[List[Tuple[int, int]], float]: """Compute the DTW alignment path between two time series. @@ -398,6 +410,8 @@ def dtw_alignment_path( window : float, default=None The window to use for the bounding matrix. If None, no bounding matrix is used. + p : float, default=2.0 + The order of the Minkowski distance to use. itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. @@ -423,9 +437,9 @@ def dtw_alignment_path( >>> x = np.array([[1, 2, 3, 6]]) >>> y = np.array([[1, 2, 3, 4]]) >>> dtw_alignment_path(x, y) - ([(0, 0), (1, 1), (2, 2), (3, 3)], 4.0) + ([(0, 0), (1, 1), (2, 2), (3, 3)], 2.0) """ - cost_matrix = dtw_cost_matrix(x, y, window, itakura_max_slope) + cost_matrix = dtw_cost_matrix(x, y, window, p, itakura_max_slope) return ( compute_min_return_path(cost_matrix), cost_matrix[x.shape[-1] - 1, y.shape[-1] - 1], From a22c831861d74c1ef3ca0a3034ce6d542cc62be5 Mon Sep 17 00:00:00 2001 From: Divya Tiwari Date: Thu, 1 Aug 2024 20:56:41 +0530 Subject: [PATCH 2/4] Distance tests passed --- aeon/distances/_distance.py | 26 ++++++++++++++++--- aeon/distances/_dtw.py | 6 +++-- .../tests/test_distance_correctness.py | 4 +-- .../expected_distance_results.py | 14 +++++----- 4 files changed, 35 insertions(+), 15 deletions(-) diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py index 6e43d8066d..2c395c7566 100644 --- a/aeon/distances/_distance.py +++ b/aeon/distances/_distance.py @@ -161,7 +161,13 @@ def distance( elif metric == "minkowski": return minkowski_distance(x, y, kwargs.get("p", 2.0), kwargs.get("w", None)) elif metric == "dtw": - return dtw_distance(x, y, kwargs.get("window"), kwargs.get("itakura_max_slope")) + return dtw_distance( + x, + y, + kwargs.get("window"), + kwargs.get("p", 2.0), + kwargs.get("itakura_max_slope"), + ) elif metric == "ddtw": return ddtw_distance( x, y, kwargs.get("window"), kwargs.get("itakura_max_slope") @@ -330,7 +336,11 @@ def pairwise_distance( ) elif metric == "dtw": return dtw_pairwise_distance( - x, y, kwargs.get("window"), kwargs.get("itakura_max_slope") + x, + y, + kwargs.get("window"), + kwargs.get("p", 2.0), + kwargs.get("itakura_max_slope"), ) elif metric == "shape_dtw": return shape_dtw_pairwise_distance( @@ -524,7 +534,11 @@ def alignment_path( """ if metric == "dtw": return dtw_alignment_path( - x, y, kwargs.get("window"), kwargs.get("itakura_max_slope") + x, + y, + kwargs.get("window"), + kwargs.get("p", 2.0), + kwargs.get("itakura_max_slope"), ) elif metric == "shape_dtw": return shape_dtw_alignment_path( @@ -667,7 +681,11 @@ def cost_matrix( """ if metric == "dtw": return dtw_cost_matrix( - x, y, kwargs.get("window"), kwargs.get("itakura_max_slope") + x, + y, + kwargs.get("window"), + kwargs.get("p", 2.0), + kwargs.get("itakura_max_slope"), ) elif metric == "shape_dtw": return shape_dtw_cost_matrix( diff --git a/aeon/distances/_dtw.py b/aeon/distances/_dtw.py index a290f897df..17cb92c3b7 100644 --- a/aeon/distances/_dtw.py +++ b/aeon/distances/_dtw.py @@ -10,6 +10,7 @@ from aeon.distances._alignment_paths import compute_min_return_path from aeon.distances._bounding_matrix import create_bounding_matrix +from aeon.distances._minkowski import _univariate_minkowski_distance from aeon.distances._utils import _convert_to_list, _is_multivariate @@ -112,7 +113,7 @@ def dtw_distance( >>> x = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [0, 1, 0, 2, 0]]) >>> y = np.array([[11, 12, 13, 14],[7, 8, 9, 20],[1, 3, 4, 5]] ) >>> dtw_distance(x, y, p = 1) # 2D series with 3 channels, unequal length - 52.158582470567424 + 68.0 """ if x.ndim == 1 and y.ndim == 1: _x = x.reshape((1, x.shape[0])) @@ -223,7 +224,8 @@ def _dtw_cost_matrix( for i in range(x_size): for j in range(y_size): if bounding_matrix[i, j]: - dist = np.sum(np.abs(x[:, i] - y[:, j]) ** p) ** (1.0 / p) + _w = np.ones_like(x[:, i]) + dist = _univariate_minkowski_distance(x[:, i], y[:, j], p, _w) cost_matrix[i + 1, j + 1] = dist + min( cost_matrix[i, j + 1], cost_matrix[i + 1, j], diff --git a/aeon/distances/tests/test_distance_correctness.py b/aeon/distances/tests/test_distance_correctness.py index 470a38563b..d9a6165bf7 100644 --- a/aeon/distances/tests/test_distance_correctness.py +++ b/aeon/distances/tests/test_distance_correctness.py @@ -48,7 +48,7 @@ unit_test_distances = { "euclidean": 619.7959, "squared": 384147.0, - "dtw": [384147.0, 315012.0, 275854.0], + "dtw": [2275.0, 2063.0, 1988.0], "wdtw": [137927.0, 68406.15849, 2.2296], "erp": [168.0, 1107.0, 2275.0], "lcss": [1.0, 0.45833, 0.08333], @@ -62,7 +62,7 @@ basic_motions_distances = { "euclidean": 27.51835240, "squared": 757.25971908652, - "dtw": [757.259719, 330.834497, 330.834497], + "dtw": [169.3715777363612, 103.16627927361445, 103.16627927361445], "wdtw": [165.41724, 3.308425, 0], "msm": [70.014828, 89.814828, 268.014828], "erp": [0.2086269, 2.9942540, 102.097904], diff --git a/aeon/testing/expected_results/expected_distance_results.py b/aeon/testing/expected_results/expected_distance_results.py index 95dcac1a5b..fea66c46a4 100644 --- a/aeon/testing/expected_results/expected_distance_results.py +++ b/aeon/testing/expected_results/expected_distance_results.py @@ -35,11 +35,11 @@ 9.938223910720454, ], "dtw": [ - 25.0, - 10.840209465045632, - 184.1829978640948, - 11.01992239481733, - 98.76829449961573, + 5.0, + 8.559917930965497, + 44.3099600330504, + 9.594320378741982, + 29.148827543490025, ], "ddtw": [ 0.0, @@ -112,8 +112,8 @@ # Result structure: # [univariate series, multivariate series] "dtw": [ - [13.875064266550797, 184.18299786409477], - [14.250134348814871, 197.8788694266691], + [10.441974486203806, 44.3099600330504], + [10.8024655063829, 44.3099600330504], ], "ddtw": [ [10.735835993738842, 132.73682936639955], From bf54769124398af4e4233447ec2ce46c0a9cb4eb Mon Sep 17 00:00:00 2001 From: Divya Tiwari Date: Mon, 5 Aug 2024 11:20:30 +0530 Subject: [PATCH 3/4] Revert the changes --- aeon/distances/_distance.py | 26 +--- aeon/distances/_dtw.py | 112 ++++++++---------- .../tests/test_distance_correctness.py | 14 +-- .../expected_distance_results.py | 28 ++--- 4 files changed, 73 insertions(+), 107 deletions(-) diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py index 2c395c7566..6e43d8066d 100644 --- a/aeon/distances/_distance.py +++ b/aeon/distances/_distance.py @@ -161,13 +161,7 @@ def distance( elif metric == "minkowski": return minkowski_distance(x, y, kwargs.get("p", 2.0), kwargs.get("w", None)) elif metric == "dtw": - return dtw_distance( - x, - y, - kwargs.get("window"), - kwargs.get("p", 2.0), - kwargs.get("itakura_max_slope"), - ) + return dtw_distance(x, y, kwargs.get("window"), kwargs.get("itakura_max_slope")) elif metric == "ddtw": return ddtw_distance( x, y, kwargs.get("window"), kwargs.get("itakura_max_slope") @@ -336,11 +330,7 @@ def pairwise_distance( ) elif metric == "dtw": return dtw_pairwise_distance( - x, - y, - kwargs.get("window"), - kwargs.get("p", 2.0), - kwargs.get("itakura_max_slope"), + x, y, kwargs.get("window"), kwargs.get("itakura_max_slope") ) elif metric == "shape_dtw": return shape_dtw_pairwise_distance( @@ -534,11 +524,7 @@ def alignment_path( """ if metric == "dtw": return dtw_alignment_path( - x, - y, - kwargs.get("window"), - kwargs.get("p", 2.0), - kwargs.get("itakura_max_slope"), + x, y, kwargs.get("window"), kwargs.get("itakura_max_slope") ) elif metric == "shape_dtw": return shape_dtw_alignment_path( @@ -681,11 +667,7 @@ def cost_matrix( """ if metric == "dtw": return dtw_cost_matrix( - x, - y, - kwargs.get("window"), - kwargs.get("p", 2.0), - kwargs.get("itakura_max_slope"), + x, y, kwargs.get("window"), kwargs.get("itakura_max_slope") ) elif metric == "shape_dtw": return shape_dtw_cost_matrix( diff --git a/aeon/distances/_dtw.py b/aeon/distances/_dtw.py index 17cb92c3b7..11e7399c41 100644 --- a/aeon/distances/_dtw.py +++ b/aeon/distances/_dtw.py @@ -10,7 +10,7 @@ from aeon.distances._alignment_paths import compute_min_return_path from aeon.distances._bounding_matrix import create_bounding_matrix -from aeon.distances._minkowski import _univariate_minkowski_distance +from aeon.distances._squared import _univariate_squared_distance from aeon.distances._utils import _convert_to_list, _is_multivariate @@ -19,7 +19,6 @@ def dtw_distance( x: np.ndarray, y: np.ndarray, window: Optional[float] = None, - p: Optional[float] = 2.0, itakura_max_slope: Optional[float] = None, ) -> float: r"""Compute the DTW distance between two time series. @@ -32,7 +31,7 @@ def dtw_distance( :math:`\mathbf{y}=\{y_1,y_2, \ldots,y_m\}` DTW first calculates :math:`M(\mathbf{x},\mathbf{y})`, the :math:`n \times m` pointwise distance matrix between series :math:`\mathbf{x}` and :math:`\mathbf{y}`, - where :math:`M_{i,j}= (x_i-y_j)^p`. + where :math:`M_{i,j}= (x_i-y_j)^2`. A warping path @@ -77,8 +76,6 @@ def dtw_distance( is used. window is a percentage deviation, so if ``window = 0.1`` then 10% of the series length is the max warping allowed. is used. - p : float, default=2.0 - The order of the Minkowski distance to use. itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. @@ -109,11 +106,11 @@ def dtw_distance( >>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) >>> y = np.array([11, 12, 13, 14, 15, 16, 17, 18, 19, 20]) >>> dtw_distance(x, y) # 1D series - 100.0 + 768.0 >>> x = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [0, 1, 0, 2, 0]]) >>> y = np.array([[11, 12, 13, 14],[7, 8, 9, 20],[1, 3, 4, 5]] ) - >>> dtw_distance(x, y, p = 1) # 2D series with 3 channels, unequal length - 68.0 + >>> dtw_distance(x, y) # 2D series with 3 channels, unequal length + 564.0 """ if x.ndim == 1 and y.ndim == 1: _x = x.reshape((1, x.shape[0])) @@ -121,12 +118,12 @@ def dtw_distance( bounding_matrix = create_bounding_matrix( _x.shape[1], _y.shape[1], window, itakura_max_slope ) - return _dtw_distance(_x, _y, bounding_matrix, p) + return _dtw_distance(_x, _y, bounding_matrix) if x.ndim == 2 and y.ndim == 2: bounding_matrix = create_bounding_matrix( x.shape[1], y.shape[1], window, itakura_max_slope ) - return _dtw_distance(x, y, bounding_matrix, p) + return _dtw_distance(x, y, bounding_matrix) raise ValueError("x and y must be 1D or 2D") @@ -135,13 +132,12 @@ def dtw_cost_matrix( x: np.ndarray, y: np.ndarray, window: Optional[float] = None, - p: Optional[float] = 2.0, itakura_max_slope: Optional[float] = None, ) -> np.ndarray: r"""Compute the DTW cost matrix between two time series. - The cost matrix is the pairwise Minkowski distance between all points - :math:`M_{i,j}=(x_i-x_j)^p`. It is used in the DTW path calculations. + The cost matrix is the pairwise Euclidean distance between all points + :math:`M_{i,j}=(x_i-x_j)^2`. It is used in the DTW path calculations. Parameters ---------- @@ -156,8 +152,6 @@ def dtw_cost_matrix( is used. window is a percentage deviation, so if ``window = 0.1``, 10% of the series length is the max warping allowed. is used. - p : float, default=2.0 - The order of the Minkowski distance to use. itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. @@ -179,16 +173,16 @@ def dtw_cost_matrix( >>> x = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) >>> y = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) >>> dtw_cost_matrix(x, y) - array([[ 0., 1., 3., 6., 10., 15., 21., 28., 36., 45.], - [ 1., 0., 1., 3., 6., 10., 15., 21., 28., 36.], - [ 3., 1., 0., 1., 3., 6., 10., 15., 21., 28.], - [ 6., 3., 1., 0., 1., 3., 6., 10., 15., 21.], - [10., 6., 3., 1., 0., 1., 3., 6., 10., 15.], - [15., 10., 6., 3., 1., 0., 1., 3., 6., 10.], - [21., 15., 10., 6., 3., 1., 0., 1., 3., 6.], - [28., 21., 15., 10., 6., 3., 1., 0., 1., 3.], - [36., 28., 21., 15., 10., 6., 3., 1., 0., 1.], - [45., 36., 28., 21., 15., 10., 6., 3., 1., 0.]]) + array([[ 0., 1., 5., 14., 30., 55., 91., 140., 204., 285.], + [ 1., 0., 1., 5., 14., 30., 55., 91., 140., 204.], + [ 5., 1., 0., 1., 5., 14., 30., 55., 91., 140.], + [ 14., 5., 1., 0., 1., 5., 14., 30., 55., 91.], + [ 30., 14., 5., 1., 0., 1., 5., 14., 30., 55.], + [ 55., 30., 14., 5., 1., 0., 1., 5., 14., 30.], + [ 91., 55., 30., 14., 5., 1., 0., 1., 5., 14.], + [140., 91., 55., 30., 14., 5., 1., 0., 1., 5.], + [204., 140., 91., 55., 30., 14., 5., 1., 0., 1.], + [285., 204., 140., 91., 55., 30., 14., 5., 1., 0.]]) """ if x.ndim == 1 and y.ndim == 1: _x = x.reshape((1, x.shape[0])) @@ -196,25 +190,23 @@ def dtw_cost_matrix( bounding_matrix = create_bounding_matrix( _x.shape[1], _y.shape[1], window, itakura_max_slope ) - return _dtw_cost_matrix(_x, _y, bounding_matrix, p) + return _dtw_cost_matrix(_x, _y, bounding_matrix) if x.ndim == 2 and y.ndim == 2: bounding_matrix = create_bounding_matrix( x.shape[1], y.shape[1], window, itakura_max_slope ) - return _dtw_cost_matrix(x, y, bounding_matrix, p) + return _dtw_cost_matrix(x, y, bounding_matrix) raise ValueError("x and y must be 1D or 2D") @njit(cache=True, fastmath=True) -def _dtw_distance( - x: np.ndarray, y: np.ndarray, bounding_matrix: np.ndarray, p: float -) -> float: - return _dtw_cost_matrix(x, y, bounding_matrix, p)[x.shape[1] - 1, y.shape[1] - 1] +def _dtw_distance(x: np.ndarray, y: np.ndarray, bounding_matrix: np.ndarray) -> float: + return _dtw_cost_matrix(x, y, bounding_matrix)[x.shape[1] - 1, y.shape[1] - 1] @njit(cache=True, fastmath=True) def _dtw_cost_matrix( - x: np.ndarray, y: np.ndarray, bounding_matrix: np.ndarray, p: float + x: np.ndarray, y: np.ndarray, bounding_matrix: np.ndarray ) -> np.ndarray: x_size = x.shape[1] y_size = y.shape[1] @@ -224,9 +216,9 @@ def _dtw_cost_matrix( for i in range(x_size): for j in range(y_size): if bounding_matrix[i, j]: - _w = np.ones_like(x[:, i]) - dist = _univariate_minkowski_distance(x[:, i], y[:, j], p, _w) - cost_matrix[i + 1, j + 1] = dist + min( + cost_matrix[i + 1, j + 1] = _univariate_squared_distance( + x[:, i], y[:, j] + ) + min( cost_matrix[i, j + 1], cost_matrix[i + 1, j], cost_matrix[i, j], @@ -239,7 +231,6 @@ def dtw_pairwise_distance( X: Union[np.ndarray, List[np.ndarray]], y: Optional[Union[np.ndarray, List[np.ndarray]]] = None, window: Optional[float] = None, - p: Optional[float] = 2.0, itakura_max_slope: Optional[float] = None, ) -> np.ndarray: r"""Compute the DTW pairwise distance between a set of time series. @@ -273,8 +264,6 @@ def dtw_pairwise_distance( window : float or None, default=None The window to use for the bounding matrix. If None, no bounding matrix is used. - p : float, default=2.0 - The order of the Minkowski distance to use. itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. @@ -298,41 +287,41 @@ def dtw_pairwise_distance( >>> # Distance between each time series in a collection of time series >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) >>> dtw_pairwise_distance(X) - array([[ 0., 9., 18.], - [ 9., 0., 9.], - [18., 9., 0.]]) + array([[ 0., 26., 108.], + [ 26., 0., 26.], + [108., 26., 0.]]) >>> # Distance between two collections of time series >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) >>> y = np.array([[[11, 12, 13]],[[14, 15, 16]], [[17, 18, 19]]]) - >>> dtw_pairwise_distance(X, y, p=1) - array([[30., 39., 48.], - [21., 30., 39.], - [12., 21., 30.]]) + >>> dtw_pairwise_distance(X, y) + array([[300., 507., 768.], + [147., 300., 507.], + [ 48., 147., 300.]]) >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) >>> y_univariate = np.array([11, 12, 13]) - >>> dtw_pairwise_distance(X, y_univariate, p=1.5) - array([[30.], - [21.], - [12.]]) + >>> dtw_pairwise_distance(X, y_univariate) + array([[300.], + [147.], + [ 48.]]) >>> # Distance between each TS in a collection of unequal-length time series >>> X = [np.array([1, 2, 3]), np.array([4, 5, 6, 7]), np.array([8, 9, 10, 11, 12])] - >>> dtw_pairwise_distance(X, p=1.2) - array([[ 0., 13., 38.], - [13., 0., 21.], - [38., 21., 0.]]) + >>> dtw_pairwise_distance(X) + array([[ 0., 42., 292.], + [ 42., 0., 83.], + [292., 83., 0.]]) """ multivariate_conversion = _is_multivariate(X, y) _X, unequal_length = _convert_to_list(X, "X", multivariate_conversion) if y is None: # To self - return _dtw_pairwise_distance(_X, window, p, itakura_max_slope, unequal_length) + return _dtw_pairwise_distance(_X, window, itakura_max_slope, unequal_length) _y, unequal_length = _convert_to_list(y, "y", multivariate_conversion) return _dtw_from_multiple_to_multiple_distance( - _X, _y, window, p, itakura_max_slope, unequal_length + _X, _y, window, itakura_max_slope, unequal_length ) @@ -340,7 +329,6 @@ def dtw_pairwise_distance( def _dtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], - p: Optional[float], itakura_max_slope: Optional[float], unequal_length: bool, ) -> np.ndarray: @@ -359,7 +347,7 @@ def _dtw_pairwise_distance( bounding_matrix = create_bounding_matrix( x1.shape[1], x2.shape[1], window, itakura_max_slope ) - distances[i, j] = _dtw_distance(x1, x2, bounding_matrix, p) + distances[i, j] = _dtw_distance(x1, x2, bounding_matrix) distances[j, i] = distances[i, j] return distances @@ -370,7 +358,6 @@ def _dtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], window: Optional[float], - p: Optional[float], itakura_max_slope: Optional[float], unequal_length: bool, ) -> np.ndarray: @@ -389,7 +376,7 @@ def _dtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x1.shape[1], y1.shape[1], window, itakura_max_slope ) - distances[i, j] = _dtw_distance(x1, y1, bounding_matrix, p) + distances[i, j] = _dtw_distance(x1, y1, bounding_matrix) return distances @@ -398,7 +385,6 @@ def dtw_alignment_path( x: np.ndarray, y: np.ndarray, window: Optional[float] = None, - p: Optional[float] = 2.0, itakura_max_slope: Optional[float] = None, ) -> Tuple[List[Tuple[int, int]], float]: """Compute the DTW alignment path between two time series. @@ -412,8 +398,6 @@ def dtw_alignment_path( window : float, default=None The window to use for the bounding matrix. If None, no bounding matrix is used. - p : float, default=2.0 - The order of the Minkowski distance to use. itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. @@ -439,9 +423,9 @@ def dtw_alignment_path( >>> x = np.array([[1, 2, 3, 6]]) >>> y = np.array([[1, 2, 3, 4]]) >>> dtw_alignment_path(x, y) - ([(0, 0), (1, 1), (2, 2), (3, 3)], 2.0) + ([(0, 0), (1, 1), (2, 2), (3, 3)], 4.0) """ - cost_matrix = dtw_cost_matrix(x, y, window, p, itakura_max_slope) + cost_matrix = dtw_cost_matrix(x, y, window, itakura_max_slope) return ( compute_min_return_path(cost_matrix), cost_matrix[x.shape[-1] - 1, y.shape[-1] - 1], diff --git a/aeon/distances/tests/test_distance_correctness.py b/aeon/distances/tests/test_distance_correctness.py index d9a6165bf7..f33ca088c3 100644 --- a/aeon/distances/tests/test_distance_correctness.py +++ b/aeon/distances/tests/test_distance_correctness.py @@ -48,33 +48,33 @@ unit_test_distances = { "euclidean": 619.7959, "squared": 384147.0, - "dtw": [2275.0, 2063.0, 1988.0], + "dtw": [384147.0, 315012.0, 275854.0], "wdtw": [137927.0, 68406.15849, 2.2296], - "erp": [168.0, 1107.0, 2275.0], + "erp": [2275.0, 2275.0, 2275.0], "lcss": [1.0, 0.45833, 0.08333], "edr": [1.0, 0.58333, 0.125], "ddtw": [80806.0, 76289.0625, 76289.0625], "wddtw": [38144.53125, 19121.4927, 1.34957], - "twe": [137.001, 567.0029999999999, 3030.036000000001], + "twe": [4536.0, 3192.0220, 3030.036000000001], "msm_ind": [1515.0, 1517.8000000000004, 1557.0], # msm with independent distance "msm_dep": [1897.0, 1898.6000000000001, 1921.0], # msm with dependent distance } basic_motions_distances = { "euclidean": 27.51835240, "squared": 757.25971908652, - "dtw": [169.3715777363612, 103.16627927361445, 103.16627927361445], + "dtw": [757.259719, 330.834497, 330.834497], "wdtw": [165.41724, 3.308425, 0], "msm": [70.014828, 89.814828, 268.014828], - "erp": [0.2086269, 2.9942540, 102.097904], + "erp": [169.3715, 102.0979, 102.097904], "edr": [1.0, 0.26, 0.07], "lcss": [1.0, 0.26, 0.05], "ddtw": [297.18771, 160.51311645984856, 160.29823], "wddtw": [80.149117, 1.458858, 0.0], - "twe": [1.001, 12.620531031063596, 173.3596688781867], + "twe": [338.4842162018424, 173.35966887818674, 173.3596688781867], # msm with independent distance "msm_ind": [84.36021099999999, 140.13788899999997, 262.6939920000001], # msm with dependent distance - "msm_dep": [33.068257441993, 71.14080843368329, 190.73978686253804], + "msm_dep": [33.06825, 71.1408, 190.7397], } diff --git a/aeon/testing/expected_results/expected_distance_results.py b/aeon/testing/expected_results/expected_distance_results.py index fea66c46a4..e2ba231979 100644 --- a/aeon/testing/expected_results/expected_distance_results.py +++ b/aeon/testing/expected_results/expected_distance_results.py @@ -35,11 +35,11 @@ 9.938223910720454, ], "dtw": [ - 5.0, - 8.559917930965497, - 44.3099600330504, - 9.594320378741982, - 29.148827543490025, + 25.0, + 10.840209465045632, + 184.1829978640948, + 11.01992239481733, + 98.76829449961573, ], "ddtw": [ 0.0, @@ -112,8 +112,8 @@ # Result structure: # [univariate series, multivariate series] "dtw": [ - [10.441974486203806, 44.3099600330504], - [10.8024655063829, 44.3099600330504], + [13.875064266550797, 184.18299786409477], + [14.250134348814871, 197.8788694266691], ], "ddtw": [ [10.735835993738842, 132.73682936639955], @@ -130,16 +130,16 @@ [0.3409987716261595, 2.7187979513671015], ], "lcss": [[0.30000000000000004, 1.0], [0.4, 1.0], [0.4, 1.0]], - "edr": [[0.3, 0.3], [0.1, 0.1], [0.5, 1.0]], + "edr": [[0.7, 1.0], [1.0, 1.0], [0.5, 1.0]], "twe": [ - [5.087449975445656, 15.161815735222117], - [1.1499446039354893, 5.995665808293953], + [20.507374214378885, 78.81976840746147], + [21.48930550350685, 82.55907793607852], [15.243281318807819, 77.81976840746147], [27.97089924329228, 83.97624505343292], ], "msm": [ - [4.080245996952201, 43.583053575960584], - [1.0, 15.829914369482566], + [12.099213975730216, 92.75733240032741], + [12.153142672084059, 102.90914530531768], [12.023580258367444, 88.80013932627139], [7.115130579734542, 61.80633627614831], ], @@ -150,8 +150,8 @@ ], "sbd": [[0.13378563362841267, 0.12052110294129567]], "erp": [ - [6.1963403666089425, 23.958805888780923], - [2.2271884807416047, 9.205416143392629], + [13.782010409379064, 44.3099600330504], + [13.782010409379064, 44.3099600330504], [12.782010409379064, 44.3099600330504], [15.460501609188993, 44.3099600330504], ], From 5df6685b610125752a44b385530ccd2afa577157 Mon Sep 17 00:00:00 2001 From: Divya Tiwari Date: Mon, 5 Aug 2024 12:22:20 +0530 Subject: [PATCH 4/4] private distances --- .../distance_based/_distances.py | 240 ++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 aeon/classification/distance_based/_distances.py diff --git a/aeon/classification/distance_based/_distances.py b/aeon/classification/distance_based/_distances.py new file mode 100644 index 0000000000..83f3de0cf7 --- /dev/null +++ b/aeon/classification/distance_based/_distances.py @@ -0,0 +1,240 @@ +"""Parameterised distances for Proximity Forest 2.0.""" + +from typing import Optional + +import numpy as np +from numba import njit + +from aeon.distances._bounding_matrix import create_bounding_matrix +from aeon.distances._minkowski import _univariate_minkowski_distance + + +@njit(cache=True, fastmath=True) +def _dtw_distance( + x: np.ndarray, + y: np.ndarray, + p: Optional[float] = 2.0, + window: Optional[float] = None, + itakura_max_slope: Optional[float] = None, +) -> float: + r"""Compute the DTW distance between two time series. + + DTW is the most widely researched and used elastic distance measure. It mitigates + distortions in the time axis by realligning (warping) the series to best match + each other. A good background into DTW can be found in [1]_. For two series, + possibly of unequal length, + :math:`\mathbf{x}=\{x_1,x_2,\ldots,x_n\}` and + :math:`\mathbf{y}=\{y_1,y_2, \ldots,y_m\}` DTW first calculates + :math:`M(\mathbf{x},\mathbf{y})`, the :math:`n \times m` + pointwise distance matrix between series :math:`\mathbf{x}` and :math:`\mathbf{y}`, + where :math:`M_{i,j}= (x_i-y_j)^p`. + + A warping path + + .. math:: + P = <(e_1, f_1), (e_2, f_2), \ldots, (e_s, f_s)> + + is a set of pairs of indices that define a traversal of matrix :math:`M`. A + valid warping path must start at location :math:`(1,1)` and end at point :math:`( + n,m)` and not backtrack, i.e. :math:`0 \leq e_{i+1}-e_{i} \leq 1` and :math:`0 + \leq f_{i+1}- f_i \leq 1` for all :math:`1< i < m`. + + The DTW distance between series is the path through :math:`M` that minimizes the + total distance. The distance for any path :math:`P` of length :math:`s` is + + .. math:: + D_P(\mathbf{x},\mathbf{y}, M) =\sum_{i=1}^s M_{e_i,f_i} + + If :math:`\mathcal{P}` is the space of all possible paths, the DTW path :math:`P^*` + is the path that has the minimum distance, hence the DTW distance between series is + + .. math:: + d_{dtw}(\mathbf{x}, \mathbf{x}) =D_{P*}(\mathbf{x},\mathbf{x}, M). + + The optimal warping path :math:`P^*` can be found exactly through a dynamic + programming formulation. This can be a time consuming operation, and it is common to + put a restriction on the amount of warping allowed. This is implemented through + the bounding_matrix structure, that supplies a mask for allowable warpings. + The most common bounding strategies include the Sakoe-Chiba band [2]_. The width + of the allowed warping is controlled through the ``window`` parameter + which sets the maximum proportion of warping allowed. + + Parameters + ---------- + x : np.ndarray + First time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + y : np.ndarray + Second time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + p : float, default=2.0 + The order of the norm of the difference + (default is 2.0, which represents the Euclidean distance). + window : float or None, default=None + The window to use for the bounding matrix. If None, no bounding matrix + is used. window is a percentage deviation, so if ``window = 0.1`` then + 10% of the series length is the max warping allowed. + is used. + itakura_max_slope : float, default=None + Maximum slope as a proportion of the number of time points used to create + Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + + Returns + ------- + float + DTW distance between x and y, minimum value 0. + + Raises + ------ + ValueError + If x and y are not 1D or 2D arrays. + + References + ---------- + .. [1] Ratanamahatana C and Keogh E.: Three myths about dynamic time warping data + mining, Proceedings of 5th SIAM International Conference on Data Mining, 2005. + + .. [2] Sakoe H. and Chiba S.: Dynamic programming algorithm optimization for + spoken word recognition. IEEE Transactions on Acoustics, Speech, and Signal + Processing 26(1):43–49, 1978. + """ + if x.ndim == 1 and y.ndim == 1: + _x = x.reshape((1, x.shape[0])) + _y = y.reshape((1, y.shape[0])) + bounding_matrix = create_bounding_matrix( + _x.shape[1], _y.shape[1], window, itakura_max_slope + ) + return _dtw_cost_matrix(x, y, p, bounding_matrix)[ + x.shape[1] - 1, y.shape[1] - 1 + ] + if x.ndim == 2 and y.ndim == 2: + bounding_matrix = create_bounding_matrix( + x.shape[1], y.shape[1], window, itakura_max_slope + ) + return _dtw_cost_matrix(x, y, p, bounding_matrix)[ + x.shape[1] - 1, y.shape[1] - 1 + ] + raise ValueError("x and y must be 1D or 2D") + + +@njit(cache=True, fastmath=True) +def _dtw_cost_matrix( + x: np.ndarray, y: np.ndarray, p: float, bounding_matrix: np.ndarray +) -> np.ndarray: + x_size = x.shape[1] + y_size = y.shape[1] + cost_matrix = np.full((x_size + 1, y_size + 1), np.inf) + cost_matrix[0, 0] = 0.0 + _w = np.ones_like(x) + for i in range(x_size): + for j in range(y_size): + if bounding_matrix[i, j]: + cost_matrix[i + 1, j + 1] = _univariate_minkowski_distance( + x[:, i], y[:, j], p, _w[:, i] + ) + min( + cost_matrix[i, j + 1], + cost_matrix[i + 1, j], + cost_matrix[i, j], + ) + + return cost_matrix[1:, 1:] + + +@njit(cache=True, fastmath=True) +def _adtw_distance( + x: np.ndarray, + y: np.ndarray, + p: Optional[float] = 2.0, + window: Optional[float] = None, + itakura_max_slope: Optional[float] = None, + warp_penalty: float = 1.0, +) -> float: + r"""Compute the ADTW distance between two time series. + + Amercing Dynamic Time Warping (ADTW) [1]_ is a variant of DTW that uses a + explicit warping penalty to encourage or discourage warping. The warping + penalty is a constant value that is added to the cost of warping. A high + value will encourage the algorithm to warp less and if the value is low warping + is more likely. + + Parameters + ---------- + x : np.ndarray + First time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + y : np.ndarray + Second time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + p : float, default=2.0 + The order of the norm of the difference + (default is 2.0, which represents the Euclidean distance). + window : float or None, default=None + The window to use for the bounding matrix. If None, no bounding matrix + is used. window is a percentage deviation, so if ``window = 0.1`` then + 10% of the series length is the max warping allowed. + itakura_max_slope : float, default=None + Maximum slope as a proportion of the number of time points used to create + Itakura parallelogram on the bounding matrix. Must be between 0.0 and 1.0 + warp_penalty: float, default=1.0 + Penalty for warping. A high value will mean less warping. + + Returns + ------- + float + ADTW distance between x and y, minimum value 0. + + Raises + ------ + ValueError + If x and y are not 1D or 2D arrays. + + References + ---------- + .. [1] Matthieu Herrmann, Geoffrey I. Webb: Amercing: An intuitive and effective + constraint for dynamic time warping, Pattern Recognition, Volume 137, 2023. + """ + if x.ndim == 1 and y.ndim == 1: + _x = x.reshape((1, x.shape[0])) + _y = y.reshape((1, y.shape[0])) + bounding_matrix = create_bounding_matrix( + _x.shape[1], _y.shape[1], window, itakura_max_slope + ) + return _adtw_cost_matrix(x, y, p, bounding_matrix, warp_penalty)[ + x.shape[1] - 1, y.shape[1] - 1 + ] + if x.ndim == 2 and y.ndim == 2: + bounding_matrix = create_bounding_matrix( + x.shape[1], y.shape[1], window, itakura_max_slope + ) + return _adtw_cost_matrix(x, y, p, bounding_matrix, warp_penalty)[ + x.shape[1] - 1, y.shape[1] - 1 + ] + raise ValueError("x and y must be 1D or 2D") + + +@njit(cache=True, fastmath=True) +def _adtw_cost_matrix( + x: np.ndarray, + y: np.ndarray, + p: float, + bounding_matrix: np.ndarray, + warp_penalty: float, +) -> np.ndarray: + x_size = x.shape[1] + y_size = y.shape[1] + cost_matrix = np.full((x_size + 1, y_size + 1), np.inf) + cost_matrix[0, 0] = 0.0 + + _w = np.ones_like(x) + for i in range(x_size): + for j in range(y_size): + if bounding_matrix[i, j]: + cost_matrix[i + 1, j + 1] = _univariate_minkowski_distance( + x[:, i], y[:, j], p, _w[:, i] + ) + min( + cost_matrix[i, j + 1] + warp_penalty, + cost_matrix[i + 1, j] + warp_penalty, + cost_matrix[i, j], + ) + + return cost_matrix[1:, 1:]