@@ -34,12 +34,12 @@ very large memory requirements at higher vertical resolutions.
34
34
35
35
When the number of values in each column is very large, computing the entire
36
36
dense matrix in a single evaluation of `implicit_tendency!` can be too expensive
37
- to compile and run. So, the dual number components are split into batches with a
38
- maximum size of `max_simultaneous_derivatives`, and we call `implicit_tendency!`
39
- once for each batch . That is, if the batch size is ``s``, then the first batch
40
- evaluates the coefficients of ``ε₁`` through ``εₛ``, the second evaluates the
41
- coefficients of ``εₛ₊₁`` through ``ε₂ₛ``, and so on until ``εₙ``. The default
42
- batch size is 32.
37
+ to compile and run. So, the dual number components are split into partitions
38
+ with a maximum size of `max_simultaneous_derivatives`, and we call
39
+ `implicit_tendency!` once for each partition . That is, if the partition size is
40
+ ``s``, then the first partition evaluates the coefficients of ``ε₁`` through
41
+ ``εₛ``, the second evaluates the coefficients of ``εₛ₊₁`` through ``ε₂ₛ``, and so
42
+ on until ``εₙ``. The default partition size is 32.
43
43
"""
44
44
struct AutoDenseJacobian{S} <: JacobianAlgorithm end
45
45
AutoDenseJacobian (max_simultaneous_derivatives = 32 ) =
@@ -53,11 +53,11 @@ function jacobian_cache(alg::AutoDenseJacobian, Y, atmos)
53
53
DA = ClimaComms. array_type (Y)
54
54
55
55
FT_dual = ForwardDiff. Dual{Jacobian, FT, max_simultaneous_derivatives (alg)}
56
- Y_dual = replace_parent_type (Y, FT_dual)
57
- Yₜ_dual = similar (Y_dual)
58
56
precomputed_dual =
59
57
replace_parent_type (implicit_precomputed_quantities (Y, atmos), FT_dual)
60
58
scratch_dual = replace_parent_type (temporary_quantities (Y, atmos), FT_dual)
59
+ Y_dual = replace_parent_type (Y, FT_dual)
60
+ Yₜ_dual = similar (Y_dual)
61
61
62
62
N = length (Fields. column (Y, 1 , 1 , 1 ))
63
63
n_columns = Fields. ncolumns (Y. c)
@@ -72,10 +72,10 @@ function jacobian_cache(alg::AutoDenseJacobian, Y, atmos)
72
72
I_matrix = reshape (I_column_matrix, N, N, 1 )
73
73
74
74
return (;
75
- Y_dual,
76
- Yₜ_dual,
77
75
precomputed_dual,
78
76
scratch_dual,
77
+ Y_dual,
78
+ Yₜ_dual,
79
79
column_matrices,
80
80
column_lu_factors,
81
81
column_lu_vectors,
@@ -85,7 +85,7 @@ function jacobian_cache(alg::AutoDenseJacobian, Y, atmos)
85
85
end
86
86
87
87
function update_column_matrices! (alg:: AutoDenseJacobian , cache, Y, p, t)
88
- (; Y_dual, Yₜ_dual, precomputed_dual, scratch_dual , column_matrices) = cache
88
+ (; precomputed_dual, scratch_dual, Y_dual, Yₜ_dual , column_matrices) = cache
89
89
device = ClimaComms. device (Y. c)
90
90
column_indices = column_index_iterator (Y)
91
91
scalar_names = scalar_field_names (Y)
@@ -99,48 +99,47 @@ function update_column_matrices!(alg::AutoDenseJacobian, cache, Y, p, t)
99
99
for jacobian_index_to_Y_index_map_partition in
100
100
ClimaComms. threadable (device, jacobian_index_to_Y_index_map_partitions)
101
101
102
- # Add a unique ε to Y for each Jacobian column index in this batch . With
102
+ # Add a unique ε to each value in Y that is part of this partition . With
103
103
# Y_col and Yᴰ_col denoting the columns of Y and Y_dual at column_index,
104
- # set Yᴰ_col to Y_col + I[:, jacobian_indices_in_partition ] * εs, where
105
- # I is the identity matrix for Y_col (i.e., the value of ∂Y_col/∂Y_col),
106
- # εs is a vector of max_simultaneous_derivatives(alg) dual number
107
- # components, and jacobian_indices_in_partition is equal to
104
+ # set Yᴰ_col to Y_col + I[:, jacobian_column_indices ] * εs, where I is
105
+ # the identity matrix for Y_col (i.e., the value of ∂Y_col/∂Y_col), εs
106
+ # is a vector of max_simultaneous_derivatives(alg) dual number
107
+ # components, and jacobian_column_indices is equal to
108
108
# first.(jacobian_index_to_Y_index_map_partition).
109
109
Y_dual .= Y
110
110
ClimaComms. @threaded device begin
111
111
# On multithreaded devices, assign one thread to each combination of
112
- # spatial column index and Jacobian index in this batch .
112
+ # spatial column index and Jacobian index in this partition .
113
113
for column_index in column_indices,
114
- (ε_index , (_, (scalar_index, level_index))) in
114
+ (diagonal_ε_index , (_, (scalar_index, level_index))) in
115
115
enumerate (jacobian_index_to_Y_index_map_partition)
116
116
117
- Y_partials =
118
- ntuple (== (ε_index), Val (max_simultaneous_derivatives (alg)))
119
- Y_dual_εs_value = ForwardDiff. Dual {Jacobian} (0 , Y_partials)
117
+ n_εs_val = Val (max_simultaneous_derivatives (alg))
118
+ Y_dual_ε_coefficients = ntuple (== (diagonal_ε_index), n_εs_val)
120
119
unrolled_applyat (scalar_index, scalar_names) do name
121
120
field = MatrixFields. get_field (Y_dual, name)
122
121
@inbounds point (field, level_index, column_index... )[] +=
123
- Y_dual_εs_value
122
+ ForwardDiff . Dual {Jacobian} ( 0 , Y_dual_ε_coefficients)
124
123
end
125
124
end
126
125
end
127
126
128
- # Compute this batch's portions of ∂p/∂Y and ∂Yₜ/∂Y.
127
+ # Compute this partition of ∂p/∂Y and ∂Yₜ/∂Y.
129
128
set_implicit_precomputed_quantities! (Y_dual, p_dual, t)
130
129
implicit_tendency! (Yₜ_dual, Y_dual, p_dual, t)
131
130
132
- # Copy this batch's portion of ∂Yₜ/∂Y into column_matrices. With Yₜ_col
133
- # and Yₜᴰ_col denoting the columns of Yₜ and Yₜ_dual at column_index, and
131
+ # Copy this partition of ∂Yₜ/∂Y into column_matrices. With Yₜ_col and
132
+ # Yₜᴰ_col denoting the columns of Yₜ and Yₜ_dual at column_index, and
134
133
# with col_matrix denoting the matrix at the corresponding matrix_index
135
134
# in column_matrices, copy the coefficients of the εs in Yₜᴰ_col into
136
135
# col_matrix, where the previous steps have set Yₜᴰ_col to
137
- # Yₜ_col + (∂Yₜ_col/∂Y_col)[:, jacobian_indices_in_batch ] * εs. In other
138
- # words, set col_matrix[jacobian_row_index, jacobian_column_index] to
139
- # ∂Yₜ_col[jacobian_row_index]/∂Y_col[jacobian_column_index], obtaining
140
- # this derivative from the coefficient of εs[ε_index] in
141
- # Yₜᴰ_col[jacobian_row_index], where ε_index is the index of
142
- # jacobian_column_index in jacobian_indices_in_batch. After all batches
143
- # are processed, col_matrix contains the full Jacobian ∂Yₜ_col/∂Y_col .
136
+ # Yₜ_col + (∂Yₜ_col/∂Y_col)[:, jacobian_column_indices ] * εs. In
137
+ # other words, set col_matrix[jacobian_row_index, jacobian_column_index]
138
+ # to ∂Yₜ_col[jacobian_row_index]/∂Y_col[jacobian_column_index],
139
+ # obtaining this derivative from the coefficient of
140
+ # εs[jacobian_column_ε_index] in Yₜᴰ_col[jacobian_row_index], where
141
+ # jacobian_column_ε_index is the index of jacobian_column_index in
142
+ # jacobian_column_indices .
144
143
ClimaComms. @threaded device begin
145
144
# On multithreaded devices, assign one thread to each combination of
146
145
# spatial column index and scalar level index.
@@ -153,16 +152,16 @@ function update_column_matrices!(alg::AutoDenseJacobian, cache, Y, p, t)
153
152
field = MatrixFields. get_field (Yₜ_dual, name)
154
153
@inbounds point (field, level_index, column_index... )[]
155
154
end
156
- Yₜ_partials = ForwardDiff. partials (Yₜ_dual_value)
157
- for (ε_index , (jacobian_column_index, _)) in
155
+ Yₜ_dual_ε_coefficients = ForwardDiff. partials (Yₜ_dual_value)
156
+ for (jacobian_column_ε_index , (jacobian_column_index, _)) in
158
157
enumerate (jacobian_index_to_Y_index_map_partition)
159
158
cartesian_index = (
160
159
jacobian_row_index,
161
160
jacobian_column_index,
162
161
matrix_index,
163
162
)
164
163
@inbounds column_matrices[cartesian_index... ] =
165
- Yₜ_partials[ε_index ]
164
+ Yₜ_dual_ε_coefficients[jacobian_column_ε_index ]
166
165
end
167
166
end
168
167
end
0 commit comments