Skip to content

Commit c49659f

Browse files
committed
Try using map instead of ntuple
1 parent 7554bd1 commit c49659f

File tree

2 files changed

+28
-30
lines changed

2 files changed

+28
-30
lines changed

src/prognostic_equations/implicit/auto_dense_jacobian.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ function update_column_matrices!(alg::AutoDenseJacobian, cache, Y, p, t)
152152
field = MatrixFields.get_field(Yₜ_dual, name)
153153
@inbounds point(field, level_index, column_index...)[]
154154
end
155-
ε_coefficients = ForwardDiff.partials(dual_number).values
155+
ε_coefficients = ForwardDiff.partials(dual_number)
156156
for (jacobian_column_ε_index, (jacobian_column_index, _)) in
157157
enumerate(jacobian_index_to_Y_index_map_partition)
158158
cartesian_index = (

src/prognostic_equations/implicit/auto_sparse_jacobian.jl

Lines changed: 27 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,7 @@ function jacobian_cache(alg::AutoSparseJacobian, Y, atmos; verbose = true)
175175
# making memory_for_I_matrix negligible compared to max_memory.
176176
n_εs_max = (max_memory - memory_for_I_matrix) ÷ memory_per_ε - 1
177177
n_εs_max >= 1 || error(
178-
"Not enough free memory ($(cld(free_memory, 2^30)) GB) to allocate \
178+
"Not enough free memory ($(cld(max_memory, 2^30)) GB) to allocate \
179179
Jacobian ($(cld(memory_for_I_matrix, 2^30)) GB for the identity \
180180
matrix and another $(cld(memory_per_ε, 2^30)) GB per ε component)",
181181
)
@@ -237,11 +237,11 @@ function jacobian_cache(alg::AutoSparseJacobian, Y, atmos; verbose = true)
237237
column_index in column_indices,
238238
index_pair in DA(collect(Y_index_to_diagonal_color_map))
239239

240-
((scalar_index, level_index), diagonal_color) = index_pair
241-
color_offset = (partition - 1) * n_εs
240+
((scalar_index, level_index), diagonal_entry_color) = index_pair
241+
ε_offset = (partition - 1) * n_εs
242242
diagonal_ε_index =
243-
color_offset < diagonal_color <= color_offset + n_εs ?
244-
diagonal_color - color_offset : 0
243+
ε_offset < diagonal_entry_color <= ε_offset + n_εs ?
244+
diagonal_entry_color - ε_offset : 0
245245
n_εs_val = Val(ForwardDiff.npartials(eltype(I_matrix_partition_εs)))
246246
ε_coefficients = ntuple(==(diagonal_ε_index), n_εs_val)
247247
unrolled_applyat(scalar_index, scalar_names) do name
@@ -285,7 +285,7 @@ function jacobian_cache(alg::AutoSparseJacobian, Y, atmos; verbose = true)
285285
block_colors = last.(block_Y_index_to_color_map)
286286

287287
map(block_Yₜ_indices) do (scalar_index, level_index)
288-
band_colors = ntuple(colors_per_band_matrix_row) do band_index
288+
entry_colors = ntuple(colors_per_band_matrix_row) do band_index
289289
band = lower_band + band_index - 1
290290
level_index_min = band < 0 ? 1 - band : 1
291291
level_index_max =
@@ -296,7 +296,7 @@ function jacobian_cache(alg::AutoSparseJacobian, Y, atmos; verbose = true)
296296
level_index_min <= level_index <= level_index_max
297297
is_color_at_index ? block_colors[level_index + band] : 0
298298
end
299-
((block_index, level_index), (scalar_index, band_colors))
299+
((block_index, level_index), (scalar_index, entry_colors))
300300
end
301301
end
302302

@@ -335,43 +335,41 @@ function update_jacobian!(::AutoSparseJacobian, cache, Y, p, dtγ, t)
335335
set_implicit_precomputed_quantities!(Y_dual, p_dual, t)
336336
implicit_tendency!(Yₜ_dual, Y_dual, p_dual, t)
337337

338-
# Move the entries of ∂Yₜ/∂Y * I_matrix_partition from Yₜ_dual into
339-
# the blocks of autodiff_matrix. Drop the spatial data from every Field
338+
# Move the entries of ∂Yₜ/∂Y * I_matrix_partition from Yₜ_dual into the
339+
# blocks of autodiff_matrix. Drop spatial information from every Field
340340
# to ensure that this kernel stays below the GPU parameter memory limit.
341-
Yₜ_dual_no_spaces =
341+
Yₜ_dual_data =
342342
unrolled_map(Fields.field_values, Fields._values(Yₜ_dual))
343-
matrix_fields_no_spaces =
343+
matrix_fields_data =
344344
unrolled_map(Fields.field_values, values(autodiff_matrix))
345345
ClimaComms.@threaded device begin
346346
# On multithreaded devices, use one thread for each band matrix row.
347347
# TODO: Modify the map and use one thread for each dual number.
348348
for column_index in column_indices,
349349
index_pair in band_matrix_row_index_to_colors_map
350350

351-
((block_index, level_index), (scalar_index, band_colors)) =
351+
((block_index, level_index), (scalar_index, entry_colors)) =
352352
index_pair
353353
dual_number =
354354
unrolled_applyat(scalar_index, scalar_names) do name
355-
data = MatrixFields.get_field(Yₜ_dual_no_spaces, name)
355+
data = MatrixFields.get_field(Yₜ_dual_data, name)
356356
@inbounds point(data, level_index, column_index...)[]
357357
end
358-
ε_coefficients = ForwardDiff.partials(dual_number).values
358+
ε_coefficients = ForwardDiff.partials(dual_number)
359359
n_εs = length(ε_coefficients)
360-
color_offset = (partition - 1) * n_εs
361-
unrolled_applyat(block_index, matrix_fields_no_spaces) do data
362-
@inbounds band_entries =
363-
point(data.entries, level_index, column_index...)
364-
preexisting_band_entries = band_entries[]
365-
n_bands_val = Val(length(preexisting_band_entries))
366-
band_entries[] = ntuple(n_bands_val) do band_index
367-
# If this band's color is in the current partition, set
368-
# its entry to the ε coefficient for that color.
369-
# Otherwise, keep the entry already in the block's data.
370-
@inbounds band_color = band_colors[band_index]
371-
color_offset < band_color <= color_offset + n_εs ?
372-
(@inbounds ε_coefficients[band_color - color_offset]) :
373-
(@inbounds preexisting_band_entries[band_index])
374-
end
360+
ε_offset = (partition - 1) * n_εs
361+
unrolled_applyat(block_index, matrix_fields_data) do block_data
362+
@inbounds entries_data =
363+
point(block_data, level_index, column_index...).entries
364+
entries_data[] =
365+
map(entry_colors, entries_data[]) do entry_color, entry
366+
# If the entry has a color in the current partition,
367+
# set the entry to the ε coefficient for that color.
368+
# Otherwise, keep the value from the block's data.
369+
ε_offset < entry_color <= ε_offset + n_εs ?
370+
(@inbounds ε_coefficients[entry_color - ε_offset]) :
371+
entry
372+
end
375373
end
376374
end
377375
end

0 commit comments

Comments
 (0)