@@ -175,7 +175,7 @@ function jacobian_cache(alg::AutoSparseJacobian, Y, atmos; verbose = true)
175
175
# making memory_for_I_matrix negligible compared to max_memory.
176
176
n_εs_max = (max_memory - memory_for_I_matrix) ÷ memory_per_ε - 1
177
177
n_εs_max >= 1 || error (
178
- " Not enough free memory ($(cld (free_memory , 2 ^ 30 )) GB) to allocate \
178
+ " Not enough free memory ($(cld (max_memory , 2 ^ 30 )) GB) to allocate \
179
179
Jacobian ($(cld (memory_for_I_matrix, 2 ^ 30 )) GB for the identity \
180
180
matrix and another $(cld (memory_per_ε, 2 ^ 30 )) GB per ε component)" ,
181
181
)
@@ -237,11 +237,11 @@ function jacobian_cache(alg::AutoSparseJacobian, Y, atmos; verbose = true)
237
237
column_index in column_indices,
238
238
index_pair in DA (collect (Y_index_to_diagonal_color_map))
239
239
240
- ((scalar_index, level_index), diagonal_color ) = index_pair
241
- color_offset = (partition - 1 ) * n_εs
240
+ ((scalar_index, level_index), diagonal_entry_color ) = index_pair
241
+ ε_offset = (partition - 1 ) * n_εs
242
242
diagonal_ε_index =
243
- color_offset < diagonal_color <= color_offset + n_εs ?
244
- diagonal_color - color_offset : 0
243
+ ε_offset < diagonal_entry_color <= ε_offset + n_εs ?
244
+ diagonal_entry_color - ε_offset : 0
245
245
n_εs_val = Val (ForwardDiff. npartials (eltype (I_matrix_partition_εs)))
246
246
ε_coefficients = ntuple (== (diagonal_ε_index), n_εs_val)
247
247
unrolled_applyat (scalar_index, scalar_names) do name
@@ -285,7 +285,7 @@ function jacobian_cache(alg::AutoSparseJacobian, Y, atmos; verbose = true)
285
285
block_colors = last .(block_Y_index_to_color_map)
286
286
287
287
map (block_Yₜ_indices) do (scalar_index, level_index)
288
- band_colors = ntuple (colors_per_band_matrix_row) do band_index
288
+ entry_colors = ntuple (colors_per_band_matrix_row) do band_index
289
289
band = lower_band + band_index - 1
290
290
level_index_min = band < 0 ? 1 - band : 1
291
291
level_index_max =
@@ -296,7 +296,7 @@ function jacobian_cache(alg::AutoSparseJacobian, Y, atmos; verbose = true)
296
296
level_index_min <= level_index <= level_index_max
297
297
is_color_at_index ? block_colors[level_index + band] : 0
298
298
end
299
- ((block_index, level_index), (scalar_index, band_colors ))
299
+ ((block_index, level_index), (scalar_index, entry_colors ))
300
300
end
301
301
end
302
302
@@ -335,43 +335,41 @@ function update_jacobian!(::AutoSparseJacobian, cache, Y, p, dtγ, t)
335
335
set_implicit_precomputed_quantities! (Y_dual, p_dual, t)
336
336
implicit_tendency! (Yₜ_dual, Y_dual, p_dual, t)
337
337
338
- # Move the entries of ∂Yₜ/∂Y * I_matrix_partition from Yₜ_dual into
339
- # the blocks of autodiff_matrix. Drop the spatial data from every Field
338
+ # Move the entries of ∂Yₜ/∂Y * I_matrix_partition from Yₜ_dual into the
339
+ # blocks of autodiff_matrix. Drop spatial information from every Field
340
340
# to ensure that this kernel stays below the GPU parameter memory limit.
341
- Yₜ_dual_no_spaces =
341
+ Yₜ_dual_data =
342
342
unrolled_map (Fields. field_values, Fields. _values (Yₜ_dual))
343
- matrix_fields_no_spaces =
343
+ matrix_fields_data =
344
344
unrolled_map (Fields. field_values, values (autodiff_matrix))
345
345
ClimaComms. @threaded device begin
346
346
# On multithreaded devices, use one thread for each band matrix row.
347
347
# TODO : Modify the map and use one thread for each dual number.
348
348
for column_index in column_indices,
349
349
index_pair in band_matrix_row_index_to_colors_map
350
350
351
- ((block_index, level_index), (scalar_index, band_colors )) =
351
+ ((block_index, level_index), (scalar_index, entry_colors )) =
352
352
index_pair
353
353
dual_number =
354
354
unrolled_applyat (scalar_index, scalar_names) do name
355
- data = MatrixFields. get_field (Yₜ_dual_no_spaces , name)
355
+ data = MatrixFields. get_field (Yₜ_dual_data , name)
356
356
@inbounds point (data, level_index, column_index... )[]
357
357
end
358
- ε_coefficients = ForwardDiff. partials (dual_number). values
358
+ ε_coefficients = ForwardDiff. partials (dual_number)
359
359
n_εs = length (ε_coefficients)
360
- color_offset = (partition - 1 ) * n_εs
361
- unrolled_applyat (block_index, matrix_fields_no_spaces) do data
362
- @inbounds band_entries =
363
- point (data. entries, level_index, column_index... )
364
- preexisting_band_entries = band_entries[]
365
- n_bands_val = Val (length (preexisting_band_entries))
366
- band_entries[] = ntuple (n_bands_val) do band_index
367
- # If this band's color is in the current partition, set
368
- # its entry to the ε coefficient for that color.
369
- # Otherwise, keep the entry already in the block's data.
370
- @inbounds band_color = band_colors[band_index]
371
- color_offset < band_color <= color_offset + n_εs ?
372
- (@inbounds ε_coefficients[band_color - color_offset]) :
373
- (@inbounds preexisting_band_entries[band_index])
374
- end
360
+ ε_offset = (partition - 1 ) * n_εs
361
+ unrolled_applyat (block_index, matrix_fields_data) do block_data
362
+ @inbounds entries_data =
363
+ point (block_data, level_index, column_index... ). entries
364
+ entries_data[] =
365
+ map (entry_colors, entries_data[]) do entry_color, entry
366
+ # If the entry has a color in the current partition,
367
+ # set the entry to the ε coefficient for that color.
368
+ # Otherwise, keep the value from the block's data.
369
+ ε_offset < entry_color <= ε_offset + n_εs ?
370
+ (@inbounds ε_coefficients[entry_color - ε_offset]) :
371
+ entry
372
+ end
375
373
end
376
374
end
377
375
end
0 commit comments