@@ -33,41 +33,6 @@ field_vars(::Type{FT}) where {FT} = (;
33
33
ᶠw = Geometry. Covariant3Vector (FT (0 )),
34
34
)
35
35
36
- # ####
37
- # #### Second order interpolation / derivatives
38
- # ####
39
-
40
- #= e.g., any 2nd order interpolation / derivative operator =#
41
"""
    op_2mul_1add!(x, y, D, U)

Fill `x` in place with a two-term stencil of `y` (two multiplies, one add per
entry). For a 1-based `y` this is `x[i] = D[i] * y[i] + U[i] * y[i + 1]`.

The loop runs under `@inbounds` and performs no size validation, so the caller
must supply `y` with at least `length(x) + 1` entries and `D`, `U` with at least
`length(x)` entries.

Returns `nothing`.
"""
function op_2mul_1add!(x, y, D, U)
    # Two shifted windows over `y`: all but the last entry, and all but the first.
    lower = view(y, 1:(lastindex(y) - 1))
    upper = view(y, 2:lastindex(y))
    @inbounds for k in eachindex(x)
        lower_term = D[k] * lower[k]
        upper_term = U[k] * upper[k]
        x[k] = lower_term + upper_term
    end
    return nothing
end
49
-
50
- #= e.g., div(grad(scalar)), div(interp(vec)) =#
51
"""
    op_3mul_2add!(x, y, L, D, U)

Fill the interior of `x` in place with a three-term stencil of `y` (three
multiplies, two adds per entry). The entries at index `1` and at index
`length(x)` are left untouched.

The loop runs under `@inbounds` and performs no size validation; the caller is
responsible for passing arrays that are long enough.

Returns `nothing`.
"""
function op_3mul_2add!(x, y, L, D, U)
    last_y = lastindex(y)
    left = view(y, 1:(last_y - 1))
    # NOTE(review): `middle` and `right` both start at `y[2]`, so the `D` and `U`
    # terms read the same entry of `y`. Presumably fine for a cost benchmark —
    # confirm before reusing this as a numerical kernel.
    middle = view(y, 2:(last_y - 1))
    right = view(y, 2:last_y)
    n = length(x)
    @inbounds for k in eachindex(x)
        # Skip the first and last entries; only the interior is written.
        (k == 1 || k == n) && continue
        x[k] = L[k] * left[k] + D[k] * middle[k] + U[k] * right[k]
    end
    return nothing
end
62
-
63
- #= e.g., curlC2F =#
64
"""
    curl_like!(curluₕ, uₕ_x, uₕ_y, D, U)

Fill `curluₕ` in place with the pointwise combination
`curluₕ[i] = D[i] * uₕ_x[i] + U[i] * uₕ_y[i]`.

The loop runs under `@inbounds` and performs no size validation, so every
argument must cover the indices of `curluₕ`.

Returns `nothing`.
"""
function curl_like!(curluₕ, uₕ_x, uₕ_y, D, U)
    @inbounds for k in eachindex(curluₕ)
        x_term = D[k] * uₕ_x[k]
        y_term = U[k] * uₕ_y[k]
        curluₕ[k] = x_term + y_term
    end
    return nothing
end
70
-
71
36
function set_value_bcs (c)
72
37
FT = Spaces. undertype (axes (c))
73
38
return (;bottom = Operators. SetValue (FT (0 )),
@@ -211,7 +176,8 @@ bc_name(bcs::Tuple) = (:none,)
211
176
# An empty named tuple of boundary conditions maps to the single name `:none`.
function bc_name_base(bcs::@NamedTuple{})
    return (:none,)
end
212
177
# An empty named tuple of boundary conditions maps to the single name `:none`.
function bc_name(bcs::@NamedTuple{})
    return (:none,)
end
213
178
214
- include (" benchmark_column_kernels.jl" )
179
+ include (" benchmark_stencils_array_kernels.jl" )
180
+ include (" benchmark_stencils_climacore_kernels.jl" )
215
181
216
182
# `uses_bycolumn` answers `true` for the `op_broadcast_example0!` kernel.
function uses_bycolumn(::typeof(op_broadcast_example0!))
    return true
end
217
183
# `uses_bycolumn` answers `true` for the `op_broadcast_example1!` kernel.
function uses_bycolumn(::typeof(op_broadcast_example1!))
    return true
end
@@ -268,54 +234,88 @@ function benchmark_func!(t_min, trials, fun, c, f, verbose = false)
268
234
end
269
235
end
270
236
271
- function benchmark_arrays (z_elems, :: Type{FT} ) where {FT}
272
- L = zeros (FT, z_elems)
273
- D = zeros (FT, z_elems)
274
- U = zeros (FT, z_elems)
275
- xarr = rand (FT, z_elems)
276
- uₕ_x = rand (FT, z_elems)
277
- uₕ_y = rand (FT, z_elems)
278
- yarr = rand (FT, z_elems + 1 )
279
-
280
- println (" \n ############################ 2-point stencil" )
281
- trial = BenchmarkTools. @benchmark op_2mul_1add! ($ xarr, $ yarr, $ D, $ U)
282
- show (stdout , MIME (" text/plain" ), trial)
283
- println ()
284
- println (" \n ############################ 3-point stencil" )
285
- trial = BenchmarkTools. @benchmark op_3mul_2add! ($ xarr, $ yarr, $ L, $ D, $ U)
286
- show (stdout , MIME (" text/plain" ), trial)
287
- println ()
288
- println (" \n ############################ curl-like stencil" )
289
- trial = BenchmarkTools. @benchmark curl_like! ($ xarr, $ uₕ_x, $ uₕ_y, $ D, $ U)
290
- show (stdout , MIME (" text/plain" ), trial)
291
- println ()
237
"""
    column_benchmark_arrays(device, z_elems, ::Type{FT})

Benchmark the raw-array column stencil kernels on `device` and print each
`BenchmarkTools` trial to `stdout`.

Arrays of element type `FT` are built on the host and converted with
`ClimaComms.array_type(device)`: six of length `z_elems` and one (`yarr`) of
length `z_elems + 1`.

- On a `ClimaComms.CUDADevice` only the 2-point stencil
  (`column_op_2mul_1add_cuda!`) is benchmarked, wrapped in
  `ClimaComms.@cuda_sync`.
- On any other device the 2-point, 3-point and curl-like kernels are
  benchmarked in that order.

Returns `nothing`.
"""
function column_benchmark_arrays(device, z_elems, ::Type{FT}) where {FT}
    ArrayType = ClimaComms.array_type(device)
    to_device(host_array) = ArrayType(host_array)
    banner(label) = println("\n############################ ", label)
    function report(result)
        show(stdout, MIME("text/plain"), result)
        println()
        return nothing
    end

    # Allocation order is kept so the random draws are consumed as before.
    L = to_device(zeros(FT, z_elems))
    D = to_device(zeros(FT, z_elems))
    U = to_device(zeros(FT, z_elems))
    xarr = to_device(rand(FT, z_elems))
    uₕ_x = to_device(rand(FT, z_elems))
    uₕ_y = to_device(rand(FT, z_elems))
    yarr = to_device(rand(FT, z_elems + 1))

    # Both device branches start with the same 2-point stencil banner.
    banner("column 2-point stencil")
    if device isa ClimaComms.CUDADevice
        trial = BenchmarkTools.@benchmark ClimaComms.@cuda_sync $device column_op_2mul_1add_cuda!(
            $xarr,
            $yarr,
            $D,
            $U,
        )
        report(trial)
        return nothing
    end

    trial = BenchmarkTools.@benchmark column_op_2mul_1add!($xarr, $yarr, $D, $U)
    report(trial)

    banner("column 3-point stencil")
    trial = BenchmarkTools.@benchmark column_op_3mul_2add!($xarr, $yarr, $L, $D, $U)
    report(trial)

    banner("column curl-like stencil")
    trial = BenchmarkTools.@benchmark column_curl_like!($xarr, $uₕ_x, $uₕ_y, $D, $U)
    report(trial)
    return nothing
end
267
+
268
"""
    sphere_benchmark_arrays(device, z_elems, helem, Nq, ::Type{FT})

Benchmark the raw-array sphere 2-point stencil kernel
(`sphere_op_2mul_1add_cuda!`) on `device` and print the `BenchmarkTools` trial
to `stdout`.

Only `ClimaComms.CUDADevice` is supported. On any other device an `@info`
message is logged and the function returns immediately, before any array is
allocated.

Array sizes are `(z_elems, Nq, Nq, 1, Nh)` for `D`, `U` and `xarr`, and
`(z_elems + 1, Nq, Nq, 1, Nh)` for `yarr`, with `Nh = helem * helem * 6`.

Returns `nothing`.
"""
function sphere_benchmark_arrays(device, z_elems, helem, Nq, ::Type{FT}) where {FT}
    # Bail out first: these arrays can be large, and there is nothing to
    # benchmark on non-CUDA devices yet.
    if !(device isa ClimaComms.CUDADevice)
        @info "Sphere CPU kernels have not been added yet."
        return nothing
    end

    ArrayType = ClimaComms.array_type(device)
    # VIJFH dimension order — presumably (vertical, i, j, field, horizontal
    # element); TODO confirm against the kernel's indexing.
    Nh = helem * helem * 6
    cdims = (z_elems, Nq, Nq, 1, Nh)
    fdims = (z_elems + 1, Nq, Nq, 1, Nh)
    # Only the arrays the 2-point kernel actually receives are allocated.
    D = ArrayType(zeros(FT, cdims))
    U = ArrayType(zeros(FT, cdims))
    xarr = ArrayType(rand(FT, cdims))
    yarr = ArrayType(rand(FT, fdims))

    println("\n############################ sphere 2-point stencil")
    trial = BenchmarkTools.@benchmark ClimaComms.@cuda_sync $device sphere_op_2mul_1add_cuda!(
        $xarr,
        $yarr,
        $D,
        $U,
    )
    show(stdout, MIME("text/plain"), trial)
    println()
    return nothing
end
293
291
294
292
"""
    benchmark_operators(::Type{FT}; z_elems, helem, Nq)

Run the full benchmark suite for float type `FT`.

Steps, in order:
1. Query and display the `ClimaComms` device.
2. Benchmark the raw-array kernels via `column_benchmark_arrays` and
   `sphere_benchmark_arrays`.
3. Benchmark the operators on a column finite-difference space
   (`zelem = z_elems`).
4. Benchmark the operators on an extruded finite-difference space
   (`zelem = z_elems`, `helem`, `Nq`).
5. Pass the collected minimum times to `test_results`.

Returns the named tuple `(; trials, t_min)`; both are `OrderedDict`s handed to
`benchmark_operators_base` for both spaces.
"""
function benchmark_operators(::Type{FT}; z_elems, helem, Nq) where {FT}
    device = ClimaComms.device()
    @show device
    t_min = OrderedCollections.OrderedDict()
    trials = OrderedCollections.OrderedDict()

    # Raw-array baselines.
    column_benchmark_arrays(device, z_elems, FT)
    sphere_benchmark_arrays(device, z_elems, helem, Nq, FT)

    # Column (finite-difference) spaces.
    column_centers = TU.ColumnCenterFiniteDifferenceSpace(FT; zelem = z_elems)
    column_faces = Spaces.FaceFiniteDifferenceSpace(column_centers)
    benchmark_operators_base(
        trials,
        t_min,
        fill(field_vars(FT), column_centers),
        fill(field_vars(FT), column_faces),
        "column",
    )

    # Extruded (sphere) spaces.
    sphere_centers =
        TU.CenterExtrudedFiniteDifferenceSpace(FT; zelem = z_elems, helem, Nq)
    sphere_faces = Spaces.FaceExtrudedFiniteDifferenceSpace(sphere_centers)
    benchmark_operators_base(
        trials,
        t_min,
        fill(field_vars(FT), sphere_centers),
        fill(field_vars(FT), sphere_faces),
        "sphere",
    )

    # The original note here says the tests were removed because they are flaky
    # and costly to maintain before they converge. `test_results` is defined
    # elsewhere — NOTE(review): confirm what it still checks.
    test_results(t_min)
    return (; trials = trials, t_min = t_min)
end
317
317
318
- function benchmark_operators_base (trials, t_min, cfield, ffield)
318
+ function benchmark_operators_base (trials, t_min, cfield, ffield, name )
319
319
ops = [
320
320
# ### Core discrete operators
321
321
op_GradientF2C!,
@@ -346,7 +346,7 @@ function benchmark_operators_base(trials, t_min, cfield, ffield)
346
346
op_divgrad_uₕ!,
347
347
]
348
348
349
- @info " Benchmarking operators, this may take a minute or two..."
349
+ @info " Benchmarking $name operators, this may take a minute or two..."
350
350
for op in ops
351
351
if uses_bycolumn (op) && axes (cfield) isa Spaces. FiniteDifferenceSpace
352
352
continue
0 commit comments