Skip to content

Commit 0f85e49

Browse files
Break down stencil benchmarks
1 parent aed16b0 commit 0f85e49

File tree

2 files changed

+76
-23
lines changed

2 files changed

+76
-23
lines changed

test/Operators/finitedifference/benchmark_stencils.jl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,14 @@ using Revise; include(joinpath("test", "Operators", "finitedifference", "benchma
44
=#
55
include("benchmark_stencils_utils.jl")
66

7+
#! format: off
78
@testset "Benchmark operators" begin
8-
benchmark_operators(Float64; z_elems = 63, helem = 30, Nq = 4)
9+
# benchmark_operators_column(Float64; z_elems = 63, helem = 30, Nq = 4, compile = true)
10+
benchmark_operators_column(Float64; z_elems = 63, helem = 30, Nq = 4)
11+
12+
# benchmark_operators_sphere(Float64; z_elems = 63, helem = 30, Nq = 4, compile = true)
13+
benchmark_operators_sphere(Float64; z_elems = 63, helem = 30, Nq = 4)
914
end
15+
#! format: on
1016

1117
nothing

test/Operators/finitedifference/benchmark_stencils_utils.jl

Lines changed: 69 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -219,22 +219,28 @@ bcs_tested(c, ::typeof(op_divgrad_uₕ!)) =
219219
(; inner = (;), outer = set_value_divgrad_uₕ_maybe_field_bcs(c)),
220220
)
221221

222-
function benchmark_func!(t_min, trials, fun, c, f, verbose = false)
222+
function benchmark_func!(t_min, trials, fun, c, f, verbose = false; compile::Bool)
223223
device = ClimaComms.device(c)
224224
for bcs in bcs_tested(c, fun)
225225
h_space = nameof(typeof(axes(c)))
226226
key = (h_space, fun, bc_name(bcs)...)
227-
verbose && @info "\n@benchmarking $key"
228-
trials[key] = BenchmarkTools.@benchmark ClimaComms.@cuda_sync $device $fun($c, $f, $bcs)
229-
verbose && show(stdout, MIME("text/plain"), trials[key])
227+
if compile
228+
fun(c, f, bcs)
229+
else
230+
verbose && @info "\n@benchmarking $key"
231+
trials[key] = BenchmarkTools.@benchmark ClimaComms.@cuda_sync $device $fun($c, $f, $bcs)
232+
end
233+
if haskey(trials, key)
234+
verbose && show(stdout, MIME("text/plain"), trials[key])
230235

231-
t_min[key] = minimum(trials[key].times) # nano seconds
232-
t_pretty = BenchmarkTools.prettytime(t_min[key])
233-
verbose || @info "$t_pretty <=> t_min[$key]"
236+
t_min[key] = minimum(trials[key].times) # nano seconds
237+
t_pretty = BenchmarkTools.prettytime(t_min[key])
238+
verbose || @info "$t_pretty <=> t_min[$key]"
239+
end
234240
end
235241
end
236242

237-
function column_benchmark_arrays(device, z_elems, ::Type{FT}) where {FT}
243+
function column_benchmark_arrays(device, z_elems, ::Type{FT}; compile::Bool) where {FT}
238244
ArrayType = ClimaComms.array_type(device)
239245
L = ArrayType(zeros(FT, z_elems))
240246
D = ArrayType(zeros(FT, z_elems))
@@ -243,6 +249,16 @@ function column_benchmark_arrays(device, z_elems, ::Type{FT}) where {FT}
243249
uₕ_x = ArrayType(rand(FT, z_elems))
244250
uₕ_y = ArrayType(rand(FT, z_elems))
245251
yarr = ArrayType(rand(FT, z_elems + 1))
252+
if compile
253+
if device isa ClimaComms.CUDADevice
254+
column_op_2mul_1add_cuda!(xarr, yarr, D, U)
255+
else
256+
column_op_2mul_1add!(xarr, yarr, D, U)
257+
column_op_3mul_2add!(xarr, yarr, L, D, U)
258+
column_curl_like!(xarr, uₕ_x, uₕ_y, D, U)
259+
end
260+
return nothing
261+
end
246262

247263
if device isa ClimaComms.CUDADevice
248264
println("\n############################ column 2-point stencil")
@@ -265,7 +281,7 @@ function column_benchmark_arrays(device, z_elems, ::Type{FT}) where {FT}
265281
end
266282
end
267283

268-
function sphere_benchmark_arrays(device, z_elems, helem, Nq, ::Type{FT}) where {FT}
284+
function sphere_benchmark_arrays(device, z_elems, helem, Nq, ::Type{FT}; compile::Bool) where {FT}
269285
ArrayType = ClimaComms.array_type(device)
270286
# VIJFH
271287
Nh = helem * helem * 6
@@ -280,42 +296,58 @@ function sphere_benchmark_arrays(device, z_elems, helem, Nq, ::Type{FT}) where {
280296
yarr = ArrayType(rand(FT, fdims...))
281297

282298
if device isa ClimaComms.CUDADevice
283-
println("\n############################ sphere 2-point stencil")
284-
trial = BenchmarkTools.@benchmark ClimaComms.@cuda_sync $device sphere_op_2mul_1add_cuda!($xarr, $yarr, $D, $U)
285-
show(stdout, MIME("text/plain"), trial)
286-
println()
299+
if compile
300+
sphere_op_2mul_1add_cuda!(xarr, yarr, D, U)
301+
else
302+
println("\n############################ sphere 2-point stencil")
303+
trial = BenchmarkTools.@benchmark ClimaComms.@cuda_sync $device sphere_op_2mul_1add_cuda!($xarr, $yarr, $D, $U)
304+
show(stdout, MIME("text/plain"), trial)
305+
println()
306+
end
287307
else
288308
@info "Sphere CPU kernels have not been added yet."
289309
end
290310
end
291311

292-
function benchmark_operators(::Type{FT}; z_elems, helem, Nq) where {FT}
312+
function benchmark_operators_column(::Type{FT}; z_elems, helem, Nq, compile::Bool = false) where {FT}
293313
device = ClimaComms.device()
294314
@show device
295315
trials = OrderedCollections.OrderedDict()
296316
t_min = OrderedCollections.OrderedDict()
297-
column_benchmark_arrays(device, z_elems, FT)
298-
sphere_benchmark_arrays(device, z_elems, helem, Nq, FT)
317+
column_benchmark_arrays(device, z_elems, FT; compile)
299318

300319
cspace = TU.ColumnCenterFiniteDifferenceSpace(FT; zelem=z_elems)
301320
fspace = Spaces.FaceFiniteDifferenceSpace(cspace)
302321
cfield = fill(field_vars(FT), cspace)
303322
ffield = fill(field_vars(FT), fspace)
304-
benchmark_operators_base(trials, t_min, cfield, ffield, "column")
323+
benchmark_operators_base(trials, t_min, cfield, ffield, "column"; compile)
324+
325+
# Tests are removed since they're flaky, and maintaining
326+
# them before they're converged is a bit of work.
327+
compile || test_results_column(t_min)
328+
return (; trials, t_min)
329+
end
330+
331+
function benchmark_operators_sphere(::Type{FT}; z_elems, helem, Nq, compile::Bool = false) where {FT}
332+
device = ClimaComms.device()
333+
@show device
334+
trials = OrderedCollections.OrderedDict()
335+
t_min = OrderedCollections.OrderedDict()
336+
sphere_benchmark_arrays(device, z_elems, helem, Nq, FT; compile)
305337

306338
cspace = TU.CenterExtrudedFiniteDifferenceSpace(FT; zelem=z_elems, helem, Nq)
307339
fspace = Spaces.FaceExtrudedFiniteDifferenceSpace(cspace)
308340
cfield = fill(field_vars(FT), cspace)
309341
ffield = fill(field_vars(FT), fspace)
310-
benchmark_operators_base(trials, t_min, cfield, ffield, "sphere")
342+
benchmark_operators_base(trials, t_min, cfield, ffield, "sphere"; compile)
311343

312344
# Tests are removed since they're flaky, and maintaining
313345
# them before they're converged is a bit of work.
314-
test_results(t_min)
346+
compile || test_results_sphere(t_min)
315347
return (; trials, t_min)
316348
end
317349

318-
function benchmark_operators_base(trials, t_min, cfield, ffield, name)
350+
function benchmark_operators_base(trials, t_min, cfield, ffield, name; compile::Bool)
319351
ops = [
320352
#### Core discrete operators
321353
op_GradientF2C!,
@@ -351,13 +383,13 @@ function benchmark_operators_base(trials, t_min, cfield, ffield, name)
351383
if uses_bycolumn(op) && axes(cfield) isa Spaces.FiniteDifferenceSpace
352384
continue
353385
end
354-
benchmark_func!(t_min, trials, op, cfield, ffield, #= verbose = =# false)
386+
benchmark_func!(t_min, trials, op, cfield, ffield, #= verbose = =# false; compile)
355387
end
356388

357389
return nothing
358390
end
359391

360-
function test_results(t_min)
392+
function test_results_column(t_min)
361393
# If these tests fail, just update the numbers (or the
362394
# buffer) so long as it's not an egregious regression.
363395
buffer = 2
@@ -393,7 +425,22 @@ function test_results(t_min)
393425
[(:FiniteDifferenceSpace, op_div_interp_FF!, :none, :SetValue, :SetValue), 686.581*ns*buffer],
394426
[(:FiniteDifferenceSpace, op_divgrad_uₕ!, :none, :SetValue, :Extrapolate), 4.960*μs*buffer],
395427
[(:FiniteDifferenceSpace, op_divgrad_uₕ!, :none, :SetValue, :SetValue), 5.047*μs*buffer],
428+
]
429+
for (params, ref_time) in results
430+
if !(t_min[params] ≤ ref_time)
431+
@warn "Possible regression: $params, time=$(t_min[params]), ref_time=$ref_time"
432+
end
433+
end
434+
end
396435

436+
function test_results_sphere(t_min)
437+
# If these tests fail, just update the numbers (or the
438+
# buffer) so long as it's not an egregious regression.
439+
buffer = 2
440+
ns = 1
441+
μs = 10^3
442+
ms = 10^6
443+
results = [
397444
[(:ExtrudedFiniteDifferenceSpace, op_GradientF2C!, :none), 1.746*ms*buffer],
398445
[(:ExtrudedFiniteDifferenceSpace, op_GradientF2C!, :SetValue, :SetValue), 1.754*ms*buffer],
399446
[(:ExtrudedFiniteDifferenceSpace, op_GradientC2F!, :SetGradient, :SetGradient), 1.899*ms*buffer],

0 commit comments

Comments
 (0)