Skip to content

Commit 699cb62

Browse files
Update thermo benchmark script
1 parent ee2b83e commit 699cb62

File tree

1 file changed

+38
-12
lines changed

1 file changed

+38
-12
lines changed

benchmarks/scripts/thermo_bench_bw.jl

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,21 @@ Problem size: (63, 4, 4, 1, 5400), float_type = Float32, device_bandwidth_GBs=20
1818
┌────────────────────────────────────────────────────┬───────────────────────────────────┬─────────┬─────────────┬────────────────┬────────┐
1919
│ funcs │ time per call │ bw % │ achieved bw │ n-reads/writes │ n-reps │
2020
├────────────────────────────────────────────────────┼───────────────────────────────────┼─────────┼─────────────┼────────────────┼────────┤
21-
│ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 797 microseconds, 92 nanoseconds │ 12.4764 │ 254.394 │ 10 │ 100 │
22-
│ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 131 microseconds, 851 nanoseconds │ 75.4252 │ 1537.92 │ 10 │ 100 │
23-
│ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 797 microseconds, 164 nanoseconds │ 12.4753 │ 254.371 │ 10 │ 100 │
24-
│ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 131 microseconds, 943 nanoseconds │ 75.3725 │ 1536.84 │ 10 │ 100 │
21+
│ TBB.singlefield_bc!(x_soa, us; nreps=100, bm) │ 67 microseconds, 554 nanoseconds │ 29.4429 │ 600.341 │ 2 │ 100 │
22+
│ TBB.singlefield_bc!(x_aos, us; nreps=100, bm) │ 69 microseconds, 653 nanoseconds │ 28.5556 │ 582.248 │ 2 │ 100 │
23+
│ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 796 microseconds, 877 nanoseconds │ 12.4798 │ 254.462 │ 10 │ 100 │
24+
│ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 131 microseconds, 72 nanoseconds │ 75.873 │ 1547.05 │ 10 │ 100 │
2525
└────────────────────────────────────────────────────┴───────────────────────────────────┴─────────┴─────────────┴────────────────┴────────┘
2626
2727
[ Info: device = ClimaComms.CUDADevice()
2828
Problem size: (63, 4, 4, 1, 5400), float_type = Float64, device_bandwidth_GBs=2039
2929
┌────────────────────────────────────────────────────┬───────────────────────────────────┬─────────┬─────────────┬────────────────┬────────┐
3030
│ funcs │ time per call │ bw % │ achieved bw │ n-reads/writes │ n-reps │
3131
├────────────────────────────────────────────────────┼───────────────────────────────────┼─────────┼─────────────┼────────────────┼────────┤
32-
│ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 1 millisecond, 45 microseconds │ 19.0163 │ 387.743 │ 10 │ 100 │
33-
│ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 258 microseconds, 120 nanoseconds │ 77.0559 │ 1571.17 │ 10 │ 100 │
34-
│ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 1 millisecond, 46 microseconds │ 19.0147 │ 387.709 │ 10 │ 100 │
35-
│ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 257 microseconds, 915 nanoseconds │ 77.1171 │ 1572.42 │ 10 │ 100 │
32+
│ TBB.singlefield_bc!(x_soa, us; nreps=100, bm) │ 108 microseconds, 790 nanoseconds │ 36.5653 │ 745.567 │ 2 │ 100 │
33+
│ TBB.singlefield_bc!(x_aos, us; nreps=100, bm) │ 123 microseconds, 730 nanoseconds │ 32.1501 │ 655.541 │ 2 │ 100 │
34+
│ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 1 millisecond, 43 microseconds │ 19.0568 │ 388.569 │ 10 │ 100 │
35+
│ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 256 microseconds, 717 nanoseconds │ 77.477 │ 1579.76 │ 10 │ 100 │
3636
└────────────────────────────────────────────────────┴───────────────────────────────────┴─────────┴─────────────┴────────────────┴────────┘
3737
```
3838
=#
@@ -63,6 +63,21 @@ end
6363
# `zero` method for the `PhaseEquil{FT}` type: returns a `PhaseEquil{FT}` built by
# calling its constructor with five literal zeros.
@inline Base.zero(::Type{PhaseEquil{FT}}) where {FT} =
6464
PhaseEquil{FT}(0, 0, 0, 0, 0)
6565

66+
# Time a single-field broadcast copy, `@. ρ_write = ρ_read`, on the `ρ_read` /
# `ρ_write` properties of `x`, and record the result via `push_info`.
#
# Arguments:
# - `x`: object exposing `ρ_read` and `ρ_write` properties (destructured below).
# - `us`: not referenced in this body; presumably kept so the signature matches the
#   other benchmark functions in this file (confirm against callers).
#
# Keywords:
# - `nreps`: number of broadcast copies executed inside each timed region.
# - `bm`: forwarded as the first argument to `push_info` (defaults to `nothing`).
# - `n_trials`: number of timed regions; the minimum elapsed time over all trials is kept.
#
# Returns `nothing`.
#
# NOTE(review): `e` is the elapsed time of a whole `nreps` batch, not of one copy;
# any per-call normalization would have to happen inside `push_info` — confirm.
function singlefield_bc!(x, us; nreps = 1, bm=nothing, n_trials = 30)
67+
e = Inf # smallest elapsed time seen so far; `Inf` so the first trial always wins
68+
for t in 1:n_trials # `t` is only a trial counter; it is not used in the body
69+
et = CUDA.@elapsed begin # timed region: everything up to the matching `end`
70+
for _ in 1:nreps
71+
(; ρ_read, ρ_write) = x # destructuring happens inside the timed region
72+
@. ρ_write = ρ_read # the measured kernel: reads one field, writes one field
73+
end
74+
end
75+
e = min(e, et) # keep the best (minimum) trial
76+
end
77+
push_info(bm; e, nreps, caller = @caller_name(@__FILE__),n_reads_writes=2) # 2 = one read + one write per broadcast
78+
return nothing
79+
end
80+
6681
function thermo_func_bc!(x, us; nreps = 1, bm=nothing, n_trials = 30)
6782
e = Inf
6883
for t in 1:n_trials
@@ -173,16 +188,27 @@ using Test
173188
end
174189
x_vec = to_vec(xv)
175190

191+
x_aos = fill((; ρ_read = FT(0), ρ_write = FT(0)), cspace)
192+
x_soa = (;
193+
ρ_read = Fields.Field(FT, cspace),
194+
ρ_write = Fields.Field(FT, cspace),
195+
)
196+
@. x_soa.ρ_read = 6
197+
@. x_soa.ρ_write = 7
198+
@. x_aos.ρ_read = 6
199+
@. x_aos.ρ_write = 7
200+
TBB.singlefield_bc!(x_soa, us; nreps=1, n_trials = 1)
201+
TBB.singlefield_bc!(x_aos, us; nreps=1, n_trials = 1)
202+
176203
TBB.thermo_func_bc!(x, us; nreps=1, n_trials = 1)
177204
TBB.thermo_func_sol!(x_vec, us; nreps=1, n_trials = 1)
178205

179206
rc = Fields.rcompare(x_vec, to_vec(x))
180-
rc || Fields.rprint_diff(x_vec, to_vec(x)) # test correctness (should print nothing)
207+
rc || Fields.@rprint_diff(x_vec, to_vec(x)) # test correctness (should print nothing)
181208
@test rc # test correctness
182209

183-
TBB.thermo_func_bc!(x, us; nreps=100, bm)
184-
TBB.thermo_func_sol!(x_vec, us; nreps=100, bm)
185-
210+
TBB.singlefield_bc!(x_soa, us; nreps=100, bm)
211+
TBB.singlefield_bc!(x_aos, us; nreps=100, bm)
186212
TBB.thermo_func_bc!(x, us; nreps=100, bm)
187213
TBB.thermo_func_sol!(x_vec, us; nreps=100, bm)
188214

0 commit comments

Comments
 (0)