@@ -18,21 +18,21 @@ Problem size: (63, 4, 4, 1, 5400), float_type = Float32, device_bandwidth_GBs=20
18
18
┌────────────────────────────────────────────────────┬───────────────────────────────────┬─────────┬─────────────┬────────────────┬────────┐
19
19
│ funcs │ time per call │ bw % │ achieved bw │ n-reads/writes │ n-reps │
20
20
├────────────────────────────────────────────────────┼───────────────────────────────────┼─────────┼─────────────┼────────────────┼────────┤
21
- │ TBB.thermo_func_bc!(x , us; nreps=100, bm) │ 797 microseconds, 92 nanoseconds │ 12.4764 │ 254.394 │ 10 │ 100 │
22
- │ TBB.thermo_func_sol!(x_vec , us; nreps=100, bm) │ 131 microseconds, 851 nanoseconds │ 75.4252 │ 1537.92 │ 10 │ 100 │
23
- │ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 797 microseconds, 164 nanoseconds │ 12.4753 │ 254.371 │ 10 │ 100 │
24
- │ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 131 microseconds, 943 nanoseconds │ 75.3725 │ 1536.84 │ 10 │ 100 │
21
+ │ TBB.singlefield_bc!(x_soa , us; nreps=100, bm) │ 67 microseconds, 554 nanoseconds │ 29.4429 │ 600.341 │ 2 │ 100 │
22
+ │ TBB.singlefield_bc!(x_aos , us; nreps=100, bm) │ 69 microseconds, 653 nanoseconds │ 28.5556 │ 582.248 │ 2 │ 100 │
23
+ │ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 796 microseconds, 877 nanoseconds │ 12.4798 │ 254.462 │ 10 │ 100 │
24
+ │ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 131 microseconds, 72 nanoseconds │ 75.873 │ 1547.05 │ 10 │ 100 │
25
25
└────────────────────────────────────────────────────┴───────────────────────────────────┴─────────┴─────────────┴────────────────┴────────┘
26
26
27
27
[ Info: device = ClimaComms.CUDADevice()
28
28
Problem size: (63, 4, 4, 1, 5400), float_type = Float64, device_bandwidth_GBs=2039
29
29
┌────────────────────────────────────────────────────┬───────────────────────────────────┬─────────┬─────────────┬────────────────┬────────┐
30
30
│ funcs │ time per call │ bw % │ achieved bw │ n-reads/writes │ n-reps │
31
31
├────────────────────────────────────────────────────┼───────────────────────────────────┼─────────┼─────────────┼────────────────┼────────┤
32
- │ TBB.thermo_func_bc!(x , us; nreps=100, bm) │ 1 millisecond, 45 microseconds │ 19.0163 │ 387.743 │ 10 │ 100 │
33
- │ TBB.thermo_func_sol!(x_vec , us; nreps=100, bm) │ 258 microseconds, 120 nanoseconds │ 77.0559 │ 1571.17 │ 10 │ 100 │
34
- │ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 1 millisecond, 46 microseconds │ 19.0147 │ 387.709 │ 10 │ 100 │
35
- │ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 257 microseconds, 915 nanoseconds │ 77.1171 │ 1572.42 │ 10 │ 100 │
32
+ │ TBB.singlefield_bc!(x_soa , us; nreps=100, bm) │ 108 microseconds, 790 nanoseconds │ 36.5653 │ 745.567 │ 2 │ 100 │
33
+ │ TBB.singlefield_bc!(x_aos , us; nreps=100, bm) │ 123 microseconds, 730 nanoseconds │ 32.1501 │ 655.541 │ 2 │ 100 │
34
+ │ TBB.thermo_func_bc!(x, us; nreps=100, bm) │ 1 millisecond, 43 microseconds │ 19.0568 │ 388.569 │ 10 │ 100 │
35
+ │ TBB.thermo_func_sol!(x_vec, us; nreps=100, bm) │ 256 microseconds, 717 nanoseconds │ 77.477 │ 1579.76 │ 10 │ 100 │
36
36
└────────────────────────────────────────────────────┴───────────────────────────────────┴─────────┴─────────────┴────────────────┴────────┘
37
37
```
38
38
=#
63
63
# Zero value for `PhaseEquil{FT}`: the instance obtained by passing five
# literal zeros to the `PhaseEquil{FT}` constructor.
@inline function Base.zero(::Type{PhaseEquil{FT}}) where {FT}
    return PhaseEquil{FT}(0, 0, 0, 0, 0)
end
65
65
66
"""
    singlefield_bc!(x, us; nreps = 1, bm = nothing, n_trials = 30)

Time a broadcasted copy of `x.ρ_read` into `x.ρ_write`.

Each of the `n_trials` trials wraps `nreps` back-to-back copies in a single
`CUDA.@elapsed` measurement. The smallest measurement over all trials is
handed to `push_info` together with `nreps`, the caller name, and
`n_reads_writes = 2`.

`us` is accepted but never read in this function (presumably kept so the
signature matches the other benchmark kernels -- confirm against callers).

Returns `nothing`.
"""
function singlefield_bc!(x, us; nreps = 1, bm = nothing, n_trials = 30)
    fastest = Inf
    for _ in 1:n_trials
        trial_time = CUDA.@elapsed begin
            for _ in 1:nreps
                # Field lookups stay inside the timed region, exactly as in
                # the destructuring form, so the measured work is unchanged.
                ρ_read = x.ρ_read
                ρ_write = x.ρ_write
                ρ_write .= ρ_read
            end
        end
        # Keep the best (smallest) time observed across trials.
        fastest = min(fastest, trial_time)
    end
    push_info(
        bm;
        e = fastest,
        nreps,
        caller = @caller_name(@__FILE__),
        n_reads_writes = 2,
    )
    return nothing
end
80
+
66
81
function thermo_func_bc! (x, us; nreps = 1 , bm= nothing , n_trials = 30 )
67
82
e = Inf
68
83
for t in 1 : n_trials
@@ -173,16 +188,27 @@ using Test
173
188
end
174
189
x_vec = to_vec (xv)
175
190
191
+ x_aos = fill ((; ρ_read = FT (0 ), ρ_write = FT (0 )), cspace)
192
+ x_soa = (;
193
+ ρ_read = Fields. Field (FT, cspace),
194
+ ρ_write = Fields. Field (FT, cspace),
195
+ )
196
+ @. x_soa. ρ_read = 6
197
+ @. x_soa. ρ_write = 7
198
+ @. x_aos. ρ_read = 6
199
+ @. x_aos. ρ_write = 7
200
+ TBB. singlefield_bc! (x_soa, us; nreps= 1 , n_trials = 1 )
201
+ TBB. singlefield_bc! (x_aos, us; nreps= 1 , n_trials = 1 )
202
+
176
203
TBB. thermo_func_bc! (x, us; nreps= 1 , n_trials = 1 )
177
204
TBB. thermo_func_sol! (x_vec, us; nreps= 1 , n_trials = 1 )
178
205
179
206
rc = Fields. rcompare (x_vec, to_vec (x))
180
- rc || Fields. rprint_diff (x_vec, to_vec (x)) # test correctness (should print nothing)
207
+ rc || Fields. @ rprint_diff (x_vec, to_vec (x)) # test correctness (should print nothing)
181
208
@test rc # test correctness
182
209
183
- TBB. thermo_func_bc! (x, us; nreps= 100 , bm)
184
- TBB. thermo_func_sol! (x_vec, us; nreps= 100 , bm)
185
-
210
+ TBB. singlefield_bc! (x_soa, us; nreps= 100 , bm)
211
+ TBB. singlefield_bc! (x_aos, us; nreps= 100 , bm)
186
212
TBB. thermo_func_bc! (x, us; nreps= 100 , bm)
187
213
TBB. thermo_func_sol! (x_vec, us; nreps= 100 , bm)
188
214
0 commit comments