Skip to content

Commit b38c1fb

Browse files
Merge pull request #1736 from CliMA/as/covar-deriv
Covariant derivatives on GPU
2 parents d3d027f + 014484a commit b38c1fb

File tree

7 files changed

+233
-18
lines changed

7 files changed

+233
-18
lines changed

.buildkite/pipeline.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,17 @@ steps:
643643
CLIMACOMMS_DEVICE: "CUDA"
644644
agents:
645645
slurm_gpus: 1
646+
647+
- label: "Unit: velocity grad tensor ops"
648+
key: unit_spectral_tensor_op_cuda
649+
command: "julia --color=yes --check-bounds=yes --project=.buildkite test/Operators/spectralelement/covar_deriv_ops.jl"
650+
command:
651+
- "julia --project=.buildkite -e 'using CUDA; CUDA.versioninfo()'"
652+
- "julia --color=yes --check-bounds=yes --project=.buildkite test/Operators/spectralelement/covar_deriv_ops.jl CUDA"
653+
env:
654+
CLIMACOMMS_DEVICE: "CUDA"
655+
agents:
656+
slurm_gpus: 1
646657

647658
- label: "Unit: hybrid operators cuda"
648659
key: unit_ops_cuda

ext/cuda/operators_sem_shmem.jl

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,23 +94,75 @@ Base.@propagate_inbounds function operator_shmem(
9494
FT = Spaces.undertype(space)
9595
QS = Spaces.quadrature_style(space)
9696
Nq = Quadratures.degrees_of_freedom(QS)
97-
# allocate temp output
9897
IT = eltype(arg)
99-
input = CUDA.CuStaticSharedArray(IT, (Nq, Nq, Nvt))
100-
return (input,)
98+
ET = eltype(IT)
99+
RT = operator_return_eltype(op, IT)
100+
RT12 = Geometry.AxisTensor{
101+
ET,
102+
2,
103+
Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2)}},
104+
SMatrix{2, 2, ET, 4},
105+
}
106+
RT123 = Geometry.AxisTensor{
107+
ET,
108+
2,
109+
Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2, 3)}},
110+
SMatrix{2, 3, ET, 6},
111+
}
112+
if RT <: Geometry.Covariant12Vector
113+
# allocate temp output
114+
input = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
115+
return (input,)
116+
elseif RT <: RT12
117+
v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
118+
v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
119+
return (v₁, v₂)
120+
elseif RT <: RT123
121+
v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
122+
v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
123+
v₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
124+
return (v₁, v₂, v₃)
125+
end
101126
end
102127

103128
Base.@propagate_inbounds function operator_fill_shmem!(
104129
op::Gradient{(1, 2)},
105-
(input,),
130+
input,
106131
space,
107132
ij,
108133
slabidx,
109134
arg,
110135
)
111136
vt = threadIdx().z
112137
i, j = ij.I
113-
input[i, j, vt] = arg
138+
local_geometry = get_local_geometry(space, ij, slabidx)
139+
RT = operator_return_eltype(op, typeof(arg))
140+
ET = eltype(eltype(arg))
141+
RT12 = Geometry.AxisTensor{
142+
ET,
143+
2,
144+
Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2)}},
145+
SMatrix{2, 2, ET, 4},
146+
}
147+
RT123 = Geometry.AxisTensor{
148+
ET,
149+
2,
150+
Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2, 3)}},
151+
SMatrix{2, 3, ET, 6},
152+
}
153+
if RT <: Geometry.Covariant12Vector
154+
(v,) = input
155+
v[i, j, vt] = arg
156+
elseif RT <: RT12
157+
v₁, v₂ = input
158+
v₁[i, j, vt] = Geometry.LocalVector(arg, local_geometry).u
159+
v₂[i, j, vt] = Geometry.LocalVector(arg, local_geometry).v
160+
elseif RT <: RT123
161+
v₁, v₂, v₃ = input
162+
v₁[i, j, vt] = Geometry.LocalVector(arg, local_geometry).u
163+
v₂[i, j, vt] = Geometry.LocalVector(arg, local_geometry).v
164+
v₃[i, j, vt] = Geometry.LocalVector(arg, local_geometry).w
165+
end
114166
end
115167

116168

ext/cuda/operators_spectral_element.jl

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import ClimaCore: Spaces, Quadratures, Topologies
22
import ClimaCore: Operators, Geometry, Quadratures, RecursiveApply
33
import ClimaComms
4-
using CUDA: @cuda
54
using CUDA
65
import ClimaCore.Operators: AbstractSpectralStyle, strip_space
76
import ClimaCore.Operators: SpectralBroadcasted, set_node!, get_node
@@ -256,7 +255,7 @@ end
256255

257256
Base.@propagate_inbounds function operator_evaluate(
258257
op::Gradient{(1, 2)},
259-
(input,),
258+
input,
260259
space,
261260
ij,
262261
slabidx,
@@ -269,14 +268,59 @@ Base.@propagate_inbounds function operator_evaluate(
269268
Nq = Quadratures.degrees_of_freedom(QS)
270269
D = Quadratures.differentiation_matrix(FT, QS)
271270

272-
∂f∂ξ₁ = D[i, 1] * input[1, j, vt]
273-
∂f∂ξ₂ = D[j, 1] * input[i, 1, vt]
274-
275-
for k in 2:Nq
276-
∂f∂ξ₁ += D[i, k] * input[k, j, vt]
277-
∂f∂ξ₂ += D[j, k] * input[i, k, vt]
271+
if length(input) == 1 # check types
272+
(v₁,) = input
273+
@inbounds begin
274+
∂f∂ξ₁ = D[i, 1] v₁[1, j, vt]
275+
∂f∂ξ₂ = D[j, 1] v₁[i, 1, vt]
276+
for k in 2:Nq
277+
∂f∂ξ₁ = ∂f∂ξ₁ D[i, k] v₁[k, j, vt]
278+
∂f∂ξ₂ = ∂f∂ξ₂ D[j, k] v₁[i, k, vt]
279+
end
280+
end
281+
return Geometry.Covariant12Vector(∂f∂ξ₁, ∂f∂ξ₂)
282+
elseif length(input) == 2
283+
# Update `shmem`
284+
v₁, v₂ = input
285+
@inbounds begin
286+
∂f₁∂ξ₁ = D[i, 1] v₁[1, j, vt]
287+
∂f₁∂ξ₂ = D[j, 1] v₁[i, 1, vt]
288+
∂f₂∂ξ₁ = D[i, 1] v₂[1, j, vt]
289+
∂f₂∂ξ₂ = D[j, 1] v₂[i, 1, vt]
290+
@simd for k in 2:Nq
291+
∂f₁∂ξ₁ = ∂f₁∂ξ₁ D[i, k] v₁[k, j, vt]
292+
∂f₁∂ξ₂ = ∂f₁∂ξ₂ D[j, k] v₁[i, k, vt]
293+
∂f₂∂ξ₁ = ∂f₂∂ξ₁ D[i, k] v₂[k, j, vt]
294+
∂f₂∂ξ₂ = ∂f₂∂ξ₂ D[j, k] v₂[i, k, vt]
295+
end
296+
end
297+
return Geometry.AxisTensor(
298+
(Geometry.Covariant12Axis(), Geometry.UVAxis()),
299+
(∂f₁∂ξ₁, ∂f₁∂ξ₂, ∂f₂∂ξ₁, ∂f₂∂ξ₂),
300+
)
301+
else
302+
v₁, v₂, v₃ = input
303+
@inbounds begin
304+
∂f₁∂ξ₁ = D[i, 1] v₁[1, j, vt]
305+
∂f₁∂ξ₂ = D[j, 1] v₁[i, 1, vt]
306+
∂f₂∂ξ₁ = D[i, 1] v₂[1, j, vt]
307+
∂f₂∂ξ₂ = D[j, 1] v₂[i, 1, vt]
308+
∂f₃∂ξ₁ = D[i, 1] v₃[1, j, vt]
309+
∂f₃∂ξ₂ = D[j, 1] v₃[i, 1, vt]
310+
@simd for k in 2:Nq
311+
∂f₁∂ξ₁ = ∂f₁∂ξ₁ D[i, k] v₁[k, j, vt]
312+
∂f₁∂ξ₂ = ∂f₁∂ξ₂ D[j, k] v₁[i, k, vt]
313+
∂f₂∂ξ₁ = ∂f₂∂ξ₁ D[i, k] v₂[k, j, vt]
314+
∂f₂∂ξ₂ = ∂f₂∂ξ₂ D[j, k] v₂[i, k, vt]
315+
∂f₃∂ξ₁ = ∂f₃∂ξ₁ D[i, k] v₃[k, j, vt]
316+
∂f₃∂ξ₂ = ∂f₃∂ξ₂ D[j, k] v₃[i, k, vt]
317+
end
318+
end
319+
return Geometry.AxisTensor(
320+
(Geometry.Covariant12Axis(), Geometry.UVWAxis()),
321+
(∂f₁∂ξ₁, ∂f₁∂ξ₂, ∂f₂∂ξ₁, ∂f₂∂ξ₂, ∂f₃∂ξ₁, ∂f₃∂ξ₂),
322+
)
278323
end
279-
return Geometry.Covariant12Vector(∂f∂ξ₁, ∂f∂ξ₂)
280324
end
281325

282326
Base.@propagate_inbounds function operator_evaluate(

test/Operators/hybrid/extruded_3dbox_cuda.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,9 @@ end
176176

177177
# Test wdiv(grad()) composed Laplace operator
178178
grad = Operators.Gradient()
179+
@test parent(grad.(g_cpu)) Array(parent(grad.(g_gpu)))
179180
@test parent(wdiv.(grad.(f_cpu))) Array(parent(wdiv.(grad.(f_gpu))))
180-
@test_broken parent(wdiv.(grad.(x_cpu)))
181-
Array(parent(wdiv.(grad.(x_gpu))))
181+
@test parent(wdiv.(grad.(x_cpu))) Array(parent(wdiv.(grad.(x_gpu))))
182182

183183
# Test DSS
184184
@test parent(Spaces.weighted_dss!(wdiv.(grad.(f_cpu))))

test/Operators/hybrid/extruded_sphere_cuda.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,9 @@ end
150150

151151
# Test wdiv(grad()) composed Laplace operator
152152
grad = Operators.Gradient()
153+
@test parent(grad.(g_cpu)) Array(parent(grad.(g_gpu)))
153154
@test parent(wdiv.(grad.(f_cpu))) Array(parent(wdiv.(grad.(f_gpu))))
154-
@test_broken parent(wdiv.(grad.(x_cpu)))
155-
Array(parent(wdiv.(grad.(x_gpu))))
155+
@test parent(wdiv.(grad.(x_cpu))) Array(parent(wdiv.(grad.(x_gpu))))
156156

157157
# Test DSS
158158
@test parent(Spaces.weighted_dss!(wdiv.(grad.(f_cpu))))
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
using Logging
2+
using Test
3+
4+
import ClimaCore:
5+
Domains,
6+
Fields,
7+
Geometry,
8+
Meshes,
9+
Operators,
10+
Spaces,
11+
Quadratures,
12+
Topologies,
13+
DataLayouts,
14+
Grids
15+
16+
using ClimaComms
17+
ClimaComms.@import_required_backends
18+
device = ClimaComms.device()
19+
using ClimaComms
20+
using IntervalSets
21+
22+
FT = Float64
23+
xlim = (FT(0), FT(2π))
24+
zlim = (FT(0), FT(1))
25+
helem = 5
26+
velem = 5
27+
npoly = 5
28+
ndims = 3
29+
stretch = Meshes.Uniform()
30+
comms_context = ClimaComms.SingletonCommsContext(device)
31+
FT = eltype(xlim)
32+
33+
# Horizontal Grid Construction
34+
quad = Quadratures.GLL{npoly + 1}()
35+
horzdomain = Domains.RectangleDomain(
36+
Geometry.XPoint{FT}(xlim[1]) .. Geometry.XPoint{FT}(xlim[2]),
37+
Geometry.YPoint{FT}(xlim[1]) .. Geometry.YPoint{FT}(xlim[2]),
38+
x1periodic = true,
39+
x2periodic = true,
40+
)
41+
# Assume same number of elems (helem) in (x,y) directions
42+
horzmesh = Meshes.RectilinearMesh(horzdomain, helem, helem)
43+
horz_topology = Topologies.Topology2D(
44+
comms_context,
45+
horzmesh,
46+
Topologies.spacefillingcurve(horzmesh),
47+
);
48+
h_space =
49+
Spaces.SpectralElementSpace2D(horz_topology, quad, enable_bubble = true);
50+
horz_grid = Spaces.grid(h_space)
51+
52+
# Vertical Grid Construction
53+
vertdomain = Domains.IntervalDomain(
54+
Geometry.ZPoint{FT}(zlim[1]),
55+
Geometry.ZPoint{FT}(zlim[2]);
56+
boundary_names = (:bottom, :top),
57+
)
58+
vertmesh = Meshes.IntervalMesh(vertdomain, stretch, nelems = velem)
59+
vert_face_space = Spaces.FaceFiniteDifferenceSpace(vertmesh)
60+
vert_topology = Topologies.IntervalTopology(
61+
ClimaComms.SingletonCommsContext(device),
62+
vertmesh,
63+
)
64+
vert_grid = Grids.FiniteDifferenceGrid(vert_topology)
65+
66+
grid = Grids.ExtrudedFiniteDifferenceGrid(horz_grid, vert_grid)
67+
cent_space = Spaces.CenterExtrudedFiniteDifferenceSpace(grid)
68+
face_space = Spaces.FaceExtrudedFiniteDifferenceSpace(grid)
69+
ccoords = Fields.coordinate_field(cent_space)
70+
fcoords = Fields.coordinate_field(face_space)
71+
72+
= Operators.Gradient();
73+
74+
η = @. sin(ccoords.x) + cos(ccoords.y)
75+
η_test1 = @. Geometry.project(Geometry.UVAxis(), (η)).components.data.:1
76+
η_test2 = @. Geometry.project(Geometry.UVAxis(), (η)).components.data.:2
77+
Spaces.weighted_dss!(η_test1)
78+
Spaces.weighted_dss!(η_test2)
79+
80+
𝒻₁ = @. Geometry.UVVector(η, 2η)
81+
𝒻₂ = @. Geometry.UVWVector(η, 2η, 3η)
82+
83+
∇η = @. (η)
84+
∇𝒻₁ = @. Geometry.project(Geometry.UVAxis(), (𝒻₁))
85+
for ii in 1:4
86+
Spaces.weighted_dss!(∇𝒻₁.components.data.:($ii))
87+
end
88+
89+
# Check against known solution component-wise
90+
device isa ClimaComms.CUDADevice ? CUDA.allowscalar(true) : nothing
91+
@test parent(η_test1) parent(∇𝒻₁.components.data.:1)
92+
@test parent(η_test2) parent(∇𝒻₁.components.data.:2)
93+
@test parent(2 .* η_test1) parent(∇𝒻₁.components.data.:3)
94+
@test parent(2 .* η_test2) parent(∇𝒻₁.components.data.:4)
95+
96+
∇𝒻₂ = @. Geometry.project(Geometry.UVAxis(), (𝒻₂))
97+
for ii in 1:6
98+
Spaces.weighted_dss!(∇𝒻₂.components.data.:($ii))
99+
end
100+
101+
# Check against known solution component-wise
102+
@test parent(η_test1) parent(∇𝒻₂.components.data.:1)
103+
@test parent(η_test2) parent(∇𝒻₂.components.data.:2)
104+
@test parent(2 .* η_test1) parent(∇𝒻₂.components.data.:3)
105+
@test parent(2 .* η_test2) parent(∇𝒻₂.components.data.:4)
106+
@test parent(3 .* η_test1) parent(∇𝒻₂.components.data.:5)
107+
@test parent(3 .* η_test2) parent(∇𝒻₂.components.data.:6)

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ UnitTest("Sphere spaces" ,"Spaces/sphere.jl"),
3737
UnitTest("Fields" ,"Fields/unit_field.jl"), # has benchmarks
3838
UnitTest("Spectral elem - rectilinear" ,"Operators/spectralelement/rectilinear.jl"),
3939
UnitTest("Spectral elem - opt" ,"Operators/spectralelement/opt.jl"),
40+
UnitTest("Spectral elem - gradient tensor" ,"Operators/spectralelement/covar_deriv_ops.jl"),
4041
UnitTest("Spectral elem - Diffusion 2d" ,"Operators/spectralelement/unit_diffusion2d.jl"),
4142
UnitTest("Spectral elem - sphere geometry" ,"Operators/spectralelement/sphere_geometry.jl"),
4243
UnitTest("Spectral elem - sphere gradient" ,"Operators/spectralelement/sphere_gradient.jl"),

0 commit comments

Comments
 (0)