From 9c63fe88d813f7f00f6b750c0c2d263f77155dbe Mon Sep 17 00:00:00 2001
From: Charlie Kawczynski
Date: Wed, 16 Apr 2025 17:54:07 -0700
Subject: [PATCH] Add matrix-field benchmark

---
 .buildkite/pipeline.yml                       |   9 ++
 ...mark_fd_ops_shared_memory_matrix_fields.jl | 101 ++++++++++++++++++
 2 files changed, 110 insertions(+)
 create mode 100644 test/Operators/finitedifference/benchmark_fd_ops_shared_memory_matrix_fields.jl

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index fad9238495..55bed3369a 100755
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -1404,6 +1404,15 @@ steps:
         agents:
           slurm_gpus: 1
 
+      - label: "Perf: FD operator matrix field operation (example)"
+        key: "perf_fd_ops_mat_field_example"
+        command: "julia --color=yes --project=.buildkite test/Operators/finitedifference/benchmark_fd_ops_shared_memory_matrix_fields.jl"
+        env:
+          CLIMACOMMS_DEVICE: "CUDA"
+        agents:
+          slurm_gpus: 1
+          slurm_mem: 20GB
+
       - label: "Perf: SEM operator benchmarks (cuda Float32)"
         key: "perf_gpu_spectral_ops_cuda_float32"
         command:
diff --git a/test/Operators/finitedifference/benchmark_fd_ops_shared_memory_matrix_fields.jl b/test/Operators/finitedifference/benchmark_fd_ops_shared_memory_matrix_fields.jl
new file mode 100644
index 0000000000..6a64c51e11
--- /dev/null
+++ b/test/Operators/finitedifference/benchmark_fd_ops_shared_memory_matrix_fields.jl
@@ -0,0 +1,101 @@
+#=
+julia --project=.buildkite
+using Revise; include("test/Operators/finitedifference/benchmark_fd_ops_shared_memory_matrix_fields.jl")
+=#
+include("utils_fd_ops_shared_memory.jl")
+using ClimaComms
+using LinearAlgebra
+using BenchmarkTools
+ClimaComms.@import_required_backends
+using ClimaCore.CommonSpaces
+using ClimaCore: Operators, Fields, MatrixFields, Geometry, Spaces
+using ClimaCore.Utilities: half
+
+"""
+    covariant3_unit_vector(lg)
+
+Return a `Geometry.Covariant3Vector` whose component is 1 divided by the norm
+of `Geometry.Covariant3Vector(1)` evaluated with the local geometry `lg`.
+"""
+covariant3_unit_vector(lg) =
+    Geometry.Covariant3Vector(
+        1 / Geometry._norm(Geometry.Covariant3Vector(1), lg)
+    )
+
+#! format: off
+"""
+    bench_kernels!(L, K, C)
+
+Build the boundary, interpolation, divergence and gradient operators on the
+space of `K`, benchmark `kernel!` writing into `L`, and return the result of
+`@benchmark`.
+
+The call is synchronized with `ClimaComms.@cuda_sync` on the device of `K`'s
+space instead of `CUDA.@sync`, so this function does not need `CUDA` to be
+in scope when the script runs on a non-CUDA device.
+"""
+function bench_kernels!(L, K, C)
+    space = axes(K)
+    device = ClimaComms.device(space)
+    ᶠspace = Spaces.face_space(space)
+    levels = Spaces.nlevels(ᶠspace)
+    # Local geometry at face level `levels - half`, which feeds the `top` value.
+    ᶠlg_N = Fields.level(
+        Fields.local_geometry_field(ᶠspace),
+        levels - half,
+    )
+    # Scaled by 0, so the value handed to the `top` boundary is zero.
+    topfluxBC = @. covariant3_unit_vector(ᶠlg_N) * 0
+    topBC_op = Operators.SetBoundaryOperator(
+        top = Operators.SetValue(topfluxBC),
+        bottom = Operators.SetValue(Geometry.Covariant3Vector(0)),
+    )
+    interpc2f_op = Operators.InterpolateC2F(
+        bottom = Operators.Extrapolate(),
+        top = Operators.Extrapolate(),
+    )
+    divf2c_op = Operators.DivergenceF2C()
+    divf2c_matrix = MatrixFields.operator_matrix(divf2c_op)
+    gradc2f_op = Operators.GradientC2F(
+        top = Operators.SetGradient(Geometry.WVector(0)),
+        bottom = Operators.SetGradient(Geometry.WVector(0)),
+    )
+    gradc2f_matrix = MatrixFields.operator_matrix(gradc2f_op)
+    args = (L, K, C, gradc2f_matrix, divf2c_matrix, interpc2f_op, topBC_op)
+    return @benchmark ClimaComms.@cuda_sync $device kernel!($args...)
+end
+
+"""
+    kernel!(L, K, C, gradc2f_matrix, divf2c_matrix, interpc2f_op, topBC_op)
+
+Overwrite `L` in one `@.` broadcast expression: `divf2c_matrix` times the sum
+of `DiagonalMatrixRow(interpc2f_op(K)) * gradc2f_matrix * DiagonalMatrixRow(C)`
+and a `LowerDiagonalMatrixRow` built from `topBC_op` applied to a zero
+`Covariant3Vector`, minus the identity `(I,)`.
+"""
+function kernel!(L, K, C, gradc2f_matrix, divf2c_matrix, interpc2f_op, topBC_op)
+    @. L = (
+        divf2c_matrix() * (
+            MatrixFields.DiagonalMatrixRow(interpc2f_op(K)) *
+            gradc2f_matrix() * MatrixFields.DiagonalMatrixRow(C) +
+            MatrixFields.LowerDiagonalMatrixRow(
+                topBC_op(Geometry.Covariant3Vector(zero(interpc2f_op(K)))),
+            )
+        )
+    ) - (I,)
+end
+
+# Script entry point: benchmark the kernel on an extruded space with FT fields.
+let FT = Float64
+    ᶜspace =
+        get_space_extruded(ClimaComms.device(), FT; z_elem = 10, h_elem = 30)
+    K = Fields.Field(FT, ᶜspace)
+    C = Fields.Field(FT, ᶜspace)
+    L = Fields.Field(MatrixFields.TridiagonalMatrixRow{FT}, ᶜspace)
+    fill!(K, 1)
+    fill!(C, 1)
+    trial = bench_kernels!(L, K, C)
+    # A script run does not auto-display values, so print the trial explicitly.
+    isinteractive() || (show(stdout, MIME("text/plain"), trial); println())
+    trial
+end