From ef84bb0e6d00390a7fe9ff57324310056201f6cb Mon Sep 17 00:00:00 2001 From: LasNikas Date: Wed, 21 May 2025 14:47:21 +0200 Subject: [PATCH 01/17] adapt array in interpolation --- src/general/gpu.jl | 8 ++++++++ src/general/interpolation.jl | 27 +++++++++++++++++---------- src/general/semidiscretization.jl | 17 ++++++++--------- 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/general/gpu.jl b/src/general/gpu.jl index 87b5f95aa4..9898da475d 100644 --- a/src/general/gpu.jl +++ b/src/general/gpu.jl @@ -29,3 +29,11 @@ end @inline function PointNeighbors.parallel_foreach(f, iterator, semi::Semidiscretization) PointNeighbors.parallel_foreach(f, iterator, semi.parallelization_backend) end + +function allocate(backend::KernelAbstractions.Backend, ELTYPE, size) + return KernelAbstractions.allocate(backend, ELTYPE, size) +end + +function allocate(backend, ELTYPE, size) + return Array{ELTYPE, length(size)}(undef, size) +end diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index a17cddddba..8ab2969cad 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -512,12 +512,12 @@ end n_points = size(point_coords, 2) ELTYPE = eltype(point_coords) - computed_density = zeros(ELTYPE, n_points) - other_density = zeros(ELTYPE, n_points) - neighbor_count = zeros(Int, n_points) - shepard_coefficient = zeros(ELTYPE, n_points) + computed_density = allocate(semi.parallelization_backend, ELTYPE, n_points) + other_density = allocate(semi.parallelization_backend, ELTYPE, n_points) + shepard_coefficient = allocate(semi.parallelization_backend, ELTYPE, n_points) + neighbor_count = allocate(semi.parallelization_backend, Int, n_points) - cache = create_cache_interpolation(ref_system, n_points) + cache = create_cache_interpolation(ref_system, n_points, semi) ref_id = system_indices(ref_system, semi) ref_smoothing_kernel = ref_system.smoothing_kernel @@ -587,15 +587,22 @@ end return (; computed_density=computed_density, neighbor_count, point_coords, cache...) end -@inline function create_cache_interpolation(ref_system::FluidSystem, n_points) - velocity = zeros(eltype(ref_system), ndims(ref_system), n_points) - pressure = zeros(eltype(ref_system), n_points) - density = zeros(eltype(ref_system), n_points) +@inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi) + velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points) + pressure_ = zeros(eltype(ref_system), n_points) + + if semi.parallelization_backend isa KernelAbstractions.Backend + velocity = Adapt.adapt(semi.parallelization_backend, velocity_) + pressure = Adapt.adapt(semi.parallelization_backend, pressure_) + else + velocity = velocity_ + pressure = pressure_ + end return (; velocity, pressure, density) end -@inline function create_cache_interpolation(ref_system::SolidSystem, n_points) +@inline function create_cache_interpolation(ref_system::SolidSystem, n_points, semi) velocity = zeros(eltype(ref_system), ndims(ref_system), n_points) jacobian = zeros(eltype(ref_system), n_points) von_mises_stress = zeros(eltype(ref_system), n_points) diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl index 5b7eaa5302..0bc2cef57b 100644 --- a/src/general/semidiscretization.jl +++ b/src/general/semidiscretization.jl @@ -282,19 +282,18 @@ function semidiscretize(semi, tspan; reset_threads=true) sizes_u = (u_nvariables(system) * n_moving_particles(system) for system in systems) sizes_v = (v_nvariables(system) * n_moving_particles(system) for system in systems) + # Use either the specified backend, e.g., `CUDABackend` or `MetalBackend` or + # use CPU vectors for all CPU backends. + u0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_u)) + v0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_v)) + if semi.parallelization_backend isa KernelAbstractions.Backend - # Use the specified backend, e.g., `CUDABackend` or `MetalBackend` - u0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE, - sum(sizes_u)) - v0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE, - sum(sizes_v)) + u0_ode = u0_ode_ + v0_ode = v0_ode_ else - # Use CPU vectors for all CPU backends. - # These are wrapped in `ThreadedBroadcastArray`s + # CPU vectors are wrapped in `ThreadedBroadcastArray`s # to make broadcasting (which is done by OrdinaryDiffEq.jl) multithreaded. # See https://github.com/trixi-framework/TrixiParticles.jl/pull/722 for more details. - u0_ode_ = Vector{ELTYPE}(undef, sum(sizes_u)) - v0_ode_ = Vector{ELTYPE}(undef, sum(sizes_v)) u0_ode = ThreadedBroadcastArray(u0_ode_; parallelization_backend=semi.parallelization_backend) v0_ode = ThreadedBroadcastArray(v0_ode_; From 3f2d7b8f1690c78144b959aaf8d14774019a0743 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 22 May 2025 18:36:14 +0200 Subject: [PATCH 02/17] fix --- src/general/interpolation.jl | 96 ++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 20 deletions(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index 8ab2969cad..82ca3e28bc 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -565,49 +565,105 @@ end computed_density[point] = NaN neighbor_count[point] = 0 - foreach(cache) do field - if field isa AbstractVector - field[point] = NaN - else - field[:, point] .= NaN - end - end + set_NaN!(cache, point, ref_system) else # Normalize all quantities by the shepard coefficient - foreach(cache) do field - if field isa AbstractVector - field[point] /= shepard_coefficient[point] - else - field[:, point] ./= shepard_coefficient[point] - end - end + divide_by_shepard_coefficient!(cache, point, shepard_coefficient, + ref_system) end end - return (; computed_density=computed_density, neighbor_count, point_coords, cache...) + cache_ = map(x -> Array(x), cache) + + return (; computed_density=Array(computed_density), point_coords=Array(point_coords), + neighbor_count=Array(neighbor_count), cache_...) +end + +function set_NaN!(cache, point, ref_system::FluidSystem) + for dim in 1:ndims(ref_system) + cache.velocity[dim, point] = NaN + end + cache.pressure[point] = NaN + cache.density[point] = NaN + + return cache +end + +function set_NaN!(cache, point, ref_system::SolidSystem) + for dim in 1:ndims(ref_system) + cache.velocity[dim, point] = NaN + end + cache.jacobian[point] = NaN + cache.von_mises_stress[point] = NaN + + for j in axes(cache.cauchy_stress, 2), i in axes(cache.cauchy_stress, 1) + cache.cauchy_stress[i, j, point] = NaN + end + + return cache +end + +function divide_by_shepard_coefficient!(cache, point, shepard_coefficient, + ref_system::FluidSystem) + for dim in 1:ndims(ref_system) + cache.velocity[dim, point] /= shepard_coefficient[point] + end + cache.pressure[point] /= shepard_coefficient[point] + cache.density[point] /= shepard_coefficient[point] + + return cache +end + +function divide_by_shepard_coefficient!(cache, point, shepard_coefficient, + ref_system::SolidSystem) + for dim in 1:ndims(ref_system) + cache.velocity[dim, point] /= shepard_coefficient[point] + end + cache.jacobian[point] /= shepard_coefficient[point] + cache.von_mises_stress[point] /= shepard_coefficient[point] + + for j in axes(cache.cauchy_stress, 2), i in axes(cache.cauchy_stress, 1) + cache.cauchy_stress[i, j, point] /= shepard_coefficient[point] + end + return cache end @inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi) velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points) pressure_ = zeros(eltype(ref_system), n_points) + density_ = zeros(eltype(ref_system), n_points) if semi.parallelization_backend isa KernelAbstractions.Backend velocity = Adapt.adapt(semi.parallelization_backend, velocity_) pressure = Adapt.adapt(semi.parallelization_backend, pressure_) + density = Adapt.adapt(semi.parallelization_backend, density_) else velocity = velocity_ pressure = pressure_ + density = density_ end return (; velocity, pressure, density) end @inline function create_cache_interpolation(ref_system::SolidSystem, n_points, semi) - velocity = zeros(eltype(ref_system), ndims(ref_system), n_points) - jacobian = zeros(eltype(ref_system), n_points) - von_mises_stress = zeros(eltype(ref_system), n_points) - cauchy_stress = zeros(eltype(ref_system), ndims(ref_system), ndims(ref_system), - n_points) + velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points) + jacobian_ = zeros(eltype(ref_system), n_points) + von_mises_stress_ = zeros(eltype(ref_system), n_points) + cauchy_stress_ = zeros(eltype(ref_system), ndims(ref_system), ndims(ref_system), + n_points) + + if semi.parallelization_backend isa KernelAbstractions.Backend + velocity = Adapt.adapt(semi.parallelization_backend, velocity_) + jacobian = Adapt.adapt(semi.parallelization_backend, jacobian_) + von_mises_stress = Adapt.adapt(semi.parallelization_backend, von_mises_stress_) + cauchy_stress = Adapt.adapt(semi.parallelization_backend, cauchy_stress_) + else + velocity = velocity_ + jacobian = jacobian_ + von_mises_stress = von_mises_stress_ + cauchy_stress = cauchy_stress_ + end return (; velocity, jacobian, von_mises_stress, cauchy_stress) end From ffbbaba227fb35242cf3d84a8333e735547f7b3d Mon Sep 17 00:00:00 2001 From: LasNikas Date: Fri, 23 May 2025 10:20:40 +0200 Subject: [PATCH 03/17] allocate instead of adapt --- src/general/interpolation.jl | 53 ++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index 82ca3e28bc..19f22eb1c5 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -629,41 +629,34 @@ function divide_by_shepard_coefficient!(cache, point, shepard_coefficient, end @inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi) - velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points) - pressure_ = zeros(eltype(ref_system), n_points) - density_ = zeros(eltype(ref_system), n_points) - - if semi.parallelization_backend isa KernelAbstractions.Backend - velocity = Adapt.adapt(semi.parallelization_backend, velocity_) - pressure = Adapt.adapt(semi.parallelization_backend, pressure_) - density = Adapt.adapt(semi.parallelization_backend, density_) - else - velocity = velocity_ - pressure = pressure_ - density = density_ - end + (; parallelization_backend) = semi + + velocity = allocate(parallelization_backend, eltype(ref_system), + (ndims(ref_system), n_points)) + pressure = allocate(parallelization_backend, eltype(ref_system), n_points) + density = allocate(parallelization_backend, eltype(ref_system), n_points) + + set_zero!(velocity) + set_zero!(pressure) + set_zero!(density) return (; velocity, pressure, density) end @inline function create_cache_interpolation(ref_system::SolidSystem, n_points, semi) - velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points) - jacobian_ = zeros(eltype(ref_system), n_points) - von_mises_stress_ = zeros(eltype(ref_system), n_points) - cauchy_stress_ = zeros(eltype(ref_system), ndims(ref_system), ndims(ref_system), - n_points) - - if semi.parallelization_backend isa KernelAbstractions.Backend - velocity = Adapt.adapt(semi.parallelization_backend, velocity_) - jacobian = Adapt.adapt(semi.parallelization_backend, jacobian_) - von_mises_stress = Adapt.adapt(semi.parallelization_backend, von_mises_stress_) - cauchy_stress = Adapt.adapt(semi.parallelization_backend, cauchy_stress_) - else - velocity = velocity_ - jacobian = jacobian_ - von_mises_stress = von_mises_stress_ - cauchy_stress = cauchy_stress_ - end + (; parallelization_backend) = semi + + velocity = allocate(parallelization_backend, eltype(ref_system), + (ndims(ref_system), n_points)) + jacobian = allocate(parallelization_backend, eltype(ref_system), n_points) + von_mises_stress = allocate(parallelization_backend, eltype(ref_system), n_points) + cauchy_stress = allocate(parallelization_backend, eltype(ref_system), + (ndims(ref_system), ndims(ref_system), n_points)) + + set_zero!(velocity) + set_zero!(jacobian) + set_zero!(von_mises_stress) + set_zero!(cauchy_stress) return (; velocity, jacobian, von_mises_stress, cauchy_stress) end From 8516db59becf20c951a5d058da9a223884f50306 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Fri, 23 May 2025 10:40:02 +0200 Subject: [PATCH 04/17] fix --- src/general/interpolation.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index 19f22eb1c5..d2cd6e0d40 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -517,6 +517,11 @@ end shepard_coefficient = allocate(semi.parallelization_backend, ELTYPE, n_points) neighbor_count = allocate(semi.parallelization_backend, Int, n_points) + set_zero!(computed_density) + set_zero!(other_density) + set_zero!(shepard_coefficient) + set_zero!(neighbor_count) + cache = create_cache_interpolation(ref_system, n_points, semi) ref_id = system_indices(ref_system, semi) From acceb98ece5d2a57dd227921c1cebedf592f15a5 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Fri, 23 May 2025 12:13:54 +0200 Subject: [PATCH 05/17] consistency --- src/general/interpolation.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index d2cd6e0d40..68d5bbf02c 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -570,7 +570,7 @@ end computed_density[point] = NaN neighbor_count[point] = 0 - set_NaN!(cache, point, ref_system) + set_nan!(cache, point, ref_system) else # Normalize all quantities by the shepard coefficient divide_by_shepard_coefficient!(cache, point, shepard_coefficient, @@ -584,7 +584,7 @@ end neighbor_count=Array(neighbor_count), cache_...) end -function set_NaN!(cache, point, ref_system::FluidSystem) +function set_nan!(cache, point, ref_system::FluidSystem) for dim in 1:ndims(ref_system) cache.velocity[dim, point] = NaN end @@ -594,7 +594,7 @@ function set_NaN!(cache, point, ref_system::FluidSystem) return cache end -function set_NaN!(cache, point, ref_system::SolidSystem) +function set_nan!(cache, point, ref_system::SolidSystem) for dim in 1:ndims(ref_system) cache.velocity[dim, point] = NaN end From 3371c9878b68448090bf75b92641074dfdac8eee Mon Sep 17 00:00:00 2001 From: LasNikas Date: Mon, 26 May 2025 10:38:13 +0200 Subject: [PATCH 06/17] add gpu tests --- test/examples/gpu.jl | 69 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl index c99c67f139..2935679316 100644 --- a/test/examples/gpu.jl +++ b/test/examples/gpu.jl @@ -391,3 +391,72 @@ end end end end + +@testset verbose=true "Postprocessing $TRIXIPARTICLES_TEST_" begin + @testset verbose=true "Interpolation" begin + # Import variables into scope + trixi_include_changeprecision(Float32, @__MODULE__, + joinpath(examples_dir(), "fluid", + "hydrostatic_water_column_2d.jl"), + sol=nothing, ode=nothing) + + # Neighborhood search with `FullGridCellList` for GPU compatibility + min_corner = minimum(tank.boundary.coordinates, dims=2) + max_corner = maximum(tank.boundary.coordinates, dims=2) + cell_list = FullGridCellList(; min_corner, max_corner, + max_points_per_cell=500) + semi_fullgrid = Semidiscretization(fluid_system, boundary_system, + neighborhood_search=GridNeighborhoodSearch{2}(; + cell_list), + parallelization_backend=Main.parallelization_backend) + + trixi_include_changeprecision(Float32, @__MODULE__, + joinpath(examples_dir(), + "fluid", "hydrostatic_water_column_2d.jl"); + semi=semi_fullgrid, tspan=(0.0f0, 0.1f0)) + + # Interpolation parameters + position_x = tank_size[1] / 2 + n_interpolation_points = 10 + start_point = [position_x, -fluid_particle_spacing] + end_point = [position_x, tank_size[2]] + + result = interpolate_line(start_point, end_point, n_interpolation_points, + semi, fluid_system, sol) + @test isapprocx(plane.computed_density, + [NaN, + 1051.550381846236, + 1058.840859422405, + 1054.6636640426582, + 1040.437243621303, + 1031.95170017119, + 1023.5350903161437, + 1009.1916142262469, + 693.8269492612843, + NaN]) + + @test isapporx(result.density, + [NaN, + 1066.8978311276544, + 1058.4834534513207, + 1049.7765132420302, + 1040.2795841900875, + 1030.331702082792, + 1020.2045843019229, + 1010.3658109398008, + 1003.3921419243012, + NaN]) + + @test isapporx(result.pressure, + [NaN, + 8198.684648008579, + 6983.142465751377, + 5788.908403992583, + 4551.910032518114, + 3327.0608294382264, + 2149.986239483061, + 1071.880176298758, + 342.81444988598906, + NaN]) + end +end From 0b7002da55b153ed665f57a5412f05d4cc28ad5d Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 3 Jun 2025 11:30:09 +0200 Subject: [PATCH 07/17] implement suggestions --- src/general/interpolation.jl | 104 +++++++++++++++++------------------ 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index 68d5bbf02c..cf54d9906c 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -570,11 +570,14 @@ end computed_density[point] = NaN neighbor_count[point] = 0 - set_nan!(cache, point, ref_system) + foreach(Tuple(cache)) do field + set_nan!(field, point) + end else # Normalize all quantities by the shepard coefficient - divide_by_shepard_coefficient!(cache, point, shepard_coefficient, - ref_system) + foreach(Tuple(cache)) do field + divide_by_shepard_coefficient!(field, shepard_coefficient, point) + end end end @@ -584,55 +587,6 @@ end neighbor_count=Array(neighbor_count), cache_...) end -function set_nan!(cache, point, ref_system::FluidSystem) - for dim in 1:ndims(ref_system) - cache.velocity[dim, point] = NaN - end - cache.pressure[point] = NaN - cache.density[point] = NaN - - return cache -end - -function set_nan!(cache, point, ref_system::SolidSystem) - for dim in 1:ndims(ref_system) - cache.velocity[dim, point] = NaN - end - cache.jacobian[point] = NaN - cache.von_mises_stress[point] = NaN - - for j in axes(cache.cauchy_stress, 2), i in axes(cache.cauchy_stress, 1) - cache.cauchy_stress[i, j, point] = NaN - end - - return cache -end - -function divide_by_shepard_coefficient!(cache, point, shepard_coefficient, - ref_system::FluidSystem) - for dim in 1:ndims(ref_system) - cache.velocity[dim, point] /= shepard_coefficient[point] - end - cache.pressure[point] /= shepard_coefficient[point] - cache.density[point] /= shepard_coefficient[point] - - return cache -end - -function divide_by_shepard_coefficient!(cache, point, shepard_coefficient, - ref_system::SolidSystem) - for dim in 1:ndims(ref_system) - cache.velocity[dim, point] /= shepard_coefficient[point] - end - cache.jacobian[point] /= shepard_coefficient[point] - cache.von_mises_stress[point] /= shepard_coefficient[point] - - for j in axes(cache.cauchy_stress, 2), i in axes(cache.cauchy_stress, 1) - cache.cauchy_stress[i, j, point] /= shepard_coefficient[point] - end - return cache -end - @inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi) (; parallelization_backend) = semi @@ -704,3 +658,49 @@ end return cache end + +function set_nan!(field::AbstractVector, point) + field[point] = NaN + + return field +end + +function set_nan!(field::AbstractArray{T, 2}, point) + for dim in axes(field, 1) + field[dim, point] = NaN + end + + return field +end + +function set_nan!(field::AbstractArray{T, 3}, point) + for j in axes(field, 2), i in axes(field, 1) + field[i, j, point] = NaN + end + + return field +end + +function divide_by_shepard_coefficient!(field::AbstractVector, shepard_coefficient, point) + field[point] /= shepard_coefficient[point] + + return field +end + +function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coefficient, + point) + for dim in axes(field, 1) + field[dim, point] /= shepard_coefficient[point] + end + + return cache +end + +function divide_by_shepard_coefficient!(field::AbstractArray{T, 3}, shepard_coefficient, + point) + for j in axes(field, 2), i in axes(field, 1) + field[i, j, point] /= shepard_coefficient[point] + end + + return field +end From fa9bad82e0c03f19e202aee4f8b0eb7fc2075872 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 3 Jun 2025 11:41:24 +0200 Subject: [PATCH 08/17] fix --- src/general/interpolation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index cf54d9906c..fb0b6551a6 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -688,7 +688,7 @@ function divide_by_shepard_coefficient!(field::AbstractVector, shepard_coefficie end function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coefficient, - point) + point) where {T} for dim in axes(field, 1) field[dim, point] /= shepard_coefficient[point] end @@ -697,7 +697,7 @@ function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coef end function divide_by_shepard_coefficient!(field::AbstractArray{T, 3}, shepard_coefficient, - point) + point) where {T} for j in axes(field, 2), i in axes(field, 1) field[i, j, point] /= shepard_coefficient[point] end From c39e0be6e3a436981752793699891f9ca574bb61 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 3 Jun 2025 11:46:44 +0200 Subject: [PATCH 09/17] fix again --- src/general/interpolation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index fb0b6551a6..ff013645d9 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -665,7 +665,7 @@ function set_nan!(field::AbstractVector, point) return field end -function set_nan!(field::AbstractArray{T, 2}, point) +function set_nan!(field::AbstractArray{T, 2}, point) where {T} for dim in axes(field, 1) field[dim, point] = NaN end @@ -673,7 +673,7 @@ function set_nan!(field::AbstractArray{T, 2}, point) return field end -function set_nan!(field::AbstractArray{T, 3}, point) +function set_nan!(field::AbstractArray{T, 3}, point) where {T} for j in axes(field, 2), i in axes(field, 1) field[i, j, point] = NaN end From e59c21a734c13564f4fd4a605fc7fc4c3a935295 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 3 Jun 2025 14:50:22 +0200 Subject: [PATCH 10/17] fix tests --- src/general/interpolation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index ff013645d9..ea8953130d 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -693,7 +693,7 @@ function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coef field[dim, point] /= shepard_coefficient[point] end - return cache + return field end function divide_by_shepard_coefficient!(field::AbstractArray{T, 3}, shepard_coefficient, From 5a3d5fc06f5a837b90a7ee53b6ab3bebeaeeb8ca Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 3 Jun 2025 19:37:43 +0200 Subject: [PATCH 11/17] fix gpu test --- test/examples/gpu.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl index 4c5d11e1b7..66f0abba4b 100644 --- a/test/examples/gpu.jl +++ b/test/examples/gpu.jl @@ -423,8 +423,8 @@ end end_point = [position_x, tank_size[2]] result = interpolate_line(start_point, end_point, n_interpolation_points, - semi, fluid_system, sol) - @test isapprocx(plane.computed_density, + semi_fullgrid, fluid_system, sol) + @test isapprox(plane.computed_density, [NaN, 1051.550381846236, 1058.840859422405, @@ -436,7 +436,7 @@ end 693.8269492612843, NaN]) - @test isapporx(result.density, + @test isapprox(result.density, [NaN, 1066.8978311276544, 1058.4834534513207, @@ -448,7 +448,7 @@ end 1003.3921419243012, NaN]) - @test isapporx(result.pressure, + @test isapprox(result.pressure, [NaN, 8198.684648008579, 6983.142465751377, From 746b23840bf9a16bd679fcc97b37a455d2596bf3 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 3 Jun 2025 19:40:53 +0200 Subject: [PATCH 12/17] apply formatter --- test/examples/gpu.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl index 66f0abba4b..060e61c520 100644 --- a/test/examples/gpu.jl +++ b/test/examples/gpu.jl @@ -424,17 +424,18 @@ end result = interpolate_line(start_point, end_point, n_interpolation_points, semi_fullgrid, fluid_system, sol) + @test isapprox(plane.computed_density, - [NaN, - 1051.550381846236, - 1058.840859422405, - 1054.6636640426582, - 1040.437243621303, - 1031.95170017119, - 1023.5350903161437, - 1009.1916142262469, - 693.8269492612843, - NaN]) + [NaN, + 1051.550381846236, + 1058.840859422405, + 1054.6636640426582, + 1040.437243621303, + 1031.95170017119, + 1023.5350903161437, + 1009.1916142262469, + 693.8269492612843, + NaN]) @test isapprox(result.density, [NaN, From 9094341d4759b89f1bd9e2a56c47f26f71c3ddde Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 3 Jun 2025 20:21:04 +0200 Subject: [PATCH 13/17] fix --- test/examples/gpu.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl index 060e61c520..9515c0a6a6 100644 --- a/test/examples/gpu.jl +++ b/test/examples/gpu.jl @@ -423,7 +423,7 @@ end end_point = [position_x, tank_size[2]] result = interpolate_line(start_point, end_point, n_interpolation_points, - semi_fullgrid, fluid_system, sol) + semi_fullgrid, semi_fullgrid.systems[1], sol) @test isapprox(plane.computed_density, [NaN, From 83727badd1000c3e119826e808425008a63b1a76 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Wed, 4 Jun 2025 10:58:00 +0200 Subject: [PATCH 14/17] fix tests --- src/general/interpolation.jl | 8 +++- test/examples/gpu.jl | 77 ++++++++++++++++++------------------ 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index ea8953130d..395dd4cac6 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -393,7 +393,13 @@ function interpolate_line(start, end_, n_points, semi, ref_system, v_ode, u_ode; # Convert to coordinate matrix points_coords_ = collect(reinterpret(reshape, eltype(start_svector), points_coords)) - return interpolate_points(points_coords_, semi, ref_system, v_ode, u_ode; + if semi.parallelization_backend isa KernelAbstractions.Backend + points_coords_new = Adapt.adapt(semi.parallelization_backend, points_coords_) + else + points_coords_new = points_coords_ + end + + return interpolate_points(points_coords_new, semi, ref_system, v_ode, u_ode; smoothing_length=smoothing_length, cut_off_bnd=cut_off_bnd, clip_negative_pressure) end diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl index 9515c0a6a6..79f6e3fed8 100644 --- a/test/examples/gpu.jl +++ b/test/examples/gpu.jl @@ -404,8 +404,7 @@ end # Neighborhood search with `FullGridCellList` for GPU compatibility min_corner = minimum(tank.boundary.coordinates, dims=2) max_corner = maximum(tank.boundary.coordinates, dims=2) - cell_list = FullGridCellList(; min_corner, max_corner, - max_points_per_cell=500) + cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=500) semi_fullgrid = Semidiscretization(fluid_system, boundary_system, neighborhood_search=GridNeighborhoodSearch{2}(; cell_list), @@ -416,6 +415,9 @@ end "fluid", "hydrostatic_water_column_2d.jl"); semi=semi_fullgrid, tspan=(0.0f0, 0.1f0)) + semi_new = TrixiParticles.Adapt.adapt(semi_fullgrid.parallelization_backend, + sol.prob.p) + # Interpolation parameters position_x = tank_size[1] / 2 n_interpolation_points = 10 @@ -423,42 +425,39 @@ end end_point = [position_x, tank_size[2]] result = interpolate_line(start_point, end_point, n_interpolation_points, - semi_fullgrid, semi_fullgrid.systems[1], sol) - - @test isapprox(plane.computed_density, - [NaN, - 1051.550381846236, - 1058.840859422405, - 1054.6636640426582, - 1040.437243621303, - 1031.95170017119, - 1023.5350903161437, - 1009.1916142262469, - 693.8269492612843, - NaN]) - - @test isapprox(result.density, - [NaN, - 1066.8978311276544, - 1058.4834534513207, - 1049.7765132420302, - 1040.2795841900875, - 1030.331702082792, - 1020.2045843019229, - 1010.3658109398008, - 1003.3921419243012, - NaN]) - - @test isapprox(result.pressure, - [NaN, - 8198.684648008579, - 6983.142465751377, - 5788.908403992583, - 4551.910032518114, - 3327.0608294382264, - 2149.986239483061, - 1071.880176298758, - 342.81444988598906, - NaN]) + semi_new, semi_new.systems[1], sol; cut_off_bnd=false) + + @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN + Float32[62.50176, + 1053.805, + 1061.2959, + 1055.8348, + 1043.9069, + 1038.2051, + 1033.1708, + 1014.2249, + 672.61566]) + + @test isapprox(result.density[1:(end - 1)], # Exclude last NaN + Float32[1078.3738, + 1070.8535, + 1061.2003, + 1052.4126, + 1044.5074, + 1037.0444, + 1028.4813, + 1014.7941, + 1003.6117]) + + @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN + Float32[9940.595, + 8791.842, + 7368.837, + 6143.6562, + 5093.711, + 4143.313, + 3106.1575, + 1552.1078, + 366.71414]) end end From 82b6b461ac1593367ee19c85d8da46a46b4315f8 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Wed, 4 Jun 2025 22:31:16 +0200 Subject: [PATCH 15/17] add plane interpolation --- src/general/interpolation.jl | 25 +++++----- test/examples/gpu.jl | 90 ++++++++++++++++++------------------ 2 files changed, 59 insertions(+), 56 deletions(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index 395dd4cac6..e1e44c2758 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -393,13 +393,7 @@ function interpolate_line(start, end_, n_points, semi, ref_system, v_ode, u_ode; # Convert to coordinate matrix points_coords_ = collect(reinterpret(reshape, eltype(start_svector), points_coords)) - if semi.parallelization_backend isa KernelAbstractions.Backend - points_coords_new = Adapt.adapt(semi.parallelization_backend, points_coords_) - else - points_coords_new = points_coords_ - end - - return interpolate_points(points_coords_new, semi, ref_system, v_ode, u_ode; + return interpolate_points(points_coords_, semi, ref_system, v_ode, u_ode; smoothing_length=smoothing_length, cut_off_bnd=cut_off_bnd, clip_negative_pressure) end @@ -510,12 +504,18 @@ function process_neighborhood_searches(semi, u_ode, ref_system, smoothing_length end # Interpolate points with given neighborhood searches -@inline function interpolate_points(point_coords, semi, ref_system, v_ode, u_ode, +@inline function interpolate_points(point_coords_, semi, ref_system, v_ode, u_ode, neighborhood_searches; smoothing_length=initial_smoothing_length(ref_system), cut_off_bnd=true, clip_negative_pressure=false) (; parallelization_backend) = semi + if semi.parallelization_backend isa KernelAbstractions.Backend + point_coords = Adapt.adapt(semi.parallelization_backend, point_coords_) + else + point_coords = point_coords_ + end + n_points = size(point_coords, 2) ELTYPE = eltype(point_coords) computed_density = allocate(semi.parallelization_backend, ELTYPE, n_points) @@ -576,11 +576,13 @@ end computed_density[point] = NaN neighbor_count[point] = 0 + # We need to convert the `NamedTuple` to a `Tuple` for GPU compatibility foreach(Tuple(cache)) do field set_nan!(field, point) end else - # Normalize all quantities by the shepard coefficient + # Normalize all quantities by the shepard coefficient. + # We need to convert the `NamedTuple` to a `Tuple` for GPU compatibility. foreach(Tuple(cache)) do field divide_by_shepard_coefficient!(field, shepard_coefficient, point) end @@ -671,7 +673,7 @@ function set_nan!(field::AbstractVector, point) return field end -function set_nan!(field::AbstractArray{T, 2}, point) where {T} +function set_nan!(field::AbstractMatrix, point) for dim in axes(field, 1) field[dim, point] = NaN end @@ -693,8 +695,7 @@ function divide_by_shepard_coefficient!(field::AbstractVector, shepard_coefficie return field end -function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coefficient, - point) where {T} +function divide_by_shepard_coefficient!(field::AbstractMatrix, shepard_coefficient, point) for dim in axes(field, 1) field[dim, point] /= shepard_coefficient[point] end diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl index 79f6e3fed8..1db23eaf9a 100644 --- a/test/examples/gpu.jl +++ b/test/examples/gpu.jl @@ -415,49 +415,51 @@ end "fluid", "hydrostatic_water_column_2d.jl"); semi=semi_fullgrid, tspan=(0.0f0, 0.1f0)) - semi_new = TrixiParticles.Adapt.adapt(semi_fullgrid.parallelization_backend, - sol.prob.p) - - # Interpolation parameters - position_x = tank_size[1] / 2 - n_interpolation_points = 10 - start_point = [position_x, -fluid_particle_spacing] - end_point = [position_x, tank_size[2]] - - result = interpolate_line(start_point, end_point, n_interpolation_points, - semi_new, semi_new.systems[1], sol; cut_off_bnd=false) - - @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN - Float32[62.50176, - 1053.805, - 1061.2959, - 1055.8348, - 1043.9069, - 1038.2051, - 1033.1708, - 1014.2249, - 672.61566]) - - @test isapprox(result.density[1:(end - 1)], # Exclude last NaN - Float32[1078.3738, - 1070.8535, - 1061.2003, - 1052.4126, - 1044.5074, - 1037.0444, - 1028.4813, - 1014.7941, - 1003.6117]) - - @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN - Float32[9940.595, - 8791.842, - 7368.837, - 6143.6562, - 5093.711, - 4143.313, - 3106.1575, - 1552.1078, - 366.71414]) + semi_new = sol.prob.p + + @testset verbose=true "Line" begin + # Interpolation parameters + position_x = tank_size[1] / 2 + n_interpolation_points = 10 + start_point = [position_x, -fluid_particle_spacing] + end_point = [position_x, tank_size[2]] + + result = interpolate_line(start_point, end_point, n_interpolation_points, + semi_new, semi_new.systems[1], sol; cut_off_bnd=false) + + @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN + Float32[62.50176, 1053.805, 1061.2959, 1055.8348, 1043.9069, + 1038.2051, 1033.1708, 1014.2249, 672.61566]) + + @test isapprox(result.density[1:(end - 1)], # Exclude last NaN + Float32[1078.3738, 1070.8535, 1061.2003, 1052.4126, 1044.5074, + 1037.0444, 1028.4813, 1014.7941, 1003.6117]) + + @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN + Float32[9940.595, 8791.842, 7368.837, 6143.6562, 5093.711, + 4143.313, 3106.1575, 1552.1078, 366.71414]) + end + + @testset verbose=true "Plane" begin + interpolation_start = [0.0, 0.0] + interpolation_end = [1.0, 1.0] + resolution = 0.4f0 + + result = interpolate_plane_2d(interpolation_start, interpolation_end, + resolution, semi_new, semi_new.systems[1], sol; + cut_off_bnd=false) + + @test isapprox(result.computed_density, + Float32[282.85278, 554.32996, 551.2026, 529.11, 1049.9423, + 1049.9451, 504.8472, 1014.6541, 1013.62933]) + + @test isapprox(result.density, + Float32[1074.302, 1077.213, 1073.9844, 1044.9037, 1046.0576, + 1044.6786, 1008.5474, 1010.59863, 1009.51215]) + + @test isapprox(result.pressure, + Float32[9310.613, 9762.787, 9262.167, 5144.3604, 5295.3286, + 5115.5, 880.0212, 1098.6537, 982.35785]) + end end end From fab56c31c7c2677e87e33ba11faff89d5a0c8df6 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 5 Jun 2025 08:17:19 +0200 Subject: [PATCH 16/17] use dam break for solution --- test/examples/gpu.jl | 118 ++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 62 deletions(-) diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl index 1db23eaf9a..03fb2377d2 100644 --- a/test/examples/gpu.jl +++ b/test/examples/gpu.jl @@ -391,75 +391,69 @@ end @test backend == Main.parallelization_backend end end -end -@testset verbose=true "Postprocessing $TRIXIPARTICLES_TEST_" begin - @testset verbose=true "Interpolation" begin - # Import variables into scope - trixi_include_changeprecision(Float32, @__MODULE__, - joinpath(examples_dir(), "fluid", - "hydrostatic_water_column_2d.jl"), - sol=nothing, ode=nothing) - - # Neighborhood search with `FullGridCellList` for GPU compatibility - min_corner = minimum(tank.boundary.coordinates, dims=2) - max_corner = maximum(tank.boundary.coordinates, dims=2) - cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=500) - semi_fullgrid = Semidiscretization(fluid_system, boundary_system, - neighborhood_search=GridNeighborhoodSearch{2}(; - cell_list), - parallelization_backend=Main.parallelization_backend) - - trixi_include_changeprecision(Float32, @__MODULE__, - joinpath(examples_dir(), - "fluid", "hydrostatic_water_column_2d.jl"); - semi=semi_fullgrid, tspan=(0.0f0, 0.1f0)) - - semi_new = sol.prob.p - - @testset verbose=true "Line" begin - # Interpolation parameters - position_x = tank_size[1] / 2 - n_interpolation_points = 10 - start_point = [position_x, -fluid_particle_spacing] - end_point = [position_x, tank_size[2]] - - result = interpolate_line(start_point, end_point, n_interpolation_points, - semi_new, semi_new.systems[1], sol; cut_off_bnd=false) - - @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN - Float32[62.50176, 1053.805, 1061.2959, 1055.8348, 1043.9069, - 1038.2051, 1033.1708, 1014.2249, 672.61566]) - - @test isapprox(result.density[1:(end - 1)], # Exclude last NaN - Float32[1078.3738, 1070.8535, 1061.2003, 1052.4126, 1044.5074, - 1037.0444, 1028.4813, 1014.7941, 1003.6117]) - - @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN - Float32[9940.595, 8791.842, 7368.837, 6143.6562, 5093.711, - 4143.313, 3106.1575, 1552.1078, 366.71414]) - end + @testset verbose=true "Postprocessing $TRIXIPARTICLES_TEST_" begin + @testset verbose=true "Interpolation" begin + # Run the dam break example to get a solution + trixi_include_changeprecision(Float32, @__MODULE__, + joinpath(examples_dir(), "fluid", + "dam_break_2d_gpu.jl"); + fluid_particle_spacing=0.05f0, + tspan=(0.0f0, 0.01f0), + parallelization_backend=Main.parallelization_backend) - @testset verbose=true "Plane" begin - interpolation_start = [0.0, 0.0] - interpolation_end = [1.0, 1.0] - resolution = 0.4f0 + semi_new = sol.prob.p - result = interpolate_plane_2d(interpolation_start, interpolation_end, - resolution, semi_new, semi_new.systems[1], sol; + @testset verbose=true "Line" begin + # Interpolation parameters + n_interpolation_points = 10 + start_point = Float32[0.5, 0.0] + end_point = Float32[0.5, 0.5] + + result = interpolate_line(start_point, end_point, n_interpolation_points, + semi_new, semi_new.systems[1], sol; cut_off_bnd=false) - @test isapprox(result.computed_density, - Float32[282.85278, 554.32996, 551.2026, 529.11, 1049.9423, - 1049.9451, 504.8472, 1014.6541, 1013.62933]) + @test isapprox(result.computed_density, + Float32[500.33255, 893.09766, 997.7032, 1001.14355, 1001.234, + 1001.0098, 1000.4352, 999.7572, 999.1139, 989.6319]) + + @test isapprox(result.density, + Float32[1002.3152, 1002.19653, 1001.99915, 1001.7685, + 1001.5382, + 1001.3093, 1001.0836, 1000.8649, 1000.635, + 1000.4053]) - @test isapprox(result.density, - Float32[1074.302, 1077.213, 1073.9844, 1044.9037, 1046.0576, - 1044.6786, 1008.5474, 1010.59863, 1009.51215]) + @test isapprox(result.pressure, + Float32[5450.902, 5171.2856, 4706.551, 4163.9185, 3621.5042, + 3082.6948, 2551.5725, 2036.1208, 1494.8608, + 954.14355]) + end - @test isapprox(result.pressure, - Float32[9310.613, 9762.787, 9262.167, 5144.3604, 5295.3286, - 5115.5, 880.0212, 1098.6537, 982.35785]) + @testset verbose=true "Plane" begin + interpolation_start = Float32[0.0, 0.0] + interpolation_end = Float32[1.0, 1.0] + resolution = 0.4f0 + + result = interpolate_plane_2d(interpolation_start, interpolation_end, + resolution, semi_new, semi_new.systems[1], + sol; + cut_off_bnd=false) + + @test isapprox(result.computed_density, + Float32[250.18625, 500.34482, 499.77225, 254.3632, 499.58026, + 999.1413, 998.6351, 503.0122]) + + @test isapprox(result.density, + Float32[1002.34467, 1002.3365, 1001.5021, 999.7109, + 1000.84863, + 1000.8373, 1000.3423, 1000.20734]) + + @test isapprox(result.pressure, + Float32[5520.0513, 5501.1846, 3536.2256, -680.5194, + 1997.7814, + 1971.0717, 805.8584, 488.4068]) + end end end end From cc333b4f537565079e3874f791aa9e488bf21e5b Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 5 Jun 2025 08:23:44 +0200 Subject: [PATCH 17/17] revert CPU transfer --- src/general/interpolation.jl | 6 ++---- test/examples/gpu.jl | 12 ++++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl index e1e44c2758..9b0ac40d7f 100644 --- a/src/general/interpolation.jl +++ b/src/general/interpolation.jl @@ -589,10 +589,8 @@ end end end - cache_ = map(x -> Array(x), cache) - - return (; computed_density=Array(computed_density), point_coords=Array(point_coords), - neighbor_count=Array(neighbor_count), cache_...) + return (; computed_density=computed_density, point_coords=point_coords, + neighbor_count=neighbor_count, cache...) end @inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi) diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl index 03fb2377d2..9d551a5273 100644 --- a/test/examples/gpu.jl +++ b/test/examples/gpu.jl @@ -414,17 +414,17 @@ end semi_new, semi_new.systems[1], sol; cut_off_bnd=false) - @test isapprox(result.computed_density, + @test isapprox(Array(result.computed_density), Float32[500.33255, 893.09766, 997.7032, 1001.14355, 1001.234, 1001.0098, 1000.4352, 999.7572, 999.1139, 989.6319]) - @test isapprox(result.density, + @test isapprox(Array(result.density), Float32[1002.3152, 1002.19653, 1001.99915, 1001.7685, 1001.5382, 1001.3093, 1001.0836, 1000.8649, 1000.635, 1000.4053]) - @test isapprox(result.pressure, + @test isapprox(Array(result.pressure), Float32[5450.902, 5171.2856, 4706.551, 4163.9185, 3621.5042, 3082.6948, 2551.5725, 2036.1208, 1494.8608, 954.14355]) @@ -440,16 +440,16 @@ end sol; cut_off_bnd=false) - @test isapprox(result.computed_density, + @test isapprox(Array(result.computed_density), Float32[250.18625, 500.34482, 499.77225, 254.3632, 499.58026, 999.1413, 998.6351, 503.0122]) - @test isapprox(result.density, + @test isapprox(Array(result.density), Float32[1002.34467, 1002.3365, 1001.5021, 999.7109, 1000.84863, 1000.8373, 1000.3423, 1000.20734]) - @test isapprox(result.pressure, + @test isapprox(Array(result.pressure), Float32[5520.0513, 5501.1846, 3536.2256, -680.5194, 1997.7814, 1971.0717, 805.8584, 488.4068])