Interpolation on GPU #812


Merged: 20 commits, Jun 5, 2025
8 changes: 8 additions & 0 deletions src/general/gpu.jl
@@ -29,3 +29,11 @@ end
@inline function PointNeighbors.parallel_foreach(f, iterator, semi::Semidiscretization)
PointNeighbors.parallel_foreach(f, iterator, semi.parallelization_backend)
end

function allocate(backend::KernelAbstractions.Backend, ELTYPE, size)
return KernelAbstractions.allocate(backend, ELTYPE, size)
end

function allocate(backend, ELTYPE, size)
return Array{ELTYPE, length(size)}(undef, size)
end
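
The new `allocate` helper gives the rest of the code a single, backend-agnostic entry point: for a `KernelAbstractions.Backend` it returns a device array, otherwise a plain `Array`. Below is a minimal CPU-only sketch of how this dispatch behaves; the `SerialBackend` marker type is hypothetical and only stands in for any non-KernelAbstractions backend.

```julia
using KernelAbstractions

struct SerialBackend end  # hypothetical stand-in for a CPU backend

allocate(backend::KernelAbstractions.Backend, ELTYPE, size) =
    KernelAbstractions.allocate(backend, ELTYPE, size)
allocate(backend, ELTYPE, size) = Array{ELTYPE, length(size)}(undef, size)

v = allocate(SerialBackend(), Float32, 10)        # Vector{Float32} (uninitialized)
m = allocate(SerialBackend(), Float32, (2, 10))   # Matrix{Float32} (uninitialized)

# With a GPU backend, e.g. `CUDABackend()` from CUDA.jl, the same call
# would return a device array instead:
# v_gpu = allocate(CUDABackend(), Float32, 10)
```

Note that the result is uninitialized in both cases, which is why the callers below follow each `allocate` with `set_zero!`.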
126 changes: 96 additions & 30 deletions src/general/interpolation.jl
@@ -504,20 +504,31 @@ function process_neighborhood_searches(semi, u_ode, ref_system, smoothing_length
end

# Interpolate points with given neighborhood searches
@inline function interpolate_points(point_coords, semi, ref_system, v_ode, u_ode,
@inline function interpolate_points(point_coords_, semi, ref_system, v_ode, u_ode,
neighborhood_searches;
smoothing_length=initial_smoothing_length(ref_system),
cut_off_bnd=true, clip_negative_pressure=false)
(; parallelization_backend) = semi

if semi.parallelization_backend isa KernelAbstractions.Backend
point_coords = Adapt.adapt(semi.parallelization_backend, point_coords_)
else
point_coords = point_coords_
end

n_points = size(point_coords, 2)
ELTYPE = eltype(point_coords)
computed_density = zeros(ELTYPE, n_points)
other_density = zeros(ELTYPE, n_points)
neighbor_count = zeros(Int, n_points)
shepard_coefficient = zeros(ELTYPE, n_points)
computed_density = allocate(semi.parallelization_backend, ELTYPE, n_points)
other_density = allocate(semi.parallelization_backend, ELTYPE, n_points)
shepard_coefficient = allocate(semi.parallelization_backend, ELTYPE, n_points)
neighbor_count = allocate(semi.parallelization_backend, Int, n_points)

cache = create_cache_interpolation(ref_system, n_points)
set_zero!(computed_density)
set_zero!(other_density)
set_zero!(shepard_coefficient)
set_zero!(neighbor_count)

cache = create_cache_interpolation(ref_system, n_points, semi)

ref_id = system_indices(ref_system, semi)
ref_smoothing_kernel = ref_system.smoothing_kernel
@@ -565,42 +576,52 @@ end
computed_density[point] = NaN
neighbor_count[point] = 0

foreach(cache) do field
if field isa AbstractVector
field[point] = NaN
else
field[:, point] .= NaN
end
# We need to convert the `NamedTuple` to a `Tuple` for GPU compatibility
foreach(Tuple(cache)) do field
set_nan!(field, point)
end
else
# Normalize all quantities by the shepard coefficient
foreach(cache) do field
if field isa AbstractVector
field[point] /= shepard_coefficient[point]
else
field[:, point] ./= shepard_coefficient[point]
end
# Normalize all quantities by the shepard coefficient.
# We need to convert the `NamedTuple` to a `Tuple` for GPU compatibility.
foreach(Tuple(cache)) do field
divide_by_shepard_coefficient!(field, shepard_coefficient, point)
end
end
end

return (; computed_density=computed_density, neighbor_count, point_coords, cache...)
return (; computed_density=computed_density, point_coords=point_coords,
neighbor_count=neighbor_count, cache...)
end

@inline function create_cache_interpolation(ref_system::FluidSystem, n_points)
velocity = zeros(eltype(ref_system), ndims(ref_system), n_points)
pressure = zeros(eltype(ref_system), n_points)
density = zeros(eltype(ref_system), n_points)
@inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi)
(; parallelization_backend) = semi

velocity = allocate(parallelization_backend, eltype(ref_system),
(ndims(ref_system), n_points))
pressure = allocate(parallelization_backend, eltype(ref_system), n_points)
density = allocate(parallelization_backend, eltype(ref_system), n_points)

set_zero!(velocity)
set_zero!(pressure)
set_zero!(density)

return (; velocity, pressure, density)
end

@inline function create_cache_interpolation(ref_system::SolidSystem, n_points)
velocity = zeros(eltype(ref_system), ndims(ref_system), n_points)
jacobian = zeros(eltype(ref_system), n_points)
von_mises_stress = zeros(eltype(ref_system), n_points)
cauchy_stress = zeros(eltype(ref_system), ndims(ref_system), ndims(ref_system),
n_points)
@inline function create_cache_interpolation(ref_system::SolidSystem, n_points, semi)
(; parallelization_backend) = semi

velocity = allocate(parallelization_backend, eltype(ref_system),
(ndims(ref_system), n_points))
jacobian = allocate(parallelization_backend, eltype(ref_system), n_points)
von_mises_stress = allocate(parallelization_backend, eltype(ref_system), n_points)
cauchy_stress = allocate(parallelization_backend, eltype(ref_system),
(ndims(ref_system), ndims(ref_system), n_points))

set_zero!(velocity)
set_zero!(jacobian)
set_zero!(von_mises_stress)
set_zero!(cauchy_stress)

return (; velocity, jacobian, von_mises_stress, cauchy_stress)
end
@@ -643,3 +664,48 @@ end

return cache
end

function set_nan!(field::AbstractVector, point)
field[point] = NaN

return field
end

function set_nan!(field::AbstractMatrix, point)
for dim in axes(field, 1)
field[dim, point] = NaN
end

return field
end

function set_nan!(field::AbstractArray{T, 3}, point) where {T}
for j in axes(field, 2), i in axes(field, 1)
field[i, j, point] = NaN
end

return field
end

function divide_by_shepard_coefficient!(field::AbstractVector, shepard_coefficient, point)
field[point] /= shepard_coefficient[point]

return field
end

function divide_by_shepard_coefficient!(field::AbstractMatrix, shepard_coefficient, point)
for dim in axes(field, 1)
field[dim, point] /= shepard_coefficient[point]
end

return field
end

function divide_by_shepard_coefficient!(field::AbstractArray{T, 3}, shepard_coefficient,
point) where {T}
for j in axes(field, 2), i in axes(field, 1)
field[i, j, point] /= shepard_coefficient[point]
end

return field
end
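
Taken together, these helpers replace the earlier `if field isa AbstractVector ... else ... end` branches with one method per array dimensionality, and the cache `NamedTuple` is converted to a `Tuple` before `foreach` for GPU compatibility. A CPU-only sketch of that traversal pattern (the `set_nan!` methods here just mirror the vector and matrix helpers above; the cache layout matches the fluid case):

```julia
set_nan!(field::AbstractVector, point) = (field[point] = NaN; field)

function set_nan!(field::AbstractMatrix, point)
    for dim in axes(field, 1)
        field[dim, point] = NaN
    end
    return field
end

n_points = 3
cache = (; velocity=zeros(2, n_points), pressure=zeros(n_points),
         density=zeros(n_points))

# `Tuple(cache)` drops the field names; iterating a plain tuple of arrays
# is what keeps the closure GPU-compatible.
foreach(Tuple(cache)) do field
    set_nan!(field, 2)   # interpolation point 2 found no neighbors
end

cache.velocity   # column 2 is now NaN
```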
17 changes: 8 additions & 9 deletions src/general/semidiscretization.jl
@@ -282,19 +282,18 @@ function semidiscretize(semi, tspan; reset_threads=true)
sizes_u = (u_nvariables(system) * n_moving_particles(system) for system in systems)
sizes_v = (v_nvariables(system) * n_moving_particles(system) for system in systems)

# Use either the specified backend, e.g., `CUDABackend` or `MetalBackend`,
# or CPU vectors for all CPU backends.
u0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_u))
v0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_v))

if semi.parallelization_backend isa KernelAbstractions.Backend
# Use the specified backend, e.g., `CUDABackend` or `MetalBackend`
u0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE,
sum(sizes_u))
v0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE,
sum(sizes_v))
u0_ode = u0_ode_
v0_ode = v0_ode_
else
# Use CPU vectors for all CPU backends.
# These are wrapped in `ThreadedBroadcastArray`s
# CPU vectors are wrapped in `ThreadedBroadcastArray`s
# to make broadcasting (which is done by OrdinaryDiffEq.jl) multithreaded.
# See https://github.com/trixi-framework/TrixiParticles.jl/pull/722 for more details.
u0_ode_ = Vector{ELTYPE}(undef, sum(sizes_u))
v0_ode_ = Vector{ELTYPE}(undef, sum(sizes_v))
u0_ode = ThreadedBroadcastArray(u0_ode_;
parallelization_backend=semi.parallelization_backend)
v0_ode = ThreadedBroadcastArray(v0_ode_;
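Since the old and new lines are interleaved in the view above, here is the post-PR allocation branch of `semidiscretize` restated in one piece (reconstructed from this diff, not runnable on its own):

```julia
# Use either the specified backend, e.g., `CUDABackend` or `MetalBackend`,
# or CPU vectors for all CPU backends.
u0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_u))
v0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_v))

if semi.parallelization_backend isa KernelAbstractions.Backend
    # Device arrays can be used directly
    u0_ode = u0_ode_
    v0_ode = v0_ode_
else
    # CPU vectors are wrapped in `ThreadedBroadcastArray`s to make broadcasting
    # (which is done by OrdinaryDiffEq.jl) multithreaded.
    u0_ode = ThreadedBroadcastArray(u0_ode_;
                                    parallelization_backend=semi.parallelization_backend)
    v0_ode = ThreadedBroadcastArray(v0_ode_;
                                    parallelization_backend=semi.parallelization_backend)
end
```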
65 changes: 65 additions & 0 deletions test/examples/gpu.jl
@@ -391,4 +391,69 @@ end
@test backend == Main.parallelization_backend
end
end

@testset verbose=true "Postprocessing $TRIXIPARTICLES_TEST_" begin
@testset verbose=true "Interpolation" begin
# Run the dam break example to get a solution
trixi_include_changeprecision(Float32, @__MODULE__,
joinpath(examples_dir(), "fluid",
"dam_break_2d_gpu.jl");
fluid_particle_spacing=0.05f0,
tspan=(0.0f0, 0.01f0),
parallelization_backend=Main.parallelization_backend)

semi_new = sol.prob.p

@testset verbose=true "Line" begin
# Interpolation parameters
n_interpolation_points = 10
start_point = Float32[0.5, 0.0]
end_point = Float32[0.5, 0.5]

result = interpolate_line(start_point, end_point, n_interpolation_points,
semi_new, semi_new.systems[1], sol;
cut_off_bnd=false)

@test isapprox(Array(result.computed_density),
Float32[500.33255, 893.09766, 997.7032, 1001.14355, 1001.234,
1001.0098, 1000.4352, 999.7572, 999.1139, 989.6319])

@test isapprox(Array(result.density),
Float32[1002.3152, 1002.19653, 1001.99915, 1001.7685, 1001.5382,
1001.3093, 1001.0836, 1000.8649, 1000.635, 1000.4053])

@test isapprox(Array(result.pressure),
Float32[5450.902, 5171.2856, 4706.551, 4163.9185, 3621.5042,
3082.6948, 2551.5725, 2036.1208, 1494.8608, 954.14355])
end

@testset verbose=true "Plane" begin
interpolation_start = Float32[0.0, 0.0]
interpolation_end = Float32[1.0, 1.0]
resolution = 0.4f0

result = interpolate_plane_2d(interpolation_start, interpolation_end,
resolution, semi_new, semi_new.systems[1], sol;
cut_off_bnd=false)

@test isapprox(Array(result.computed_density),
Float32[250.18625, 500.34482, 499.77225, 254.3632, 499.58026,
999.1413, 998.6351, 503.0122])

@test isapprox(Array(result.density),
Float32[1002.34467, 1002.3365, 1001.5021, 999.7109, 1000.84863,
1000.8373, 1000.3423, 1000.20734])

@test isapprox(Array(result.pressure),
Float32[5520.0513, 5501.1846, 3536.2256, -680.5194, 1997.7814,
1971.0717, 805.8584, 488.4068])
end
end
end
end
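
For reference, the user-facing workflow exercised by these tests: interpolation accepts host-side point coordinates, adapts them to the active backend internally, and returns device arrays that are brought back to the host with `Array(...)`. A hedged sketch, assuming `semi` and `sol` come from a simulation run with a GPU backend such as `CUDABackend()`:

```julia
# Interpolate along a vertical line through the dam-break fluid
result = interpolate_line(Float32[0.5, 0.0], Float32[0.5, 0.5], 10,
                          semi, semi.systems[1], sol; cut_off_bnd=false)

# Result fields live on the device; convert before plotting or testing
density_host = Array(result.density)
pressure_host = Array(result.pressure)
```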