From ef84bb0e6d00390a7fe9ff57324310056201f6cb Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 21 May 2025 14:47:21 +0200
Subject: [PATCH 01/17] adapt array in interpolation

---
 src/general/gpu.jl                |  8 ++++++++
 src/general/interpolation.jl      | 27 +++++++++++++++++----------
 src/general/semidiscretization.jl | 17 ++++++++---------
 3 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/src/general/gpu.jl b/src/general/gpu.jl
index 87b5f95aa4..9898da475d 100644
--- a/src/general/gpu.jl
+++ b/src/general/gpu.jl
@@ -29,3 +29,11 @@ end
 @inline function PointNeighbors.parallel_foreach(f, iterator, semi::Semidiscretization)
     PointNeighbors.parallel_foreach(f, iterator, semi.parallelization_backend)
 end
+
+function allocate(backend::KernelAbstractions.Backend, ELTYPE, size)
+    return KernelAbstractions.allocate(backend, ELTYPE, size)
+end
+
+function allocate(backend, ELTYPE, size)
+    return Array{ELTYPE, length(size)}(undef, size)
+end
diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index a17cddddba..8ab2969cad 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -512,12 +512,12 @@ end
 
     n_points = size(point_coords, 2)
     ELTYPE = eltype(point_coords)
-    computed_density = zeros(ELTYPE, n_points)
-    other_density = zeros(ELTYPE, n_points)
-    neighbor_count = zeros(Int, n_points)
-    shepard_coefficient = zeros(ELTYPE, n_points)
+    computed_density = allocate(semi.parallelization_backend, ELTYPE, n_points)
+    other_density = allocate(semi.parallelization_backend, ELTYPE, n_points)
+    shepard_coefficient = allocate(semi.parallelization_backend, ELTYPE, n_points)
+    neighbor_count = allocate(semi.parallelization_backend, Int, n_points)
 
-    cache = create_cache_interpolation(ref_system, n_points)
+    cache = create_cache_interpolation(ref_system, n_points, semi)
 
     ref_id = system_indices(ref_system, semi)
     ref_smoothing_kernel = ref_system.smoothing_kernel
@@ -587,15 +587,22 @@ end
     return (; computed_density=computed_density, neighbor_count, point_coords, cache...)
 end
 
-@inline function create_cache_interpolation(ref_system::FluidSystem, n_points)
-    velocity = zeros(eltype(ref_system), ndims(ref_system), n_points)
-    pressure = zeros(eltype(ref_system), n_points)
-    density = zeros(eltype(ref_system), n_points)
+@inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi)
+    velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points)
+    pressure_ = zeros(eltype(ref_system), n_points)
+
+    if semi.parallelization_backend isa KernelAbstractions.Backend
+        velocity = Adapt.adapt(semi.parallelization_backend, velocity_)
+        pressure = Adapt.adapt(semi.parallelization_backend, pressure_)
+    else
+        velocity = velocity_
+        pressure = pressure_
+    end
 
     return (; velocity, pressure, density)
 end
 
-@inline function create_cache_interpolation(ref_system::SolidSystem, n_points)
+@inline function create_cache_interpolation(ref_system::SolidSystem, n_points, semi)
     velocity = zeros(eltype(ref_system), ndims(ref_system), n_points)
     jacobian = zeros(eltype(ref_system), n_points)
     von_mises_stress = zeros(eltype(ref_system), n_points)
diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
index 5b7eaa5302..0bc2cef57b 100644
--- a/src/general/semidiscretization.jl
+++ b/src/general/semidiscretization.jl
@@ -282,19 +282,18 @@ function semidiscretize(semi, tspan; reset_threads=true)
     sizes_u = (u_nvariables(system) * n_moving_particles(system) for system in systems)
     sizes_v = (v_nvariables(system) * n_moving_particles(system) for system in systems)
 
+    # Use either the specified backend, e.g., `CUDABackend` or `MetalBackend` or
+    # use CPU vectors for all CPU backends.
+    u0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_u))
+    v0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_v))
+
     if semi.parallelization_backend isa KernelAbstractions.Backend
-        # Use the specified backend, e.g., `CUDABackend` or `MetalBackend`
-        u0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE,
-                                             sum(sizes_u))
-        v0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE,
-                                             sum(sizes_v))
+        u0_ode = u0_ode_
+        v0_ode = v0_ode_
     else
-        # Use CPU vectors for all CPU backends.
-        # These are wrapped in `ThreadedBroadcastArray`s
+        # CPU vectors are wrapped in `ThreadedBroadcastArray`s
         # to make broadcasting (which is done by OrdinaryDiffEq.jl) multithreaded.
         # See https://github.com/trixi-framework/TrixiParticles.jl/pull/722 for more details.
-        u0_ode_ = Vector{ELTYPE}(undef, sum(sizes_u))
-        v0_ode_ = Vector{ELTYPE}(undef, sum(sizes_v))
         u0_ode = ThreadedBroadcastArray(u0_ode_;
                                         parallelization_backend=semi.parallelization_backend)
         v0_ode = ThreadedBroadcastArray(v0_ode_;

From 3f2d7b8f1690c78144b959aaf8d14774019a0743 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 22 May 2025 18:36:14 +0200
Subject: [PATCH 02/17] fix

---
 src/general/interpolation.jl | 96 ++++++++++++++++++++++++++++--------
 1 file changed, 76 insertions(+), 20 deletions(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index 8ab2969cad..82ca3e28bc 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -565,49 +565,105 @@ end
             computed_density[point] = NaN
             neighbor_count[point] = 0
 
-            foreach(cache) do field
-                if field isa AbstractVector
-                    field[point] = NaN
-                else
-                    field[:, point] .= NaN
-                end
-            end
+            set_NaN!(cache, point, ref_system)
         else
             # Normalize all quantities by the shepard coefficient
-            foreach(cache) do field
-                if field isa AbstractVector
-                    field[point] /= shepard_coefficient[point]
-                else
-                    field[:, point] ./= shepard_coefficient[point]
-                end
-            end
+            divide_by_shepard_coefficient!(cache, point, shepard_coefficient,
+                                           ref_system)
         end
     end
 
-    return (; computed_density=computed_density, neighbor_count, point_coords, cache...)
+    cache_ = map(x -> Array(x), cache)
+
+    return (; computed_density=Array(computed_density), point_coords=Array(point_coords),
+            neighbor_count=Array(neighbor_count), cache_...)
+end
+
+function set_NaN!(cache, point, ref_system::FluidSystem)
+    for dim in 1:ndims(ref_system)
+        cache.velocity[dim, point] = NaN
+    end
+    cache.pressure[point] = NaN
+    cache.density[point] = NaN
+
+    return cache
+end
+
+function set_NaN!(cache, point, ref_system::SolidSystem)
+    for dim in 1:ndims(ref_system)
+        cache.velocity[dim, point] = NaN
+    end
+    cache.jacobian[point] = NaN
+    cache.von_mises_stress[point] = NaN
+
+    for j in axes(cache.cauchy_stress, 2), i in axes(cache.cauchy_stress, 1)
+        cache.cauchy_stress[i, j, point] = NaN
+    end
+
+    return cache
+end
+
+function divide_by_shepard_coefficient!(cache, point, shepard_coefficient,
+                                        ref_system::FluidSystem)
+    for dim in 1:ndims(ref_system)
+        cache.velocity[dim, point] /= shepard_coefficient[point]
+    end
+    cache.pressure[point] /= shepard_coefficient[point]
+    cache.density[point] /= shepard_coefficient[point]
+
+    return cache
+end
+
+function divide_by_shepard_coefficient!(cache, point, shepard_coefficient,
+                                        ref_system::SolidSystem)
+    for dim in 1:ndims(ref_system)
+        cache.velocity[dim, point] /= shepard_coefficient[point]
+    end
+    cache.jacobian[point] /= shepard_coefficient[point]
+    cache.von_mises_stress[point] /= shepard_coefficient[point]
+
+    for j in axes(cache.cauchy_stress, 2), i in axes(cache.cauchy_stress, 1)
+        cache.cauchy_stress[i, j, point] /= shepard_coefficient[point]
+    end
+    return cache
 end
 
 @inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi)
     velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points)
     pressure_ = zeros(eltype(ref_system), n_points)
+    density_ = zeros(eltype(ref_system), n_points)
 
     if semi.parallelization_backend isa KernelAbstractions.Backend
         velocity = Adapt.adapt(semi.parallelization_backend, velocity_)
         pressure = Adapt.adapt(semi.parallelization_backend, pressure_)
+        density = Adapt.adapt(semi.parallelization_backend, density_)
     else
         velocity = velocity_
         pressure = pressure_
+        density = density_
     end
 
     return (; velocity, pressure, density)
 end
 
 @inline function create_cache_interpolation(ref_system::SolidSystem, n_points, semi)
-    velocity = zeros(eltype(ref_system), ndims(ref_system), n_points)
-    jacobian = zeros(eltype(ref_system), n_points)
-    von_mises_stress = zeros(eltype(ref_system), n_points)
-    cauchy_stress = zeros(eltype(ref_system), ndims(ref_system), ndims(ref_system),
-                          n_points)
+    velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points)
+    jacobian_ = zeros(eltype(ref_system), n_points)
+    von_mises_stress_ = zeros(eltype(ref_system), n_points)
+    cauchy_stress_ = zeros(eltype(ref_system), ndims(ref_system), ndims(ref_system),
+                           n_points)
+
+    if semi.parallelization_backend isa KernelAbstractions.Backend
+        velocity = Adapt.adapt(semi.parallelization_backend, velocity_)
+        jacobian = Adapt.adapt(semi.parallelization_backend, jacobian_)
+        von_mises_stress = Adapt.adapt(semi.parallelization_backend, von_mises_stress_)
+        cauchy_stress = Adapt.adapt(semi.parallelization_backend, cauchy_stress_)
+    else
+        velocity = velocity_
+        jacobian = jacobian_
+        von_mises_stress = von_mises_stress_
+        cauchy_stress = cauchy_stress_
+    end
 
     return (; velocity, jacobian, von_mises_stress, cauchy_stress)
 end

From ffbbaba227fb35242cf3d84a8333e735547f7b3d Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Fri, 23 May 2025 10:20:40 +0200
Subject: [PATCH 03/17] allocate instead of adapt

---
 src/general/interpolation.jl | 53 ++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 30 deletions(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index 82ca3e28bc..19f22eb1c5 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -629,41 +629,34 @@ function divide_by_shepard_coefficient!(cache, point, shepard_coefficient,
 end
 
 @inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi)
-    velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points)
-    pressure_ = zeros(eltype(ref_system), n_points)
-    density_ = zeros(eltype(ref_system), n_points)
-
-    if semi.parallelization_backend isa KernelAbstractions.Backend
-        velocity = Adapt.adapt(semi.parallelization_backend, velocity_)
-        pressure = Adapt.adapt(semi.parallelization_backend, pressure_)
-        density = Adapt.adapt(semi.parallelization_backend, density_)
-    else
-        velocity = velocity_
-        pressure = pressure_
-        density = density_
-    end
+    (; parallelization_backend) = semi
+
+    velocity = allocate(parallelization_backend, eltype(ref_system),
+                        (ndims(ref_system), n_points))
+    pressure = allocate(parallelization_backend, eltype(ref_system), n_points)
+    density = allocate(parallelization_backend, eltype(ref_system), n_points)
+
+    set_zero!(velocity)
+    set_zero!(pressure)
+    set_zero!(density)
 
     return (; velocity, pressure, density)
 end
 
 @inline function create_cache_interpolation(ref_system::SolidSystem, n_points, semi)
-    velocity_ = zeros(eltype(ref_system), ndims(ref_system), n_points)
-    jacobian_ = zeros(eltype(ref_system), n_points)
-    von_mises_stress_ = zeros(eltype(ref_system), n_points)
-    cauchy_stress_ = zeros(eltype(ref_system), ndims(ref_system), ndims(ref_system),
-                           n_points)
-
-    if semi.parallelization_backend isa KernelAbstractions.Backend
-        velocity = Adapt.adapt(semi.parallelization_backend, velocity_)
-        jacobian = Adapt.adapt(semi.parallelization_backend, jacobian_)
-        von_mises_stress = Adapt.adapt(semi.parallelization_backend, von_mises_stress_)
-        cauchy_stress = Adapt.adapt(semi.parallelization_backend, cauchy_stress_)
-    else
-        velocity = velocity_
-        jacobian = jacobian_
-        von_mises_stress = von_mises_stress_
-        cauchy_stress = cauchy_stress_
-    end
+    (; parallelization_backend) = semi
+
+    velocity = allocate(parallelization_backend, eltype(ref_system),
+                        (ndims(ref_system), n_points))
+    jacobian = allocate(parallelization_backend, eltype(ref_system), n_points)
+    von_mises_stress = allocate(parallelization_backend, eltype(ref_system), n_points)
+    cauchy_stress = allocate(parallelization_backend, eltype(ref_system),
+                             (ndims(ref_system), ndims(ref_system), n_points))
+
+    set_zero!(velocity)
+    set_zero!(jacobian)
+    set_zero!(von_mises_stress)
+    set_zero!(cauchy_stress)
 
     return (; velocity, jacobian, von_mises_stress, cauchy_stress)
 end

From 8516db59becf20c951a5d058da9a223884f50306 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Fri, 23 May 2025 10:40:02 +0200
Subject: [PATCH 04/17] fix

---
 src/general/interpolation.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index 19f22eb1c5..d2cd6e0d40 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -517,6 +517,11 @@ end
     shepard_coefficient = allocate(semi.parallelization_backend, ELTYPE, n_points)
     neighbor_count = allocate(semi.parallelization_backend, Int, n_points)
 
+    set_zero!(computed_density)
+    set_zero!(other_density)
+    set_zero!(shepard_coefficient)
+    set_zero!(neighbor_count)
+
     cache = create_cache_interpolation(ref_system, n_points, semi)
 
     ref_id = system_indices(ref_system, semi)

From acceb98ece5d2a57dd227921c1cebedf592f15a5 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Fri, 23 May 2025 12:13:54 +0200
Subject: [PATCH 05/17] consistency

---
 src/general/interpolation.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index d2cd6e0d40..68d5bbf02c 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -570,7 +570,7 @@ end
             computed_density[point] = NaN
             neighbor_count[point] = 0
 
-            set_NaN!(cache, point, ref_system)
+            set_nan!(cache, point, ref_system)
         else
             # Normalize all quantities by the shepard coefficient
             divide_by_shepard_coefficient!(cache, point, shepard_coefficient,
@@ -584,7 +584,7 @@ end
             neighbor_count=Array(neighbor_count), cache_...)
 end
 
-function set_NaN!(cache, point, ref_system::FluidSystem)
+function set_nan!(cache, point, ref_system::FluidSystem)
     for dim in 1:ndims(ref_system)
         cache.velocity[dim, point] = NaN
     end
@@ -594,7 +594,7 @@ function set_NaN!(cache, point, ref_system::FluidSystem)
     return cache
 end
 
-function set_NaN!(cache, point, ref_system::SolidSystem)
+function set_nan!(cache, point, ref_system::SolidSystem)
     for dim in 1:ndims(ref_system)
         cache.velocity[dim, point] = NaN
     end

From 3371c9878b68448090bf75b92641074dfdac8eee Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Mon, 26 May 2025 10:38:13 +0200
Subject: [PATCH 06/17] add gpu tests

---
 test/examples/gpu.jl | 69 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl
index c99c67f139..2935679316 100644
--- a/test/examples/gpu.jl
+++ b/test/examples/gpu.jl
@@ -391,3 +391,72 @@ end
         end
     end
 end
+
+@testset verbose=true "Postprocessing $TRIXIPARTICLES_TEST_" begin
+    @testset verbose=true "Interpolation" begin
+        # Import variables into scope
+        trixi_include_changeprecision(Float32, @__MODULE__,
+                                      joinpath(examples_dir(), "fluid",
+                                               "hydrostatic_water_column_2d.jl"),
+                                      sol=nothing, ode=nothing)
+
+        # Neighborhood search with `FullGridCellList` for GPU compatibility
+        min_corner = minimum(tank.boundary.coordinates, dims=2)
+        max_corner = maximum(tank.boundary.coordinates, dims=2)
+        cell_list = FullGridCellList(; min_corner, max_corner,
+                                     max_points_per_cell=500)
+        semi_fullgrid = Semidiscretization(fluid_system, boundary_system,
+                                           neighborhood_search=GridNeighborhoodSearch{2}(;
+                                                                                         cell_list),
+                                           parallelization_backend=Main.parallelization_backend)
+
+        trixi_include_changeprecision(Float32, @__MODULE__,
+                                      joinpath(examples_dir(),
+                                               "fluid", "hydrostatic_water_column_2d.jl");
+                                      semi=semi_fullgrid, tspan=(0.0f0, 0.1f0))
+
+        # Interpolation parameters
+        position_x = tank_size[1] / 2
+        n_interpolation_points = 10
+        start_point = [position_x, -fluid_particle_spacing]
+        end_point = [position_x, tank_size[2]]
+
+        result = interpolate_line(start_point, end_point, n_interpolation_points,
+                                  semi, fluid_system, sol)
+        @test isapprocx(plane.computed_density,
+                        [NaN,
+                            1051.550381846236,
+                            1058.840859422405,
+                            1054.6636640426582,
+                            1040.437243621303,
+                            1031.95170017119,
+                            1023.5350903161437,
+                            1009.1916142262469,
+                            693.8269492612843,
+                            NaN])
+
+        @test isapporx(result.density,
+                       [NaN,
+                           1066.8978311276544,
+                           1058.4834534513207,
+                           1049.7765132420302,
+                           1040.2795841900875,
+                           1030.331702082792,
+                           1020.2045843019229,
+                           1010.3658109398008,
+                           1003.3921419243012,
+                           NaN])
+
+        @test isapporx(result.pressure,
+                       [NaN,
+                           8198.684648008579,
+                           6983.142465751377,
+                           5788.908403992583,
+                           4551.910032518114,
+                           3327.0608294382264,
+                           2149.986239483061,
+                           1071.880176298758,
+                           342.81444988598906,
+                           NaN])
+    end
+end

From 0b7002da55b153ed665f57a5412f05d4cc28ad5d Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 11:30:09 +0200
Subject: [PATCH 07/17] implement suggestions

---
 src/general/interpolation.jl | 104 +++++++++++++++++------------------
 1 file changed, 52 insertions(+), 52 deletions(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index 68d5bbf02c..cf54d9906c 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -570,11 +570,14 @@ end
             computed_density[point] = NaN
             neighbor_count[point] = 0
 
-            set_nan!(cache, point, ref_system)
+            foreach(Tuple(cache)) do field
+                set_nan!(field, point)
+            end
         else
             # Normalize all quantities by the shepard coefficient
-            divide_by_shepard_coefficient!(cache, point, shepard_coefficient,
-                                           ref_system)
+            foreach(Tuple(cache)) do field
+                divide_by_shepard_coefficient!(field, shepard_coefficient, point)
+            end
         end
     end
 
@@ -584,55 +587,6 @@ end
             neighbor_count=Array(neighbor_count), cache_...)
 end
 
-function set_nan!(cache, point, ref_system::FluidSystem)
-    for dim in 1:ndims(ref_system)
-        cache.velocity[dim, point] = NaN
-    end
-    cache.pressure[point] = NaN
-    cache.density[point] = NaN
-
-    return cache
-end
-
-function set_nan!(cache, point, ref_system::SolidSystem)
-    for dim in 1:ndims(ref_system)
-        cache.velocity[dim, point] = NaN
-    end
-    cache.jacobian[point] = NaN
-    cache.von_mises_stress[point] = NaN
-
-    for j in axes(cache.cauchy_stress, 2), i in axes(cache.cauchy_stress, 1)
-        cache.cauchy_stress[i, j, point] = NaN
-    end
-
-    return cache
-end
-
-function divide_by_shepard_coefficient!(cache, point, shepard_coefficient,
-                                        ref_system::FluidSystem)
-    for dim in 1:ndims(ref_system)
-        cache.velocity[dim, point] /= shepard_coefficient[point]
-    end
-    cache.pressure[point] /= shepard_coefficient[point]
-    cache.density[point] /= shepard_coefficient[point]
-
-    return cache
-end
-
-function divide_by_shepard_coefficient!(cache, point, shepard_coefficient,
-                                        ref_system::SolidSystem)
-    for dim in 1:ndims(ref_system)
-        cache.velocity[dim, point] /= shepard_coefficient[point]
-    end
-    cache.jacobian[point] /= shepard_coefficient[point]
-    cache.von_mises_stress[point] /= shepard_coefficient[point]
-
-    for j in axes(cache.cauchy_stress, 2), i in axes(cache.cauchy_stress, 1)
-        cache.cauchy_stress[i, j, point] /= shepard_coefficient[point]
-    end
-    return cache
-end
-
 @inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi)
     (; parallelization_backend) = semi
 
@@ -704,3 +658,49 @@ end
 
     return cache
 end
+
+function set_nan!(field::AbstractVector, point)
+    field[point] = NaN
+
+    return field
+end
+
+function set_nan!(field::AbstractArray{T, 2}, point)
+    for dim in axes(field, 1)
+        field[dim, point] = NaN
+    end
+
+    return field
+end
+
+function set_nan!(field::AbstractArray{T, 3}, point)
+    for j in axes(field, 2), i in axes(field, 1)
+        field[i, j, point] = NaN
+    end
+
+    return field
+end
+
+function divide_by_shepard_coefficient!(field::AbstractVector, shepard_coefficient, point)
+    field[point] /= shepard_coefficient[point]
+
+    return field
+end
+
+function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coefficient,
+                                        point)
+    for dim in axes(field, 1)
+        field[dim, point] /= shepard_coefficient[point]
+    end
+
+    return cache
+end
+
+function divide_by_shepard_coefficient!(field::AbstractArray{T, 3}, shepard_coefficient,
+                                        point)
+    for j in axes(field, 2), i in axes(field, 1)
+        field[i, j, point] /= shepard_coefficient[point]
+    end
+
+    return field
+end

From fa9bad82e0c03f19e202aee4f8b0eb7fc2075872 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 11:41:24 +0200
Subject: [PATCH 08/17] fix

---
 src/general/interpolation.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index cf54d9906c..fb0b6551a6 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -688,7 +688,7 @@ function divide_by_shepard_coefficient!(field::AbstractVector, shepard_coefficie
 end
 
 function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coefficient,
-                                        point)
+                                        point) where {T}
     for dim in axes(field, 1)
         field[dim, point] /= shepard_coefficient[point]
     end
@@ -697,7 +697,7 @@ function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coef
 end
 
 function divide_by_shepard_coefficient!(field::AbstractArray{T, 3}, shepard_coefficient,
-                                        point)
+                                        point) where {T}
     for j in axes(field, 2), i in axes(field, 1)
         field[i, j, point] /= shepard_coefficient[point]
     end

From c39e0be6e3a436981752793699891f9ca574bb61 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 11:46:44 +0200
Subject: [PATCH 09/17] fix again

---
 src/general/interpolation.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index fb0b6551a6..ff013645d9 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -665,7 +665,7 @@ function set_nan!(field::AbstractVector, point)
     return field
 end
 
-function set_nan!(field::AbstractArray{T, 2}, point)
+function set_nan!(field::AbstractArray{T, 2}, point) where {T}
     for dim in axes(field, 1)
         field[dim, point] = NaN
     end
@@ -673,7 +673,7 @@ function set_nan!(field::AbstractArray{T, 2}, point)
     return field
 end
 
-function set_nan!(field::AbstractArray{T, 3}, point)
+function set_nan!(field::AbstractArray{T, 3}, point) where {T}
     for j in axes(field, 2), i in axes(field, 1)
         field[i, j, point] = NaN
     end

From e59c21a734c13564f4fd4a605fc7fc4c3a935295 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 14:50:22 +0200
Subject: [PATCH 10/17] fix tests

---
 src/general/interpolation.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index ff013645d9..ea8953130d 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -693,7 +693,7 @@ function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coef
         field[dim, point] /= shepard_coefficient[point]
     end
 
-    return cache
+    return field
 end
 
 function divide_by_shepard_coefficient!(field::AbstractArray{T, 3}, shepard_coefficient,

From 5a3d5fc06f5a837b90a7ee53b6ab3bebeaeeb8ca Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 19:37:43 +0200
Subject: [PATCH 11/17] fix gpu test

---
 test/examples/gpu.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl
index 4c5d11e1b7..66f0abba4b 100644
--- a/test/examples/gpu.jl
+++ b/test/examples/gpu.jl
@@ -423,8 +423,8 @@ end
         end_point = [position_x, tank_size[2]]
 
         result = interpolate_line(start_point, end_point, n_interpolation_points,
-                                  semi, fluid_system, sol)
-        @test isapprocx(plane.computed_density,
+                                  semi_fullgrid, fluid_system, sol)
+        @test isapprox(plane.computed_density,
                         [NaN,
                             1051.550381846236,
                             1058.840859422405,
@@ -436,7 +436,7 @@ end
                             693.8269492612843,
                             NaN])
 
-        @test isapporx(result.density,
+        @test isapprox(result.density,
                        [NaN,
                            1066.8978311276544,
                            1058.4834534513207,
@@ -448,7 +448,7 @@ end
                            1003.3921419243012,
                            NaN])
 
-        @test isapporx(result.pressure,
+        @test isapprox(result.pressure,
                        [NaN,
                            8198.684648008579,
                            6983.142465751377,

From 746b23840bf9a16bd679fcc97b37a455d2596bf3 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 19:40:53 +0200
Subject: [PATCH 12/17] apply formatter

---
 test/examples/gpu.jl | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl
index 66f0abba4b..060e61c520 100644
--- a/test/examples/gpu.jl
+++ b/test/examples/gpu.jl
@@ -424,17 +424,18 @@ end
 
         result = interpolate_line(start_point, end_point, n_interpolation_points,
                                   semi_fullgrid, fluid_system, sol)
+
         @test isapprox(plane.computed_density,
-                        [NaN,
-                            1051.550381846236,
-                            1058.840859422405,
-                            1054.6636640426582,
-                            1040.437243621303,
-                            1031.95170017119,
-                            1023.5350903161437,
-                            1009.1916142262469,
-                            693.8269492612843,
-                            NaN])
+                       [NaN,
+                           1051.550381846236,
+                           1058.840859422405,
+                           1054.6636640426582,
+                           1040.437243621303,
+                           1031.95170017119,
+                           1023.5350903161437,
+                           1009.1916142262469,
+                           693.8269492612843,
+                           NaN])
 
         @test isapprox(result.density,
                        [NaN,

From 9094341d4759b89f1bd9e2a56c47f26f71c3ddde Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 20:21:04 +0200
Subject: [PATCH 13/17] fix

---
 test/examples/gpu.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl
index 060e61c520..9515c0a6a6 100644
--- a/test/examples/gpu.jl
+++ b/test/examples/gpu.jl
@@ -423,7 +423,7 @@ end
         end_point = [position_x, tank_size[2]]
 
         result = interpolate_line(start_point, end_point, n_interpolation_points,
-                                  semi_fullgrid, fluid_system, sol)
+                                  semi_fullgrid, semi_fullgrid.systems[1], sol)
 
         @test isapprox(plane.computed_density,
                        [NaN,

From 83727badd1000c3e119826e808425008a63b1a76 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 4 Jun 2025 10:58:00 +0200
Subject: [PATCH 14/17] fix tests

---
 src/general/interpolation.jl |  8 +++-
 test/examples/gpu.jl         | 77 ++++++++++++++++++------------------
 2 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index ea8953130d..395dd4cac6 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -393,7 +393,13 @@ function interpolate_line(start, end_, n_points, semi, ref_system, v_ode, u_ode;
     # Convert to coordinate matrix
     points_coords_ = collect(reinterpret(reshape, eltype(start_svector), points_coords))
 
-    return interpolate_points(points_coords_, semi, ref_system, v_ode, u_ode;
+    if semi.parallelization_backend isa KernelAbstractions.Backend
+        points_coords_new = Adapt.adapt(semi.parallelization_backend, points_coords_)
+    else
+        points_coords_new = points_coords_
+    end
+
+    return interpolate_points(points_coords_new, semi, ref_system, v_ode, u_ode;
                               smoothing_length=smoothing_length,
                               cut_off_bnd=cut_off_bnd, clip_negative_pressure)
 end
diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl
index 9515c0a6a6..79f6e3fed8 100644
--- a/test/examples/gpu.jl
+++ b/test/examples/gpu.jl
@@ -404,8 +404,7 @@ end
         # Neighborhood search with `FullGridCellList` for GPU compatibility
         min_corner = minimum(tank.boundary.coordinates, dims=2)
         max_corner = maximum(tank.boundary.coordinates, dims=2)
-        cell_list = FullGridCellList(; min_corner, max_corner,
-                                     max_points_per_cell=500)
+        cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=500)
         semi_fullgrid = Semidiscretization(fluid_system, boundary_system,
                                            neighborhood_search=GridNeighborhoodSearch{2}(;
                                                                                          cell_list),
@@ -416,6 +415,9 @@ end
                                                "fluid", "hydrostatic_water_column_2d.jl");
                                       semi=semi_fullgrid, tspan=(0.0f0, 0.1f0))
 
+        semi_new = TrixiParticles.Adapt.adapt(semi_fullgrid.parallelization_backend,
+                                              sol.prob.p)
+
         # Interpolation parameters
         position_x = tank_size[1] / 2
         n_interpolation_points = 10
@@ -423,42 +425,39 @@ end
         end_point = [position_x, tank_size[2]]
 
         result = interpolate_line(start_point, end_point, n_interpolation_points,
-                                  semi_fullgrid, semi_fullgrid.systems[1], sol)
-
-        @test isapprox(plane.computed_density,
-                       [NaN,
-                           1051.550381846236,
-                           1058.840859422405,
-                           1054.6636640426582,
-                           1040.437243621303,
-                           1031.95170017119,
-                           1023.5350903161437,
-                           1009.1916142262469,
-                           693.8269492612843,
-                           NaN])
-
-        @test isapprox(result.density,
-                       [NaN,
-                           1066.8978311276544,
-                           1058.4834534513207,
-                           1049.7765132420302,
-                           1040.2795841900875,
-                           1030.331702082792,
-                           1020.2045843019229,
-                           1010.3658109398008,
-                           1003.3921419243012,
-                           NaN])
-
-        @test isapprox(result.pressure,
-                       [NaN,
-                           8198.684648008579,
-                           6983.142465751377,
-                           5788.908403992583,
-                           4551.910032518114,
-                           3327.0608294382264,
-                           2149.986239483061,
-                           1071.880176298758,
-                           342.81444988598906,
-                           NaN])
+                                  semi_new, semi_new.systems[1], sol; cut_off_bnd=false)
+
+        @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN
+                       Float32[62.50176,
+                               1053.805,
+                               1061.2959,
+                               1055.8348,
+                               1043.9069,
+                               1038.2051,
+                               1033.1708,
+                               1014.2249,
+                               672.61566])
+
+        @test isapprox(result.density[1:(end - 1)], # Exclude last NaN
+                       Float32[1078.3738,
+                               1070.8535,
+                               1061.2003,
+                               1052.4126,
+                               1044.5074,
+                               1037.0444,
+                               1028.4813,
+                               1014.7941,
+                               1003.6117])
+
+        @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN
+                       Float32[9940.595,
+                               8791.842,
+                               7368.837,
+                               6143.6562,
+                               5093.711,
+                               4143.313,
+                               3106.1575,
+                               1552.1078,
+                               366.71414])
     end
 end

From 82b6b461ac1593367ee19c85d8da46a46b4315f8 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 4 Jun 2025 22:31:16 +0200
Subject: [PATCH 15/17] add plane interpolation

---
 src/general/interpolation.jl | 25 +++++-----
 test/examples/gpu.jl         | 90 ++++++++++++++++++------------------
 2 files changed, 59 insertions(+), 56 deletions(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index 395dd4cac6..e1e44c2758 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -393,13 +393,7 @@ function interpolate_line(start, end_, n_points, semi, ref_system, v_ode, u_ode;
     # Convert to coordinate matrix
     points_coords_ = collect(reinterpret(reshape, eltype(start_svector), points_coords))
 
-    if semi.parallelization_backend isa KernelAbstractions.Backend
-        points_coords_new = Adapt.adapt(semi.parallelization_backend, points_coords_)
-    else
-        points_coords_new = points_coords_
-    end
-
-    return interpolate_points(points_coords_new, semi, ref_system, v_ode, u_ode;
+    return interpolate_points(points_coords_, semi, ref_system, v_ode, u_ode;
                               smoothing_length=smoothing_length,
                               cut_off_bnd=cut_off_bnd, clip_negative_pressure)
 end
@@ -510,12 +504,18 @@ function process_neighborhood_searches(semi, u_ode, ref_system, smoothing_length
 end
 
 # Interpolate points with given neighborhood searches
-@inline function interpolate_points(point_coords, semi, ref_system, v_ode, u_ode,
+@inline function interpolate_points(point_coords_, semi, ref_system, v_ode, u_ode,
                                     neighborhood_searches;
                                     smoothing_length=initial_smoothing_length(ref_system),
                                     cut_off_bnd=true, clip_negative_pressure=false)
     (; parallelization_backend) = semi
 
+    if semi.parallelization_backend isa KernelAbstractions.Backend
+        point_coords = Adapt.adapt(semi.parallelization_backend, point_coords_)
+    else
+        point_coords = point_coords_
+    end
+
     n_points = size(point_coords, 2)
     ELTYPE = eltype(point_coords)
     computed_density = allocate(semi.parallelization_backend, ELTYPE, n_points)
@@ -576,11 +576,13 @@ end
             computed_density[point] = NaN
             neighbor_count[point] = 0
 
+            # We need to convert the `NamedTuple` to a `Tuple` for GPU compatibility
             foreach(Tuple(cache)) do field
                 set_nan!(field, point)
             end
         else
-            # Normalize all quantities by the shepard coefficient
+            # Normalize all quantities by the shepard coefficient.
+            # We need to convert the `NamedTuple` to a `Tuple` for GPU compatibility.
             foreach(Tuple(cache)) do field
                 divide_by_shepard_coefficient!(field, shepard_coefficient, point)
             end
@@ -671,7 +673,7 @@ function set_nan!(field::AbstractVector, point)
     return field
 end
 
-function set_nan!(field::AbstractArray{T, 2}, point) where {T}
+function set_nan!(field::AbstractMatrix, point)
     for dim in axes(field, 1)
         field[dim, point] = NaN
     end
@@ -693,8 +695,7 @@ function divide_by_shepard_coefficient!(field::AbstractVector, shepard_coefficie
     return field
 end
 
-function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coefficient,
-                                        point) where {T}
+function divide_by_shepard_coefficient!(field::AbstractMatrix, shepard_coefficient, point)
     for dim in axes(field, 1)
         field[dim, point] /= shepard_coefficient[point]
     end
diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl
index 79f6e3fed8..1db23eaf9a 100644
--- a/test/examples/gpu.jl
+++ b/test/examples/gpu.jl
@@ -415,49 +415,51 @@ end
                                                "fluid", "hydrostatic_water_column_2d.jl");
                                       semi=semi_fullgrid, tspan=(0.0f0, 0.1f0))
 
-        semi_new = TrixiParticles.Adapt.adapt(semi_fullgrid.parallelization_backend,
-                                              sol.prob.p)
-
-        # Interpolation parameters
-        position_x = tank_size[1] / 2
-        n_interpolation_points = 10
-        start_point = [position_x, -fluid_particle_spacing]
-        end_point = [position_x, tank_size[2]]
-
-        result = interpolate_line(start_point, end_point, n_interpolation_points,
-                                  semi_new, semi_new.systems[1], sol; cut_off_bnd=false)
-
-        @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN
-                       Float32[62.50176,
-                               1053.805,
-                               1061.2959,
-                               1055.8348,
-                               1043.9069,
-                               1038.2051,
-                               1033.1708,
-                               1014.2249,
-                               672.61566])
-
-        @test isapprox(result.density[1:(end - 1)], # Exclude last NaN
-                       Float32[1078.3738,
-                               1070.8535,
-                               1061.2003,
-                               1052.4126,
-                               1044.5074,
-                               1037.0444,
-                               1028.4813,
-                               1014.7941,
-                               1003.6117])
-
-        @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN
-                       Float32[9940.595,
-                               8791.842,
-                               7368.837,
-                               6143.6562,
-                               5093.711,
-                               4143.313,
-                               3106.1575,
-                               1552.1078,
-                               366.71414])
+        semi_new = sol.prob.p
+
+        @testset verbose=true "Line" begin
+            # Interpolation parameters
+            position_x = tank_size[1] / 2
+            n_interpolation_points = 10
+            start_point = [position_x, -fluid_particle_spacing]
+            end_point = [position_x, tank_size[2]]
+
+            result = interpolate_line(start_point, end_point, n_interpolation_points,
+                                      semi_new, semi_new.systems[1], sol; cut_off_bnd=false)
+
+            @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN
+                           Float32[62.50176, 1053.805, 1061.2959, 1055.8348, 1043.9069,
+                                   1038.2051, 1033.1708, 1014.2249, 672.61566])
+
+            @test isapprox(result.density[1:(end - 1)], # Exclude last NaN
+                           Float32[1078.3738, 1070.8535, 1061.2003, 1052.4126, 1044.5074,
+                                   1037.0444, 1028.4813, 1014.7941, 1003.6117])
+
+            @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN
+                           Float32[9940.595, 8791.842, 7368.837, 6143.6562, 5093.711,
+                                   4143.313, 3106.1575, 1552.1078, 366.71414])
+        end
+
+        @testset verbose=true "Plane" begin
+            interpolation_start = [0.0, 0.0]
+            interpolation_end = [1.0, 1.0]
+            resolution = 0.4f0
+
+            result = interpolate_plane_2d(interpolation_start, interpolation_end,
+                                          resolution, semi_new, semi_new.systems[1], sol;
+                                          cut_off_bnd=false)
+
+            @test isapprox(result.computed_density,
+                           Float32[282.85278, 554.32996, 551.2026, 529.11, 1049.9423,
+                                   1049.9451, 504.8472, 1014.6541, 1013.62933])
+
+            @test isapprox(result.density,
+                           Float32[1074.302, 1077.213, 1073.9844, 1044.9037, 1046.0576,
+                                   1044.6786, 1008.5474, 1010.59863, 1009.51215])
+
+            @test isapprox(result.pressure,
+                           Float32[9310.613, 9762.787, 9262.167, 5144.3604, 5295.3286,
+                                   5115.5, 880.0212, 1098.6537, 982.35785])
+        end
     end
 end

From fab56c31c7c2677e87e33ba11faff89d5a0c8df6 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 5 Jun 2025 08:17:19 +0200
Subject: [PATCH 16/17] use dam break for solution

---
 test/examples/gpu.jl | 118 ++++++++++++++++++++-----------------------
 1 file changed, 56 insertions(+), 62 deletions(-)

diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl
index 1db23eaf9a..03fb2377d2 100644
--- a/test/examples/gpu.jl
+++ b/test/examples/gpu.jl
@@ -391,75 +391,69 @@ end
             @test backend == Main.parallelization_backend
         end
     end
-end
 
-@testset verbose=true "Postprocessing $TRIXIPARTICLES_TEST_" begin
-    @testset verbose=true "Interpolation" begin
-        # Import variables into scope
-        trixi_include_changeprecision(Float32, @__MODULE__,
-                                      joinpath(examples_dir(), "fluid",
-                                               "hydrostatic_water_column_2d.jl"),
-                                      sol=nothing, ode=nothing)
-
-        # Neighborhood search with `FullGridCellList` for GPU compatibility
-        min_corner = minimum(tank.boundary.coordinates, dims=2)
-        max_corner = maximum(tank.boundary.coordinates, dims=2)
-        cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=500)
-        semi_fullgrid = Semidiscretization(fluid_system, boundary_system,
-                                           neighborhood_search=GridNeighborhoodSearch{2}(;
-                                                                                         cell_list),
-                                           parallelization_backend=Main.parallelization_backend)
-
-        trixi_include_changeprecision(Float32, @__MODULE__,
-                                      joinpath(examples_dir(),
-                                               "fluid", "hydrostatic_water_column_2d.jl");
-                                      semi=semi_fullgrid, tspan=(0.0f0, 0.1f0))
-
-        semi_new = sol.prob.p
-
-        @testset verbose=true "Line" begin
-            # Interpolation parameters
-            position_x = tank_size[1] / 2
-            n_interpolation_points = 10
-            start_point = [position_x, -fluid_particle_spacing]
-            end_point = [position_x, tank_size[2]]
-
-            result = interpolate_line(start_point, end_point, n_interpolation_points,
-                                      semi_new, semi_new.systems[1], sol; cut_off_bnd=false)
-
-            @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN
-                           Float32[62.50176, 1053.805, 1061.2959, 1055.8348, 1043.9069,
-                                   1038.2051, 1033.1708, 1014.2249, 672.61566])
-
-            @test isapprox(result.density[1:(end - 1)], # Exclude last NaN
-                           Float32[1078.3738, 1070.8535, 1061.2003, 1052.4126, 1044.5074,
-                                   1037.0444, 1028.4813, 1014.7941, 1003.6117])
-
-            @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN
-                           Float32[9940.595, 8791.842, 7368.837, 6143.6562, 5093.711,
-                                   4143.313, 3106.1575, 1552.1078, 366.71414])
-        end
+    @testset verbose=true "Postprocessing $TRIXIPARTICLES_TEST_" begin
+        @testset verbose=true "Interpolation" begin
+            # Run the dam break example to get a solution
+            trixi_include_changeprecision(Float32, @__MODULE__,
+                                          joinpath(examples_dir(), "fluid",
+                                                   "dam_break_2d_gpu.jl");
+                                          fluid_particle_spacing=0.05f0,
+                                          tspan=(0.0f0, 0.01f0),
+                                          parallelization_backend=Main.parallelization_backend)
 
-        @testset verbose=true "Plane" begin
-            interpolation_start = [0.0, 0.0]
-            interpolation_end = [1.0, 1.0]
-            resolution = 0.4f0
+            semi_new = sol.prob.p
 
-            result = interpolate_plane_2d(interpolation_start, interpolation_end,
-                                          resolution, semi_new, semi_new.systems[1], sol;
+            @testset verbose=true "Line" begin
+                # Interpolation parameters
+                n_interpolation_points = 10
+                start_point = Float32[0.5, 0.0]
+                end_point = Float32[0.5, 0.5]
+
+                result = interpolate_line(start_point, end_point, n_interpolation_points,
+                                          semi_new, semi_new.systems[1], sol;
                                           cut_off_bnd=false)
 
-            @test isapprox(result.computed_density,
-                           Float32[282.85278, 554.32996, 551.2026, 529.11, 1049.9423,
-                                   1049.9451, 504.8472, 1014.6541, 1013.62933])
+                @test isapprox(result.computed_density,
+                               Float32[500.33255, 893.09766, 997.7032, 1001.14355, 1001.234,
+                                       1001.0098, 1000.4352, 999.7572, 999.1139, 989.6319])
+
+                @test isapprox(result.density,
+                               Float32[1002.3152, 1002.19653, 1001.99915, 1001.7685,
+                                       1001.5382,
+                                       1001.3093, 1001.0836, 1000.8649, 1000.635,
+                                       1000.4053])
 
-            @test isapprox(result.density,
-                           Float32[1074.302, 1077.213, 1073.9844, 1044.9037, 1046.0576,
-                                   1044.6786, 1008.5474, 1010.59863, 1009.51215])
+                @test isapprox(result.pressure,
+                               Float32[5450.902, 5171.2856, 4706.551, 4163.9185, 3621.5042,
+                                       3082.6948, 2551.5725, 2036.1208, 1494.8608,
+                                       954.14355])
+            end
 
-            @test isapprox(result.pressure,
-                           Float32[9310.613, 9762.787, 9262.167, 5144.3604, 5295.3286,
-                                   5115.5, 880.0212, 1098.6537, 982.35785])
+            @testset verbose=true "Plane" begin
+                interpolation_start = Float32[0.0, 0.0]
+                interpolation_end = Float32[1.0, 1.0]
+                resolution = 0.4f0
+
+                result = interpolate_plane_2d(interpolation_start, interpolation_end,
+                                              resolution, semi_new, semi_new.systems[1],
+                                              sol;
+                                              cut_off_bnd=false)
+
+                @test isapprox(result.computed_density,
+                               Float32[250.18625, 500.34482, 499.77225, 254.3632, 499.58026,
+                                       999.1413, 998.6351, 503.0122])
+
+                @test isapprox(result.density,
+                               Float32[1002.34467, 1002.3365, 1001.5021, 999.7109,
+                                       1000.84863,
+                                       1000.8373, 1000.3423, 1000.20734])
+
+                @test isapprox(result.pressure,
+                               Float32[5520.0513, 5501.1846, 3536.2256, -680.5194,
+                                       1997.7814,
+                                       1971.0717, 805.8584, 488.4068])
+            end
         end
     end
 end

From cc333b4f537565079e3874f791aa9e488bf21e5b Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 5 Jun 2025 08:23:44 +0200
Subject: [PATCH 17/17] revert CPU transfer

---
 src/general/interpolation.jl |  6 ++----
 test/examples/gpu.jl         | 12 ++++++------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/general/interpolation.jl b/src/general/interpolation.jl
index e1e44c2758..9b0ac40d7f 100644
--- a/src/general/interpolation.jl
+++ b/src/general/interpolation.jl
@@ -589,10 +589,8 @@ end
         end
     end
 
-    cache_ = map(x -> Array(x), cache)
-
-    return (; computed_density=Array(computed_density), point_coords=Array(point_coords),
-            neighbor_count=Array(neighbor_count), cache_...)
+    return (; computed_density=computed_density, point_coords=point_coords,
+            neighbor_count=neighbor_count, cache...)
 end
 
 @inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi)
diff --git a/test/examples/gpu.jl b/test/examples/gpu.jl
index 03fb2377d2..9d551a5273 100644
--- a/test/examples/gpu.jl
+++ b/test/examples/gpu.jl
@@ -414,17 +414,17 @@ end
                                           semi_new, semi_new.systems[1], sol;
                                           cut_off_bnd=false)
 
-                @test isapprox(result.computed_density,
+                @test isapprox(Array(result.computed_density),
                                Float32[500.33255, 893.09766, 997.7032, 1001.14355, 1001.234,
                                        1001.0098, 1000.4352, 999.7572, 999.1139, 989.6319])
 
-                @test isapprox(result.density,
+                @test isapprox(Array(result.density),
                                Float32[1002.3152, 1002.19653, 1001.99915, 1001.7685,
                                        1001.5382,
                                        1001.3093, 1001.0836, 1000.8649, 1000.635,
                                        1000.4053])
 
-                @test isapprox(result.pressure,
+                @test isapprox(Array(result.pressure),
                                Float32[5450.902, 5171.2856, 4706.551, 4163.9185, 3621.5042,
                                        3082.6948, 2551.5725, 2036.1208, 1494.8608,
                                        954.14355])
@@ -440,16 +440,16 @@ end
                                               sol;
                                               cut_off_bnd=false)
 
-                @test isapprox(result.computed_density,
+                @test isapprox(Array(result.computed_density),
                                Float32[250.18625, 500.34482, 499.77225, 254.3632, 499.58026,
                                        999.1413, 998.6351, 503.0122])
 
-                @test isapprox(result.density,
+                @test isapprox(Array(result.density),
                                Float32[1002.34467, 1002.3365, 1001.5021, 999.7109,
                                        1000.84863,
                                        1000.8373, 1000.3423, 1000.20734])
 
-                @test isapprox(result.pressure,
+                @test isapprox(Array(result.pressure),
                                Float32[5520.0513, 5501.1846, 3536.2256, -680.5194,
                                        1997.7814,
                                        1971.0717, 805.8584, 488.4068])