From bb14bb4343a0dff516d0bb1f31da27ef6db867a0 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 16 Apr 2025 15:48:06 +0200
Subject: [PATCH 01/33] fix

---
 src/general/semidiscretization.jl            | 19 +++++++++++--------
 src/schemes/boundary/open_boundary/system.jl | 18 ++++++++----------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
index c0a70da3f3..280cd21998 100644
--- a/src/general/semidiscretization.jl
+++ b/src/general/semidiscretization.jl
@@ -367,7 +367,8 @@ function initialize_neighborhood_searches!(semi)
         foreach_system(semi) do neighbor
             PointNeighbors.initialize!(get_neighborhood_search(system, neighbor, semi),
                                        initial_coordinates(system),
-                                       initial_coordinates(neighbor))
+                                       initial_coordinates(neighbor),
+                                       eachindex_y=active_particles(neighbor))
         end
     end
 
@@ -656,7 +657,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -679,7 +680,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -692,7 +693,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -716,7 +717,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -751,7 +752,8 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(system.ismoving[], true))
+            semi, points_moving=(system.ismoving[], true),
+            eachindex_y=active_particles(neighbor))
 end
 
 # This function is the same as the one above to avoid ambiguous dispatch when using `Union`
@@ -828,8 +830,9 @@ function update_nhs!(neighborhood_search,
 end
 
 # Forward to PointNeighbors.jl
-function update!(neighborhood_search, x, y, semi; points_moving=(true, false))
-    PointNeighbors.update!(neighborhood_search, x, y; points_moving,
+function update!(neighborhood_search, x, y, semi; points_moving=(true, false),
+                 eachindex_y=axes(y, 2))
+    PointNeighbors.update!(neighborhood_search, x, y; points_moving, eachindex_y,
                            parallelization_backend=semi.parallelization_backend)
 end
 
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index d1d1e305f8..9eb3895a28 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -257,8 +257,6 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
     u_fluid = wrap_u(u_ode, fluid_system, semi)
     v_fluid = wrap_v(v_ode, fluid_system, semi)
 
-    neighborhood_search = get_neighborhood_search(system, fluid_system, semi)
-
     for particle in each_moving_particle(system)
         particle_coords = current_coords(u, system, particle)
 
@@ -267,16 +265,16 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
             convert_particle!(system, fluid_system, boundary_zone, particle,
                               v, u, v_fluid, u_fluid)
         end
+    end
 
-        # Check the neighboring fluid particles whether they're entering the boundary zone
-        for neighbor in PointNeighbors.eachneighbor(particle_coords, neighborhood_search)
-            fluid_coords = current_coords(u_fluid, fluid_system, neighbor)
+    # Check the fluid particles whether they're entering the boundary zone
+    for fluid_particle in each_moving_particle(fluid_system)
+        fluid_coords = current_coords(u_fluid, fluid_system, fluid_particle)
 
-            # Check if neighboring fluid particle is in boundary zone
-            if is_in_boundary_zone(boundary_zone, fluid_coords)
-                convert_particle!(fluid_system, system, boundary_zone, neighbor,
-                                  v, u, v_fluid, u_fluid)
-            end
+        # Check if fluid particle is in boundary zone
+        if is_in_boundary_zone(boundary_zone, fluid_coords)
+            convert_particle!(fluid_system, system, boundary_zone, fluid_particle,
+                              v, u, v_fluid, u_fluid)
         end
     end
 

From ead8aa2c017a4411fe6e577ca9a38ff20710f360 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 16 Apr 2025 15:54:48 +0200
Subject: [PATCH 02/33] threaded loop

---
 src/general/buffer.jl                        | 26 +++++++++++++-------
 src/schemes/boundary/open_boundary/system.jl |  5 ++--
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 409a1c32d6..25c1c1fea0 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -62,27 +62,35 @@ end
 @inline function activate_next_particle(system)
     (; active_particle) = system.buffer
 
-    next_particle = findfirst(x -> !x, active_particle)
-
-    if isnothing(next_particle)
-        error("0 out of $(system.buffer.buffer_size) buffer particles available")
+    for particle in eachindex(active_particle)
+        if !active_particle[particle]
+            # Activate this particle. The return value is the old value.
+            # If this is `true`, the particle was active before and we need to continue.
+            # This happens because a particle might have been activated by another thread
+            # between the condition and the line below.
+            was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true
+
+            !was_active && return particle
+        end
     end
 
-    active_particle[next_particle] = true
-
-    return next_particle
+    error("0 out of $(system.buffer.buffer_size) buffer particles available")
 end
 
 @inline function deactivate_particle!(system, particle, u)
     (; active_particle) = system.buffer
 
-    active_particle[particle] = false
-
     # Set particle far away from simulation domain
     for dim in 1:ndims(system)
         # Inf or NaN causes instability outcome.
         u[dim, particle] = 1e16
     end
 
+    # To ensure thread safety, the buffer particle is only released for reuse
+    # after the write operation (`u`) has been completed.
+    # This guarantees that no other thread can access the active particle prematurely,
+    # avoiding race conditions.
+    active_particle[particle] = false
+
     return system
 end
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index 9eb3895a28..823c013aab 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -257,7 +257,8 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
     u_fluid = wrap_u(u_ode, fluid_system, semi)
     v_fluid = wrap_v(v_ode, fluid_system, semi)
 
-    for particle in each_moving_particle(system)
+    # Check the boundary particles whether they're leaving the boundary zone
+    @threaded semi for particle in each_moving_particle(system)
         particle_coords = current_coords(u, system, particle)
 
         # Check if boundary particle is outside the boundary zone
@@ -268,7 +269,7 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
     end
 
     # Check the fluid particles whether they're entering the boundary zone
-    for fluid_particle in each_moving_particle(fluid_system)
+    @threaded semi for fluid_particle in each_moving_particle(fluid_system)
         fluid_coords = current_coords(u_fluid, fluid_system, fluid_particle)
 
         # Check if fluid particle is in boundary zone

From 1e8fe3d688524b88048fd8b24c78bc746abcaabd Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 16 Apr 2025 17:13:34 +0200
Subject: [PATCH 03/33] revise mirroring

---
 src/general/semidiscretization.jl             | 24 +++++---
 .../boundary/open_boundary/mirroring.jl       | 56 +++++++++----------
 2 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
index 280cd21998..69db9a0e1e 100644
--- a/src/general/semidiscretization.jl
+++ b/src/general/semidiscretization.jl
@@ -74,7 +74,7 @@ function Semidiscretization(systems...;
 
     # Check e.g. that the boundary systems are using a state equation if EDAC is not used.
     # Other checks might be added here later.
-    check_configuration(systems)
+    check_configuration(systems, neighborhood_search)
 
     sizes_u = [u_nvariables(system) * n_moving_particles(system)
                for system in systems]
@@ -836,15 +836,15 @@ function update!(neighborhood_search, x, y, semi; points_moving=(true, false),
                            parallelization_backend=semi.parallelization_backend)
 end
 
-function check_configuration(systems)
+function check_configuration(systems, neighborhood_search)
     foreach_system(systems) do system
-        check_configuration(system, systems)
+        check_configuration(system, systems, neighborhood_search)
     end
 
     check_system_color(systems)
 end
 
-check_configuration(system, systems) = nothing
+check_configuration(system, systems, neighborhood_search) = nothing
 
 function check_system_color(systems)
     if any(system isa FluidSystem && !(system isa ParticlePackingSystem) &&
@@ -861,7 +861,7 @@ function check_system_color(systems)
     end
 end
 
-function check_configuration(fluid_system::FluidSystem, systems)
+function check_configuration(fluid_system::FluidSystem, systems, neighborhood_search)
     if !(fluid_system isa ParticlePackingSystem) && !isnothing(fluid_system.surface_tension)
         foreach_system(systems) do neighbor
             if neighbor isa FluidSystem && isnothing(fluid_system.surface_tension) &&
@@ -872,8 +872,8 @@ function check_configuration(fluid_system::FluidSystem, systems)
     end
 end
 
-function check_configuration(boundary_system::BoundarySPHSystem, systems)
-    (; boundary_model) = boundary_system
+function check_configuration(system::BoundarySPHSystem, systems, neighborhood_search)
+    (; boundary_model) = system
 
     foreach_system(systems) do neighbor
         if neighbor isa WeaklyCompressibleSPHSystem &&
@@ -885,7 +885,7 @@ function check_configuration(boundary_system::BoundarySPHSystem, systems)
     end
 end
 
-function check_configuration(system::TotalLagrangianSPHSystem, systems)
+function check_configuration(system::TotalLagrangianSPHSystem, systems, neighborhood_search)
     (; boundary_model) = system
 
     foreach_system(systems) do neighbor
@@ -902,7 +902,7 @@ function check_configuration(system::TotalLagrangianSPHSystem, systems)
     end
 end
 
-function check_configuration(system::OpenBoundarySPHSystem, systems)
+function check_configuration(system::OpenBoundarySPHSystem, systems, neighborhood_search)
     (; boundary_model, boundary_zone) = system
 
     if boundary_model isa BoundaryModelLastiwka &&
@@ -910,4 +910,10 @@ function check_configuration(system::OpenBoundarySPHSystem, systems)
         throw(ArgumentError("`BoundaryModelLastiwka` needs a specific flow direction. " *
                             "Please specify inflow and outflow."))
     end
+
+    if first(PointNeighbors.requires_update(neighborhood_search))
+        throw(ArgumentError("`OpenBoundarySPHSystem` requires a neighborhood search " *
+                            "that does not need an update for the `x` coordinates (e.g. `GridNeighborhoodSearch`). " *
+                            "See the PointNeighbors.jl documentation for more details."))
+    end
 end
diff --git a/src/schemes/boundary/open_boundary/mirroring.jl b/src/schemes/boundary/open_boundary/mirroring.jl
index 25402437cb..95f79a368f 100644
--- a/src/schemes/boundary/open_boundary/mirroring.jl
+++ b/src/schemes/boundary/open_boundary/mirroring.jl
@@ -36,7 +36,9 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
     two_to_end = SVector{ndims(system)}(2:(ndims(system) + 1))
 
     # Use the fluid-fluid nhs, since the boundary particles are mirrored into the fluid domain
-    neighborhood_search = get_neighborhood_search(fluid_system, fluid_system, semi)
+    nhs = get_neighborhood_search(fluid_system, fluid_system, semi)
+
+    fluid_coords = current_coordinates(u_fluid, fluid_system)
 
     @threaded semi for particle in each_moving_particle(system)
         particle_coords = current_coords(u_open_boundary, system, particle)
@@ -50,41 +52,35 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
         extrapolated_velocity_correction = zero(SMatrix{ndims(system), ndims(system) + 1,
                                                         eltype(system)})
 
-        for neighbor in PointNeighbors.eachneighbor(ghost_node_position,
-                                                    neighborhood_search)
-            neighbor_coords = current_coords(u_fluid, fluid_system, neighbor)
-            pos_diff = ghost_node_position - neighbor_coords
-            distance2 = dot(pos_diff, pos_diff)
-
-            if distance2 <= neighborhood_search.search_radius^2
-                distance = sqrt(distance2)
+        # TODO: Not public API
+        PointNeighbors.foreach_neighbor(fluid_coords, nhs, particle, ghost_node_position,
+                                        nhs.search_radius) do particle, neighbor, pos_diff,
+                                                              distance
+            m_b = hydrodynamic_mass(fluid_system, neighbor)
+            rho_b = current_density(v_fluid, fluid_system, neighbor)
+            pressure_b = current_pressure(v_fluid, fluid_system, neighbor)
+            v_b = current_velocity(v_fluid, fluid_system, neighbor)
 
-                m_b = hydrodynamic_mass(fluid_system, neighbor)
-                rho_b = current_density(v_fluid, fluid_system, neighbor)
-                pressure_b = current_pressure(v_fluid, fluid_system, neighbor)
-                v_b = current_velocity(v_fluid, fluid_system, neighbor)
+            # Project `v_b` on the normal direction of the boundary zone
+            # See https://doi.org/10.1016/j.jcp.2020.110029 Section 3.3.:
+            # "Because ﬂow from the inlet interface occurs perpendicular to the boundary,
+            # only this component of interpolated velocity is kept [...]"
+            v_b = dot(v_b, boundary_zone.plane_normal) * boundary_zone.plane_normal
 
-                # Project `v_b` on the normal direction of the boundary zone
-                # See https://doi.org/10.1016/j.jcp.2020.110029 Section 3.3.:
-                # "Because ﬂow from the inlet interface occurs perpendicular to the boundary,
-                # only this component of interpolated velocity is kept [...]"
-                v_b = dot(v_b, boundary_zone.plane_normal) * boundary_zone.plane_normal
+            kernel_value = smoothing_kernel(fluid_system, distance, particle)
+            grad_kernel = smoothing_kernel_grad(fluid_system, pos_diff, distance,
+                                                particle)
 
-                kernel_value = smoothing_kernel(fluid_system, distance, particle)
-                grad_kernel = smoothing_kernel_grad(fluid_system, pos_diff, distance,
-                                                    particle)
+            L, R = correction_arrays(kernel_value, grad_kernel, pos_diff, rho_b, m_b)
 
-                L, R = correction_arrays(kernel_value, grad_kernel, pos_diff, rho_b, m_b)
+            correction_matrix += L
 
-                correction_matrix += L
-
-                if !prescribed_pressure
-                    extrapolated_pressure_correction += pressure_b * R
-                end
+            if !prescribed_pressure
+                extrapolated_pressure_correction += pressure_b * R
+            end
 
-                if !prescribed_velocity
-                    extrapolated_velocity_correction += v_b * R'
-                end
+            if !prescribed_velocity
+                extrapolated_velocity_correction += v_b * R'
             end
         end
 

From 5ece6cb60142e7b688c3d1ea61f2527fc60c8fcb Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 16 Apr 2025 17:34:12 +0200
Subject: [PATCH 04/33] adapt example

---
 examples/fluid/pipe_flow_2d.jl | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/examples/fluid/pipe_flow_2d.jl b/examples/fluid/pipe_flow_2d.jl
index 4df1914a25..885087ebd9 100644
--- a/examples/fluid/pipe_flow_2d.jl
+++ b/examples/fluid/pipe_flow_2d.jl
@@ -136,8 +136,14 @@ boundary_system = BoundarySPHSystem(pipe.boundary, boundary_model)
 
 # ==========================================================================================
 # ==== Simulation
+min_corner = minimum(pipe.boundary.coordinates .- particle_spacing, dims=2)
+max_corner = maximum(pipe.boundary.coordinates .+ particle_spacing, dims=2)
+
+nhs = GridNeighborhoodSearch{2}(; cell_list=FullGridCellList(; min_corner, max_corner),
+                                update_strategy=ParallelUpdate())
+
 semi = Semidiscretization(fluid_system, open_boundary_in, open_boundary_out,
-                          boundary_system, parallelization_backend=true)
+                          boundary_system, neighborhood_search=nhs)
 
 ode = semidiscretize(semi, tspan)
 

From 7fea2d7b50b9448b900110645a2adf8da2cbbce7 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 17 Apr 2025 17:49:21 +0200
Subject: [PATCH 05/33] make gpu compatible

---
 src/general/buffer.jl                        |  5 ++--
 src/general/semidiscretization.jl            | 18 ++++++++------
 src/schemes/boundary/open_boundary/system.jl | 20 +++++----------
 src/setups/sphere_shape.jl                   | 26 ++++++++++----------
 4 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 25c1c1fea0..6b603d2313 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -18,7 +18,8 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
     (; buffer_size) = buffer
 
     # Initialize particles far away from simulation domain
-    coordinates = fill(1e16, ndims(initial_condition), buffer_size)
+    coordinates = fill(eltype(initial_condition)(1e-16), ndims(initial_condition),
+                       buffer_size)
 
     if all(rho -> isapprox(rho, first(initial_condition.density), atol=eps(), rtol=eps()),
            initial_condition.density)
@@ -83,7 +84,7 @@ end
     # Set particle far away from simulation domain
     for dim in 1:ndims(system)
         # Inf or NaN causes instability outcome.
-        u[dim, particle] = 1e16
+        u[dim, particle] = eltype(system)(1e16)
     end
 
     # To ensure thread safety, the buffer particle is only released for reuse
diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
index 69db9a0e1e..8d926247f4 100644
--- a/src/general/semidiscretization.jl
+++ b/src/general/semidiscretization.jl
@@ -836,15 +836,15 @@ function update!(neighborhood_search, x, y, semi; points_moving=(true, false),
                            parallelization_backend=semi.parallelization_backend)
 end
 
-function check_configuration(systems, neighborhood_search)
+function check_configuration(systems, nhs::PointNeighbors.AbstractNeighborhoodSearch)
     foreach_system(systems) do system
-        check_configuration(system, systems, neighborhood_search)
+        check_configuration(system, systems, nhs)
     end
 
     check_system_color(systems)
 end
 
-check_configuration(system, systems, neighborhood_search) = nothing
+check_configuration(system, systems, ::PointNeighbors.AbstractNeighborhoodSearch) = nothing
 
 function check_system_color(systems)
     if any(system isa FluidSystem && !(system isa ParticlePackingSystem) &&
@@ -861,7 +861,8 @@ function check_system_color(systems)
     end
 end
 
-function check_configuration(fluid_system::FluidSystem, systems, neighborhood_search)
+function check_configuration(fluid_system::FluidSystem, systems,
+                             ::PointNeighbors.AbstractNeighborhoodSearch)
     if !(fluid_system isa ParticlePackingSystem) && !isnothing(fluid_system.surface_tension)
         foreach_system(systems) do neighbor
             if neighbor isa FluidSystem && isnothing(fluid_system.surface_tension) &&
@@ -872,7 +873,8 @@ function check_configuration(fluid_system::FluidSystem, systems, neighborhood_se
     end
 end
 
-function check_configuration(system::BoundarySPHSystem, systems, neighborhood_search)
+function check_configuration(system::BoundarySPHSystem, systems,
+                             ::PointNeighbors.AbstractNeighborhoodSearch)
     (; boundary_model) = system
 
     foreach_system(systems) do neighbor
@@ -885,7 +887,8 @@ function check_configuration(system::BoundarySPHSystem, systems, neighborhood_se
     end
 end
 
-function check_configuration(system::TotalLagrangianSPHSystem, systems, neighborhood_search)
+function check_configuration(system::TotalLagrangianSPHSystem, systems,
+                             ::PointNeighbors.AbstractNeighborhoodSearch)
     (; boundary_model) = system
 
     foreach_system(systems) do neighbor
@@ -902,7 +905,8 @@ function check_configuration(system::TotalLagrangianSPHSystem, systems, neighbor
     end
 end
 
-function check_configuration(system::OpenBoundarySPHSystem, systems, neighborhood_search)
+function check_configuration(system::OpenBoundarySPHSystem, systems,
+                             neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
     (; boundary_model, boundary_zone) = system
 
     if boundary_model isa BoundaryModelLastiwka &&
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index 823c013aab..e210357de1 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -367,14 +367,9 @@ end
 end
 
 function write_v0!(v0, system::OpenBoundarySPHSystem)
-    (; initial_condition) = system
-
-    for particle in eachparticle(system)
-        # Write particle velocities
-        for dim in 1:ndims(system)
-            v0[dim, particle] = initial_condition.velocity[dim, particle]
-        end
-    end
+    # This is as fast as a loop with `@inbounds`, but it's GPU-compatible
+    indices = CartesianIndices(system.initial_condition.velocity)
+    copyto!(v0, indices, system.initial_condition.velocity, indices)
 
     return v0
 end
@@ -382,12 +377,9 @@ end
 function write_u0!(u0, system::OpenBoundarySPHSystem)
     (; initial_condition) = system
 
-    for particle in eachparticle(system)
-        # Write particle velocities
-        for dim in 1:ndims(system)
-            u0[dim, particle] = initial_condition.coordinates[dim, particle]
-        end
-    end
+    # This is as fast as a loop with `@inbounds`, but it's GPU-compatible
+    indices = CartesianIndices(initial_condition.coordinates)
+    copyto!(u0, indices, initial_condition.coordinates, indices)
 
     return u0
 end
diff --git a/src/setups/sphere_shape.jl b/src/setups/sphere_shape.jl
index 4889e13464..0233fc8995 100644
--- a/src/setups/sphere_shape.jl
+++ b/src/setups/sphere_shape.jl
@@ -90,7 +90,7 @@ SphereShape(0.1, 0.5, (0.2, 0.4, 0.3), 1000.0, sphere_type=RoundSphere())
 function SphereShape(particle_spacing, radius, center_position, density;
                      sphere_type=VoxelSphere(), n_layers=-1, layer_outwards=false,
                      cutout_min=(0.0, 0.0), cutout_max=(0.0, 0.0), tlsph=false,
-                     velocity=zeros(length(center_position)), mass=nothing, pressure=0.0)
+                     velocity=zeros(length(center_position)), mass=nothing, pressure=0)
     if particle_spacing < eps()
         throw(ArgumentError("`particle_spacing` needs to be positive and larger than $(eps())"))
     end
@@ -177,8 +177,8 @@ function sphere_shape_coords(::VoxelSphere, particle_spacing, radius, center_pos
 
             if !tlsph
                 # Put first layer of particles half a particle spacing outside of `radius`
-                inner_radius += 0.5particle_spacing
-                outer_radius += 0.5particle_spacing
+                inner_radius += particle_spacing / 2
+                outer_radius += particle_spacing / 2
             end
         else
             inner_radius = radius - n_layers * particle_spacing
@@ -186,17 +186,17 @@ function sphere_shape_coords(::VoxelSphere, particle_spacing, radius, center_pos
 
             if !tlsph
                 # Put first layer of particles half a particle spacing inside of `radius`
-                inner_radius -= 0.5particle_spacing
-                outer_radius -= 0.5particle_spacing
+                inner_radius -= particle_spacing / 2
+                outer_radius -= particle_spacing / 2
             end
         end
     else
         outer_radius = radius
-        inner_radius = -1.0
+        inner_radius = -1
 
         if !tlsph
             # Put first layer of particles half a particle spacing inside of `radius`
-            outer_radius -= 0.5particle_spacing
+            outer_radius -= particle_spacing / 2
         end
     end
 
@@ -235,12 +235,12 @@ function sphere_shape_coords(sphere::RoundSphere, particle_spacing, radius, cent
 
         if !tlsph
             # Put first layer of particles half a particle spacing outside of inner radius
-            inner_radius += 0.5particle_spacing
+            inner_radius += particle_spacing / 2
         end
     else
         if tlsph
             # Just create a sphere that is 0.5 particle spacing larger
-            radius += 0.5particle_spacing
+            radius += particle_spacing / 2
         end
 
         # Each layer has thickness `particle_spacing`
@@ -252,15 +252,15 @@ function sphere_shape_coords(sphere::RoundSphere, particle_spacing, radius, cent
         end
 
         # Same as above, which puts the inner radius between 0 and `particle_spacing`
-        inner_radius = max(0.0, radius - n_layers * particle_spacing + 0.5particle_spacing)
+        inner_radius = max(0, radius - n_layers * particle_spacing + particle_spacing / 2)
     end
 
-    coords = zeros(length(center), 0)
+    coords = zeros(eltype(particle_spacing), length(center), 0)
 
     for layer in 0:(n_layers - 1)
         sphere_coords = round_sphere(sphere, particle_spacing,
                                      inner_radius + layer * particle_spacing, center)
-        coords = hcat(coords, sphere_coords)
+        coords = hcat(coords, convert(Array{eltype(coords)}, sphere_coords))
     end
 
     return coords
@@ -321,7 +321,7 @@ function round_sphere(sphere, particle_spacing, radius, center::SVector{3})
         elseif n_particles == 3
             # Return 2D triangle
             y = sin(2pi / 3)
-            return [1 -0.5 -0.5;
+            return [1 -1/2 -1/2;
                     0 y -y;
                     0 0 0] * radius .+ center
         elseif n_particles == 2

From 03fe7428f321c088865c85eabc52124f81fd7b9c Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 8 May 2025 19:21:17 +0200
Subject: [PATCH 06/33] fix check configuration

---
 src/general/semidiscretization.jl | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
index 70d0959b63..c67bae0454 100644
--- a/src/general/semidiscretization.jl
+++ b/src/general/semidiscretization.jl
@@ -838,15 +838,19 @@ function update!(neighborhood_search, x, y, semi; points_moving=(true, false),
                            parallelization_backend=semi.parallelization_backend)
 end
 
-function check_configuration(systems, nhs::PointNeighbors.AbstractNeighborhoodSearch)
+function check_configuration(systems, nhs)
     foreach_system(systems) do system
-        check_configuration(system, systems, nhs)
+        check_configuration(system, systems)
+    end
+
+    foreach_system(systems) do system
+        check_neighborhood_search_configuration(system, nhs)
     end
 
     check_system_color(systems)
 end
 
-check_configuration(system, systems, ::PointNeighbors.AbstractNeighborhoodSearch) = nothing
+check_configuration(system, systems) = nothing
 
 function check_system_color(systems)
     if any(system isa FluidSystem && !(system isa ParticlePackingSystem) &&
@@ -863,8 +867,7 @@ function check_system_color(systems)
     end
 end
 
-function check_configuration(fluid_system::FluidSystem, systems,
-                             ::PointNeighbors.AbstractNeighborhoodSearch)
+function check_configuration(fluid_system::FluidSystem, systems)
     if !(fluid_system isa ParticlePackingSystem) && !isnothing(fluid_system.surface_tension)
         foreach_system(systems) do neighbor
             if neighbor isa FluidSystem && isnothing(fluid_system.surface_tension) &&
@@ -875,8 +878,7 @@ function check_configuration(fluid_system::FluidSystem, systems,
     end
 end
 
-function check_configuration(system::BoundarySPHSystem, systems,
-                             ::PointNeighbors.AbstractNeighborhoodSearch)
+function check_configuration(system::BoundarySPHSystem, systems)
     (; boundary_model) = system
 
     foreach_system(systems) do neighbor
@@ -889,8 +891,7 @@ function check_configuration(system::BoundarySPHSystem, systems,
     end
 end
 
-function check_configuration(system::TotalLagrangianSPHSystem, systems,
-                             ::PointNeighbors.AbstractNeighborhoodSearch)
+function check_configuration(system::TotalLagrangianSPHSystem, systems)
     (; boundary_model) = system
 
     foreach_system(systems) do neighbor
@@ -907,8 +908,10 @@ function check_configuration(system::TotalLagrangianSPHSystem, systems,
     end
 end
 
-function check_configuration(system::OpenBoundarySPHSystem, systems,
-                             neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
+check_neighborhood_search_configuration(system, nhs) = system
+
+function check_neighborhood_search_configuration(system::OpenBoundarySPHSystem,
+                                                 neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
     (; boundary_model, boundary_zone) = system
 
     if boundary_model isa BoundaryModelLastiwka &&

From 63b60a1244fdf3c0c70040638aa05bf1abd48706 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Fri, 9 May 2025 12:23:01 +0200
Subject: [PATCH 07/33] adapt open boundaries

---
 src/general/buffer.jl                         |  52 +++--
 src/general/gpu.jl                            |  22 ++
 src/general/semidiscretization.jl             |  27 ++-
 .../boundary/open_boundary/boundary_zones.jl  |  57 +++--
 .../method_of_characteristics.jl              |  31 +--
 src/schemes/boundary/open_boundary/system.jl  | 194 ++++++++++--------
 6 files changed, 221 insertions(+), 162 deletions(-)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 94c5d2c083..33d664587c 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -1,15 +1,16 @@
-struct SystemBuffer{V}
-    active_particle :: Vector{Bool}
-    eachparticle    :: V # Vector{Int}
-    buffer_size     :: Int
+struct SystemBuffer{AP, APC, EP}
+    active_particle       :: AP # Vector{Bool}
+    active_particle_count :: APC
+    eachparticle          :: EP # Vector{Int}
+    buffer_size           :: Int
+end
 
-    function SystemBuffer(active_size, buffer_size::Integer)
-        # We cannot use a `BitVector` here, as writing to a `BitVector` is not thread-safe
-        active_particle = vcat(fill(true, active_size), fill(false, buffer_size))
-        eachparticle = collect(1:active_size)
+function SystemBuffer(active_size, buffer_size::Integer)
+    # We cannot use a `BitVector` here, as writing to a `BitVector` is not thread-safe
+    active_particle = vcat(fill(true, active_size), fill(false, buffer_size))
+    eachparticle = collect(eachindex(active_particle))
 
-        return new{typeof(eachparticle)}(active_particle, eachparticle, buffer_size)
-    end
+    return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)
 end
 
 allocate_buffer(initial_condition, ::Nothing) = initial_condition
@@ -35,30 +36,39 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
     return union(initial_condition, buffer_ic)
 end
 
-@inline update_system_buffer!(buffer::Nothing) = buffer
+@inline update_system_buffer!(buffer::Nothing, semi) = buffer
 
 # TODO `resize` allocates. Find a non-allocating version
-@inline function update_system_buffer!(buffer::SystemBuffer)
+@inline function update_system_buffer!(buffer::SystemBuffer, semi)
     (; active_particle) = buffer
 
-    resize!(buffer.eachparticle, count(active_particle))
 
-    i = 1
-    for j in eachindex(active_particle)
-        if active_particle[j]
-            buffer.eachparticle[i] = j
-            i += 1
+    buffer.active_particle_count[] = count(active_particle)
+    buffer.eachparticle .= -1
+
+    @threaded semi for i in 1:buffer.active_particle_count[]
+        active = 0
+        for j in eachindex(active_particle)
+            if active_particle[j]
+                active += 1
+                if active == i
+                    buffer.eachparticle[i] = j
+                    break
+                end
+            end
         end
     end
 
     return buffer
 end
 
-@inline each_moving_particle(system, buffer) = buffer.eachparticle
+@inline each_moving_particle(system, buffer) = view(buffer.eachparticle,
+                                                    1:buffer.active_particle_count[])
 
 @inline active_coordinates(u, system, buffer) = view(u, :, buffer.active_particle)
 
-@inline active_particles(system, buffer) = buffer.eachparticle
+@inline active_particles(system, buffer) = view(buffer.eachparticle,
+                                                1:buffer.active_particle_count[])
 
 @inline function activate_next_particle(system)
     (; active_particle) = system.buffer
@@ -75,7 +85,7 @@ end
         end
     end
 
-    error("0 out of $(system.buffer.buffer_size) buffer particles available")
+    error("No buffer particles available")
 end
 
 @inline function deactivate_particle!(system, particle, u)
diff --git a/src/general/gpu.jl b/src/general/gpu.jl
index 87b5f95aa4..1b371ad5f1 100644
--- a/src/general/gpu.jl
+++ b/src/general/gpu.jl
@@ -17,6 +17,28 @@ Adapt.@adapt_structure BoundaryModelDummyParticles
 Adapt.@adapt_structure BoundaryModelMonaghanKajtar
 Adapt.@adapt_structure BoundaryMovement
 Adapt.@adapt_structure TotalLagrangianSPHSystem
+Adapt.@adapt_structure OpenBoundarySPHSystem
+Adapt.@adapt_structure BoundaryZone
+Adapt.@adapt_structure SystemBuffer
+
+function Adapt.adapt_structure(to, system::OpenBoundarySPHSystem)
+    return OpenBoundarySPHSystem(Adapt.adapt(to, system.boundary_model),
+                                 Adapt.adapt(to, system.initial_condition),
+                                 nothing,  # Do not adapt `fluid_system``
+                                 Adapt.adapt(to, system.fluid_system_index),
+                                 Adapt.adapt(to, system.smoothing_length),
+                                 Adapt.adapt(to, system.mass),
+                                 Adapt.adapt(to, system.density),
+                                 Adapt.adapt(to, system.volume),
+                                 Adapt.adapt(to, system.pressure),
+                                 Adapt.adapt(to, system.boundary_zone),
+                                 Adapt.adapt(to, system.reference_velocity),
+                                 Adapt.adapt(to, system.reference_pressure),
+                                 Adapt.adapt(to, system.reference_density),
+                                 Adapt.adapt(to, system.buffer),
+                                 Adapt.adapt(to, system.update_callback_used),
+                                 Adapt.adapt(to, system.cache))
+end
 
 KernelAbstractions.get_backend(::PtrArray) = KernelAbstractions.CPU()
 KernelAbstractions.get_backend(system::System) = KernelAbstractions.get_backend(system.mass)
diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
index c67bae0454..775b73a520 100644
--- a/src/general/semidiscretization.jl
+++ b/src/general/semidiscretization.jl
@@ -838,19 +838,16 @@ function update!(neighborhood_search, x, y, semi; points_moving=(true, false),
                            parallelization_backend=semi.parallelization_backend)
 end
 
-function check_configuration(systems, nhs)
+function check_configuration(systems,
+                             nhs::Union{Nothing, PointNeighbors.AbstractNeighborhoodSearch})
     foreach_system(systems) do system
-        check_configuration(system, systems)
-    end
-
-    foreach_system(systems) do system
-        check_neighborhood_search_configuration(system, nhs)
+        check_configuration(system, systems, nhs)
     end
 
     check_system_color(systems)
 end
 
-check_configuration(system, systems) = nothing
+check_configuration(system::System, systems, nhs) = nothing
 
 function check_system_color(systems)
     if any(system isa FluidSystem && !(system isa ParticlePackingSystem) &&
@@ -867,7 +864,7 @@ function check_system_color(systems)
     end
 end
 
-function check_configuration(fluid_system::FluidSystem, systems)
+function check_configuration(fluid_system::FluidSystem, systems, nhs)
     if !(fluid_system isa ParticlePackingSystem) && !isnothing(fluid_system.surface_tension)
         foreach_system(systems) do neighbor
             if neighbor isa FluidSystem && isnothing(fluid_system.surface_tension) &&
@@ -878,7 +875,7 @@ function check_configuration(fluid_system::FluidSystem, systems)
     end
 end
 
-function check_configuration(system::BoundarySPHSystem, systems)
+function check_configuration(system::BoundarySPHSystem, systems, nhs)
     (; boundary_model) = system
 
     foreach_system(systems) do neighbor
@@ -891,7 +888,7 @@ function check_configuration(system::BoundarySPHSystem, systems)
     end
 end
 
-function check_configuration(system::TotalLagrangianSPHSystem, systems)
+function check_configuration(system::TotalLagrangianSPHSystem, systems, nhs)
     (; boundary_model) = system
 
     foreach_system(systems) do neighbor
@@ -908,12 +905,14 @@ function check_configuration(system::TotalLagrangianSPHSystem, systems)
     end
 end
 
-check_neighborhood_search_configuration(system, nhs) = system
-
-function check_neighborhood_search_configuration(system::OpenBoundarySPHSystem,
-                                                 neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
+function check_configuration(system::OpenBoundarySPHSystem, systems,
+                             neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
     (; boundary_model, boundary_zone) = system
 
+    fluid_system_index = findfirst(==(system.fluid_system), systems)
+
+    system.fluid_system_index[] = fluid_system_index
+
     if boundary_model isa BoundaryModelLastiwka &&
        boundary_zone isa BoundaryZone{BidirectionalFlow}
         throw(ArgumentError("`BoundaryModelLastiwka` needs a specific flow direction. " *
diff --git a/src/schemes/boundary/open_boundary/boundary_zones.jl b/src/schemes/boundary/open_boundary/boundary_zones.jl
index ad732b13d0..90d587bb36 100644
--- a/src/schemes/boundary/open_boundary/boundary_zones.jl
+++ b/src/schemes/boundary/open_boundary/boundary_zones.jl
@@ -81,50 +81,49 @@ bidirectional_flow = BoundaryZone(; plane=plane_points, plane_normal, particle_s
 !!! warning "Experimental Implementation"
     This is an experimental feature and may change in any future releases.
 """
-struct BoundaryZone{F, NDIMS, IC, S, ZO, ZW, FD, PN}
+struct BoundaryZone{BT, IC, S, ZO, ZW, FD, PN}
     initial_condition :: IC
     spanning_set      :: S
     zone_origin       :: ZO
     zone_width        :: ZW
     flow_direction    :: FD
     plane_normal      :: PN
+    boundary_type     :: BT
+end
 
-    function BoundaryZone(; plane, plane_normal, density, particle_spacing,
-                          initial_condition=nothing, extrude_geometry=nothing,
-                          open_boundary_layers::Integer, boundary_type=BidirectionalFlow())
-        if open_boundary_layers <= 0
-            throw(ArgumentError("`open_boundary_layers` must be positive and greater than zero"))
-        end
+function BoundaryZone(; plane, plane_normal, density, particle_spacing,
+                      initial_condition=nothing, extrude_geometry=nothing,
+                      open_boundary_layers::Integer, boundary_type=BidirectionalFlow())
+    if open_boundary_layers <= 0
+        throw(ArgumentError("`open_boundary_layers` must be positive and greater than zero"))
+    end
 
-        # `plane_normal` always points in fluid domain
-        plane_normal_ = normalize(SVector(plane_normal...))
+    # `plane_normal` always points in fluid domain
+    plane_normal_ = normalize(SVector(plane_normal...))
 
-        if boundary_type isa BidirectionalFlow
-            flow_direction = nothing
+    if boundary_type isa BidirectionalFlow
+        flow_direction = nothing
 
-        elseif boundary_type isa InFlow
-            # Unit vector pointing in downstream direction
-            flow_direction = plane_normal_
+    elseif boundary_type isa InFlow
+        # Unit vector pointing in downstream direction
+        flow_direction = plane_normal_
 
-        elseif boundary_type isa OutFlow
-            # Unit vector pointing in downstream direction
-            flow_direction = -plane_normal_
-        end
+    elseif boundary_type isa OutFlow
+        # Unit vector pointing in downstream direction
+        flow_direction = -plane_normal_
+    end
 
-        ic, spanning_set_, zone_origin,
-        zone_width = set_up_boundary_zone(plane, plane_normal_, flow_direction, density,
-                                          particle_spacing, initial_condition,
-                                          extrude_geometry, open_boundary_layers;
-                                          boundary_type=boundary_type)
+    ic, spanning_set_, zone_origin,
+    zone_width = set_up_boundary_zone(plane, plane_normal_, flow_direction, density,
+                                      particle_spacing, initial_condition,
+                                      extrude_geometry, open_boundary_layers;
+                                      boundary_type=boundary_type)
 
-        return new{typeof(boundary_type), ndims(ic), typeof(ic), typeof(spanning_set_),
-                   typeof(zone_origin), typeof(zone_width), typeof(flow_direction),
-                   typeof(plane_normal_)}(ic, spanning_set_, zone_origin, zone_width,
-                                          flow_direction, plane_normal_)
-    end
+    return BoundaryZone(ic, spanning_set_, zone_origin, zone_width,
+                        flow_direction, plane_normal_, boundary_type)
 end
 
-@inline Base.ndims(::BoundaryZone{F, NDIMS}) where {F, NDIMS} = NDIMS
+@inline Base.ndims(::BoundaryZone) = error("`ndims` not defined for `BoundaryZone`")
 
 function set_up_boundary_zone(plane, plane_normal, flow_direction, density,
                               particle_spacing, initial_condition, extrude_geometry,
diff --git a/src/schemes/boundary/open_boundary/method_of_characteristics.jl b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
index 945606e740..aa41c08749 100644
--- a/src/schemes/boundary/open_boundary/method_of_characteristics.jl
+++ b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
@@ -25,15 +25,17 @@ end
 @inline function update_boundary_quantities!(system, boundary_model::BoundaryModelLastiwka,
                                              v, u, v_ode, u_ode, semi, t)
     (; density, pressure, cache, boundary_zone,
-     reference_velocity, reference_pressure, reference_density) = system
+    reference_velocity, reference_pressure, reference_density) = system
     (; flow_direction) = boundary_zone
 
-    sound_speed = system_sound_speed(system.fluid_system)
+    fluid_system = corresponding_fluid_system(system, semi)
+
+    sound_speed = system_sound_speed(fluid_system)
 
     if boundary_model.extrapolate_reference_values
         (; prescribed_pressure, prescribed_velocity, prescribed_density) = cache
-        v_fluid = wrap_v(v_ode, system.fluid_system, semi)
-        u_fluid = wrap_u(u_ode, system.fluid_system, semi)
+        v_fluid = wrap_v(v_ode, fluid_system, semi)
+        u_fluid = wrap_u(u_ode, fluid_system, semi)
 
         @trixi_timeit timer() "extrapolate and correct values" begin
             extrapolate_values!(system, v, v_fluid, u, u_fluid, semi, t;
@@ -52,11 +54,11 @@ end
 
         rho_ref = reference_value(reference_density, density[particle],
                                   particle_position, t)
-        density[particle] = rho_ref + ((-J1 + 0.5 * (J2 + J3)) / sound_speed^2)
+        density[particle] = rho_ref + ((-J1 + (J2 + J3) / 2) / sound_speed^2)
 
         p_ref = reference_value(reference_pressure, pressure[particle],
                                 particle_position, t)
-        pressure[particle] = p_ref + 0.5 * (J2 + J3)
+        pressure[particle] = p_ref + (J2 + J3) / 2
 
         v_current = current_velocity(v, system, particle)
         v_ref = reference_value(reference_velocity, v_current,
@@ -88,8 +90,9 @@ end
 function evaluate_characteristics!(system, v, u, v_ode, u_ode, semi, t)
     (; volume, cache, boundary_zone) = system
     (; characteristics, previous_characteristics) = cache
+    fluid_system = corresponding_fluid_system(system, semi)
 
-    for particle in eachparticle(system)
+    @threaded semi for particle in eachparticle(system)
         previous_characteristics[1, particle] = characteristics[1, particle]
         previous_characteristics[2, particle] = characteristics[2, particle]
         previous_characteristics[3, particle] = characteristics[3, particle]
@@ -99,7 +102,7 @@ function evaluate_characteristics!(system, v, u, v_ode, u_ode, semi, t)
     set_zero!(volume)
 
     # Evaluate the characteristic variables with the fluid system
-    evaluate_characteristics!(system, system.fluid_system, v, u, v_ode, u_ode, semi, t)
+    evaluate_characteristics!(system, fluid_system, v, u, v_ode, u_ode, semi, t)
 
     # Only some of the in-/outlet particles are in the influence of the fluid particles.
     # Thus, we compute the characteristics for the particles that are outside the influence
@@ -108,13 +111,13 @@ function evaluate_characteristics!(system, v, u, v_ode, u_ode, semi, t)
     @threaded semi for particle in each_moving_particle(system)
 
         # Particle is outside of the influence of fluid particles
-        if isapprox(volume[particle], 0.0)
+        if isapprox(volume[particle], 0)
 
             # Using the average of the values at the previous time step for particles which
             # are outside of the influence of fluid particles.
-            avg_J1 = 0.0
-            avg_J2 = 0.0
-            avg_J3 = 0.0
+            avg_J1 = zero(volume[particle])
+            avg_J2 = zero(volume[particle])
+            avg_J3 = zero(volume[particle])
             counter = 0
 
             for neighbor in each_moving_particle(system)
@@ -152,7 +155,7 @@ end
 function evaluate_characteristics!(system, neighbor_system::FluidSystem,
                                    v, u, v_ode, u_ode, semi, t)
     (; volume, cache, boundary_zone, density, pressure,
-     reference_velocity, reference_pressure, reference_density) = system
+    reference_velocity, reference_pressure, reference_density) = system
     (; flow_direction) = boundary_zone
     (; characteristics) = cache
 
@@ -161,7 +164,7 @@ function evaluate_characteristics!(system, neighbor_system::FluidSystem,
 
     system_coords = current_coordinates(u, system)
     neighbor_coords = current_coordinates(u_neighbor_system, neighbor_system)
-    sound_speed = system_sound_speed(system.fluid_system)
+    sound_speed = system_sound_speed(neighbor_system)
 
     # Loop over all fluid neighbors within the kernel cutoff
     foreach_point_neighbor(system, neighbor_system, system_coords, neighbor_coords, semi;
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index cd33e9df56..70257d3a6e 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -35,11 +35,13 @@ Open boundary system for in- and outflow particles.
 !!! warning "Experimental Implementation"
 	This is an experimental feature and may change in any future releases.
 """
-struct OpenBoundarySPHSystem{BM, BZ, NDIMS, ELTYPE <: Real, IC, FS, ARRAY1D, RV,
-                             RP, RD, B, C} <: System{NDIMS}
+struct OpenBoundarySPHSystem{BM, ELTYPE, NDIMS, IC, FS, FSI, ARRAY1D, BZ, RV,
+                             RP, RD, B, UCU, C} <: System{NDIMS}
+    boundary_model       :: BM
     initial_condition    :: IC
     fluid_system         :: FS
-    boundary_model       :: BM
+    fluid_system_index   :: FSI
+    smoothing_length     :: ELTYPE
     mass                 :: ARRAY1D # Array{ELTYPE, 1}: [particle]
     density              :: ARRAY1D # Array{ELTYPE, 1}: [particle]
     volume               :: ARRAY1D # Array{ELTYPE, 1}: [particle]
@@ -49,91 +51,110 @@ struct OpenBoundarySPHSystem{BM, BZ, NDIMS, ELTYPE <: Real, IC, FS, ARRAY1D, RV,
     reference_pressure   :: RP
     reference_density    :: RD
     buffer               :: B
-    update_callback_used :: Ref{Bool}
+    update_callback_used :: UCU
     cache                :: C
+end
+
+function OpenBoundarySPHSystem(boundary_model, initial_condition, fluid_system,
+                               fluid_system_index, smoothing_length, mass, density, volume,
+                               pressure, boundary_zone, reference_velocity,
+                               reference_pressure, reference_density, buffer,
+                               update_callback_used, cache)
+    OpenBoundarySPHSystem{typeof(boundary_model), eltype(mass), ndims(initial_condition),
+                          typeof(initial_condition), typeof(fluid_system),
+                          typeof(fluid_system_index), typeof(mass), typeof(boundary_zone),
+                          typeof(reference_velocity), typeof(reference_pressure),
+                          typeof(reference_density), typeof(buffer),
+                          typeof(update_callback_used),
+                          typeof(cache)}(boundary_model, initial_condition, fluid_system,
+                                         fluid_system_index, smoothing_length, mass,
+                                         density, volume, pressure, boundary_zone,
+                                         reference_velocity, reference_pressure,
+                                         reference_density, buffer, update_callback_used,
+                                         cache)
+end
+
+function OpenBoundarySPHSystem(boundary_zone::BoundaryZone;
+                               fluid_system::FluidSystem,
+                               buffer_size::Integer, boundary_model,
+                               reference_velocity=nothing,
+                               reference_pressure=nothing,
+                               reference_density=nothing)
+    (; initial_condition) = boundary_zone
 
-    function OpenBoundarySPHSystem(boundary_zone::BoundaryZone;
-                                   fluid_system::FluidSystem,
-                                   buffer_size::Integer, boundary_model,
-                                   reference_velocity=nothing,
-                                   reference_pressure=nothing,
-                                   reference_density=nothing)
-        (; initial_condition) = boundary_zone
-
-        check_reference_values!(boundary_model, reference_density, reference_pressure,
-                                reference_velocity)
-
-        buffer = SystemBuffer(nparticles(initial_condition), buffer_size)
-
-        initial_condition = allocate_buffer(initial_condition, buffer)
-
-        NDIMS = ndims(initial_condition)
-        ELTYPE = eltype(initial_condition)
-
-        pressure = copy(initial_condition.pressure)
-        mass = copy(initial_condition.mass)
-        density = copy(initial_condition.density)
-        volume = similar(initial_condition.density)
-
-        if !(reference_velocity isa Function || isnothing(reference_velocity) ||
-             (reference_velocity isa Vector && length(reference_velocity) == NDIMS))
-            throw(ArgumentError("`reference_velocity` must be either a function mapping " *
-                                "each particle's coordinates and time to its velocity, " *
-                                "an array where the ``i``-th column holds the velocity of particle ``i`` " *
-                                "or, for a constant fluid velocity, a vector of length $NDIMS for a $(NDIMS)D problem holding this velocity"))
-        else
-            if reference_velocity isa Function
-                test_result = reference_velocity(zeros(NDIMS), 0.0)
-                if length(test_result) != NDIMS
-                    throw(ArgumentError("`reference_velocity` function must be of dimension $NDIMS"))
-                end
+    check_reference_values!(boundary_model, reference_density, reference_pressure,
+                            reference_velocity)
+
+    buffer = SystemBuffer(nparticles(initial_condition), buffer_size)
+
+    initial_condition = allocate_buffer(initial_condition, buffer)
+
+    NDIMS = ndims(initial_condition)
+
+    pressure = copy(initial_condition.pressure)
+    mass = copy(initial_condition.mass)
+    density = copy(initial_condition.density)
+    volume = similar(initial_condition.density)
+
+    if !(reference_velocity isa Function || isnothing(reference_velocity) ||
+         (reference_velocity isa Vector && length(reference_velocity) == NDIMS))
+        throw(ArgumentError("`reference_velocity` must be either a function mapping " *
+                            "each particle's coordinates and time to its velocity, " *
+                            "an array where the ``i``-th column holds the velocity of particle ``i`` " *
+                            "or, for a constant fluid velocity, a vector of length $NDIMS for a $(NDIMS)D problem holding this velocity"))
+    else
+        if reference_velocity isa Function
+            test_result = reference_velocity(zeros(NDIMS), 0.0)
+            if length(test_result) != NDIMS
+                throw(ArgumentError("`reference_velocity` function must be of dimension $NDIMS"))
             end
-            reference_velocity_ = wrap_reference_function(reference_velocity, Val(NDIMS))
         end
+        reference_velocity_ = wrap_reference_function(reference_velocity, Val(NDIMS))
+    end
 
-        if !(reference_pressure isa Function || reference_pressure isa Real ||
-             isnothing(reference_pressure))
-            throw(ArgumentError("`reference_pressure` must be either a function mapping " *
-                                "each particle's coordinates and time to its pressure, " *
-                                "a vector holding the pressure of each particle, or a scalar"))
-        else
-            if reference_pressure isa Function
-                test_result = reference_pressure(zeros(NDIMS), 0.0)
-                if length(test_result) != 1
-                    throw(ArgumentError("`reference_pressure` function must be a scalar function"))
-                end
+    if !(reference_pressure isa Function || reference_pressure isa Real ||
+         isnothing(reference_pressure))
+        throw(ArgumentError("`reference_pressure` must be either a function mapping " *
+                            "each particle's coordinates and time to its pressure, " *
+                            "a vector holding the pressure of each particle, or a scalar"))
+    else
+        if reference_pressure isa Function
+            test_result = reference_pressure(zeros(NDIMS), 0.0)
+            if length(test_result) != 1
+                throw(ArgumentError("`reference_pressure` function must be a scalar function"))
             end
-            reference_pressure_ = wrap_reference_function(reference_pressure, Val(NDIMS))
         end
+        reference_pressure_ = wrap_reference_function(reference_pressure, Val(NDIMS))
+    end
 
-        if !(reference_density isa Function || reference_density isa Real ||
-             isnothing(reference_density))
-            throw(ArgumentError("`reference_density` must be either a function mapping " *
-                                "each particle's coordinates and time to its density, " *
-                                "a vector holding the density of each particle, or a scalar"))
-        else
-            if reference_density isa Function
-                test_result = reference_density(zeros(NDIMS), 0.0)
-                if length(test_result) != 1
-                    throw(ArgumentError("`reference_density` function must be a scalar function"))
-                end
+    if !(reference_density isa Function || reference_density isa Real ||
+         isnothing(reference_density))
+        throw(ArgumentError("`reference_density` must be either a function mapping " *
+                            "each particle's coordinates and time to its density, " *
+                            "a vector holding the density of each particle, or a scalar"))
+    else
+        if reference_density isa Function
+            test_result = reference_density(zeros(NDIMS), 0.0)
+            if length(test_result) != 1
+                throw(ArgumentError("`reference_density` function must be a scalar function"))
             end
-            reference_density_ = wrap_reference_function(reference_density, Val(NDIMS))
         end
-
-        cache = create_cache_open_boundary(boundary_model, initial_condition,
-                                           reference_density, reference_velocity,
-                                           reference_pressure)
-
-        return new{typeof(boundary_model), typeof(boundary_zone), NDIMS, ELTYPE,
-                   typeof(initial_condition), typeof(fluid_system), typeof(mass),
-                   typeof(reference_velocity_), typeof(reference_pressure_),
-                   typeof(reference_density_), typeof(buffer),
-                   typeof(cache)}(initial_condition, fluid_system, boundary_model, mass,
-                                  density, volume, pressure, boundary_zone,
-                                  reference_velocity_, reference_pressure_,
-                                  reference_density_, buffer, false, cache)
+        reference_density_ = wrap_reference_function(reference_density, Val(NDIMS))
     end
+
+    cache = create_cache_open_boundary(boundary_model, initial_condition,
+                                       reference_density, reference_velocity,
+                                       reference_pressure)
+
+    ucu = Ref(false) # Reference to check if the update callback is used
+    fsi = Ref(0) # Reference to the fluid system index
+
+    smoothing_length = initial_smoothing_length(fluid_system)
+
+    return OpenBoundarySPHSystem(boundary_model, initial_condition, fluid_system, fsi,
+                                 smoothing_length, mass, density, volume, pressure,
+                                 boundary_zone, reference_velocity_, reference_pressure_,
+                                 reference_density_, buffer, ucu, cache)
 end
 
 function create_cache_open_boundary(boundary_model, initial_condition,
@@ -192,7 +213,7 @@ function Base.show(io::IO, ::MIME"text/plain", system::OpenBoundarySPHSystem)
     end
 end
 
-@inline function Base.eltype(::OpenBoundarySPHSystem{<:Any, <:Any, <:Any, ELTYPE}) where {ELTYPE}
+@inline function Base.eltype(::OpenBoundarySPHSystem{<:Any, ELTYPE}) where {ELTYPE}
     return ELTYPE
 end
 
@@ -204,8 +225,12 @@ end
 
 update_callback_used!(system::OpenBoundarySPHSystem) = system.update_callback_used[] = true
 
+function corresponding_fluid_system(system::OpenBoundarySPHSystem, semi)
+    return semi.systems[system.fluid_system_index[]]
+end
+
 function smoothing_length(system::OpenBoundarySPHSystem, particle)
-    return smoothing_length(system.fluid_system, particle)
+    return system.smoothing_length
 end
 
 @inline hydrodynamic_mass(system::OpenBoundarySPHSystem, particle) = system.mass[particle]
@@ -250,7 +275,8 @@ end
 update_open_boundary_eachstep!(system, v_ode, u_ode, semi, t) = system
 
 function check_domain!(system, v, u, v_ode, u_ode, semi)
-    (; boundary_zone, fluid_system) = system
+    (; boundary_zone) = system
+    fluid_system = corresponding_fluid_system(system, semi)
 
     u_fluid = wrap_u(u_ode, fluid_system, semi)
     v_fluid = wrap_v(v_ode, fluid_system, semi)
@@ -266,8 +292,8 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
         end
     end
 
-    update_system_buffer!(system.buffer)
-    update_system_buffer!(system.fluid_system.buffer)
+    update_system_buffer!(system.buffer, semi)
+    update_system_buffer!(fluid_system.buffer, semi)
 
     # Check the fluid particles whether they're entering the boundary zone
     @threaded semi for fluid_particle in each_moving_particle(fluid_system)
@@ -280,8 +306,8 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
         end
     end
 
-    update_system_buffer!(system.buffer)
-    update_system_buffer!(system.fluid_system.buffer)
+    update_system_buffer!(system.buffer, semi)
+    update_system_buffer!(fluid_system.buffer, semi)
 
     return system
 end

From 1de092ab9e836da11fc3201dbbac2a7e34202633 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Mon, 12 May 2025 22:31:13 +0200
Subject: [PATCH 08/33] works on CPU but not on GPU

---
 src/general/buffer.jl                         | 47 +++++------
 .../method_of_characteristics.jl              |  4 +-
 src/schemes/boundary/open_boundary/system.jl  | 81 +++++++++++++++----
 3 files changed, 86 insertions(+), 46 deletions(-)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 33d664587c..2fd34188a0 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -1,16 +1,24 @@
-struct SystemBuffer{AP, APC, EP}
-    active_particle       :: AP # Vector{Bool}
-    active_particle_count :: APC
-    eachparticle          :: EP # Vector{Int}
+struct SystemBuffer{VB, RI, VI}
+    active_particle       :: VB  # Vector{Bool}
+    active_particle_count :: RI  # Ref{Int}
+    candidates            :: VI  # Vector{Int}
+    particle_outside      :: VB  # Vector{Bool}
+    available_particles   :: VI  # Vector{Int}
+    next_particle         :: VI  # Vector{Int}
+    eachparticle          :: VI  # Vector{Int}
     buffer_size           :: Int
 end
 
 function SystemBuffer(active_size, buffer_size::Integer)
     # We cannot use a `BitVector` here, as writing to a `BitVector` is not thread-safe
     active_particle = vcat(fill(true, active_size), fill(false, buffer_size))
+    candidates = collect(eachindex(active_particle))
+    particle_outside = vcat(fill(false, active_size), fill(true, buffer_size))
+    available_particles = collect(eachindex(active_particle))
     eachparticle = collect(eachindex(active_particle))
 
-    return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)
+    return SystemBuffer(active_particle, Ref(active_size), candidates, particle_outside,
+                        available_particles, Int[1], eachparticle, buffer_size)
 end
 
 allocate_buffer(initial_condition, ::Nothing) = initial_condition
@@ -42,7 +50,6 @@ end
 @inline function update_system_buffer!(buffer::SystemBuffer, semi)
     (; active_particle) = buffer
 
-
     buffer.active_particle_count[] = count(active_particle)
     buffer.eachparticle .= -1
 
@@ -62,31 +69,15 @@ end
     return buffer
 end
 
-@inline each_moving_particle(system, buffer) = view(buffer.eachparticle,
-                                                    1:buffer.active_particle_count[])
+@inline each_moving_particle(system,
+                             buffer) = view(buffer.eachparticle,
+                                            1:buffer.active_particle_count[])
 
 @inline active_coordinates(u, system, buffer) = view(u, :, buffer.active_particle)
 
-@inline active_particles(system, buffer) = view(buffer.eachparticle,
-                                                1:buffer.active_particle_count[])
-
-@inline function activate_next_particle(system)
-    (; active_particle) = system.buffer
-
-    for particle in eachindex(active_particle)
-        if !active_particle[particle]
-            # Activate this particle. The return value is the old value.
-            # If this is `true`, the particle was active before and we need to continue.
-            # This happens because a particle might have been activated by another thread
-            # between the condition and the line below.
-            was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true
-
-            !was_active && return particle
-        end
-    end
-
-    error("No buffer particles available")
-end
+@inline active_particles(system,
+                         buffer) = view(buffer.eachparticle,
+                                        1:buffer.active_particle_count[])
 
 @inline function deactivate_particle!(system, particle, u)
     (; active_particle) = system.buffer
diff --git a/src/schemes/boundary/open_boundary/method_of_characteristics.jl b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
index aa41c08749..a1f0c01ed6 100644
--- a/src/schemes/boundary/open_boundary/method_of_characteristics.jl
+++ b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
@@ -25,7 +25,7 @@ end
 @inline function update_boundary_quantities!(system, boundary_model::BoundaryModelLastiwka,
                                              v, u, v_ode, u_ode, semi, t)
     (; density, pressure, cache, boundary_zone,
-    reference_velocity, reference_pressure, reference_density) = system
+     reference_velocity, reference_pressure, reference_density) = system
     (; flow_direction) = boundary_zone
 
     fluid_system = corresponding_fluid_system(system, semi)
@@ -155,7 +155,7 @@ end
 function evaluate_characteristics!(system, neighbor_system::FluidSystem,
                                    v, u, v_ode, u_ode, semi, t)
     (; volume, cache, boundary_zone, density, pressure,
-    reference_velocity, reference_pressure, reference_density) = system
+     reference_velocity, reference_pressure, reference_density) = system
     (; flow_direction) = boundary_zone
     (; characteristics) = cache
 
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index 70257d3a6e..7f36ba3df9 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -281,31 +281,79 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
     u_fluid = wrap_u(u_ode, fluid_system, semi)
     v_fluid = wrap_v(v_ode, fluid_system, semi)
 
+    function map_sort!(a, b) # (a::Vector{Int}, b::Vector{Bool})
+        map!((true_value, idx) -> true_value ? -1 : idx, a, b, eachindex(b))
+
+        sort!(a, rev=true)
+
+        return a # [idx_1, idx_2, ..., -1, -1]
+    end
+
+    system.buffer.particle_outside .= false
+
     # Check the boundary particles whether they're leaving the boundary zone
     @threaded semi for particle in each_moving_particle(system)
         particle_coords = current_coords(u, system, particle)
 
         # Check if boundary particle is outside the boundary zone
         if !is_in_boundary_zone(boundary_zone, particle_coords)
-            convert_particle!(system, fluid_system, boundary_zone, particle,
-                              v, u, v_fluid, u_fluid)
+            system.buffer.particle_outside[particle] = true
         end
     end
 
+    # Basically: # candidates = [candidate_1, candidate_2, ..., -1, -1]
+    map_sort!(system.buffer.candidates, .!(system.buffer.particle_outside))
+
+    # Basically: `available_particles = [inactive_1, inactive_2, ..., -1, -1]`
+    map_sort!(fluid_system.buffer.available_particles, fluid_system.buffer.active_particle)
+
+    fluid_system.buffer.next_particle[1] = 0
+
+    # Copy buffer particle information to the fluid system
+    @threaded semi for buffer_id in 1:count(system.buffer.particle_outside)
+        particle = system.buffer.candidates[buffer_id]
+
+        fluid_id = PointNeighbors.Atomix.@atomic fluid_system.buffer.next_particle[1] += 1
+        new_particle = fluid_system.buffer.available_particles[fluid_id]
+
+        convert_particle!(system, fluid_system, boundary_zone, particle, new_particle,
+                          v, u, v_fluid, u_fluid)
+    end
+
     update_system_buffer!(system.buffer, semi)
     update_system_buffer!(fluid_system.buffer, semi)
 
+    fluid_system.buffer.particle_outside .= false
+
     # Check the fluid particles whether they're entering the boundary zone
     @threaded semi for fluid_particle in each_moving_particle(fluid_system)
         fluid_coords = current_coords(u_fluid, fluid_system, fluid_particle)
 
         # Check if fluid particle is in boundary zone
         if is_in_boundary_zone(boundary_zone, fluid_coords)
-            convert_particle!(fluid_system, system, boundary_zone, fluid_particle,
-                              v, u, v_fluid, u_fluid)
+            fluid_system.buffer.particle_outside[fluid_particle] = true
         end
     end
 
+    # Basically: # candidates = [candidate_1, candidate_2, ..., -1, -1]
+    map_sort!(fluid_system.buffer.candidates, .!(fluid_system.buffer.particle_outside))
+
+    # Basically: `available_particles = [inactive_1, inactive_2, ..., -1, -1]`
+    map_sort!(system.buffer.available_particles, system.buffer.active_particle)
+
+    system.buffer.next_particle[1] = 0
+
+    # Copy fluid particle information to the buffer
+    @threaded semi for fluid_id in 1:count(fluid_system.buffer.particle_outside)
+        particle = fluid_system.buffer.candidates[fluid_id]
+
+        buffer_id = PointNeighbors.Atomix.@atomic system.buffer.next_particle[1] += 1
+        new_particle = system.buffer.available_particles[buffer_id]
+
+        convert_particle!(fluid_system, system, boundary_zone, particle, new_particle,
+                          v, u, v_fluid, u_fluid)
+    end
+
     update_system_buffer!(system.buffer, semi)
     update_system_buffer!(fluid_system.buffer, semi)
 
@@ -314,8 +362,8 @@ end
 
 # Outflow particle is outside the boundary zone
 @inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system,
-                                   boundary_zone::BoundaryZone{OutFlow}, particle, v, u,
-                                   v_fluid, u_fluid)
+                                   boundary_zone::BoundaryZone{OutFlow},
+                                   particle, new_partcile, v, u, v_fluid, u_fluid)
     deactivate_particle!(system, particle, u)
 
     return system
@@ -323,12 +371,12 @@ end
 
 # Inflow particle is outside the boundary zone
 @inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system,
-                                   boundary_zone::BoundaryZone{InFlow}, particle, v, u,
-                                   v_fluid, u_fluid)
+                                   boundary_zone::BoundaryZone{InFlow},
+                                   particle, new_partcile, v, u, v_fluid, u_fluid)
     (; spanning_set) = boundary_zone
 
     # Activate a new particle in simulation domain
-    transfer_particle!(fluid_system, system, particle, v_fluid, u_fluid, v, u)
+    transfer_particle!(fluid_system, system, particle, new_partcile, v_fluid, u_fluid, v, u)
 
     # Reset position of boundary particle
     for dim in 1:ndims(system)
@@ -340,8 +388,8 @@ end
 
 # Buffer particle is outside the boundary zone
 @inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system,
-                                   boundary_zone::BoundaryZone{BidirectionalFlow}, particle,
-                                   v, u, v_fluid, u_fluid)
+                                   boundary_zone::BoundaryZone{BidirectionalFlow},
+                                   particle, new_particle, v, u, v_fluid, u_fluid)
     relative_position = current_coords(u, system, particle) - boundary_zone.zone_origin
 
     # Check if particle is in- or outside the fluid domain.
@@ -353,7 +401,7 @@ end
     end
 
     # Activate a new particle in simulation domain
-    transfer_particle!(fluid_system, system, particle, v_fluid, u_fluid, v, u)
+    transfer_particle!(fluid_system, system, particle, new_particle, v_fluid, u_fluid, v, u)
 
     # Reset position of boundary particle
     for dim in 1:ndims(system)
@@ -365,9 +413,10 @@ end
 
 # Fluid particle is in boundary zone
 @inline function convert_particle!(fluid_system::FluidSystem, system,
-                                   boundary_zone, particle, v, u, v_fluid, u_fluid)
+                                   boundary_zone,
+                                   particle, new_particle, v, u, v_fluid, u_fluid)
     # Activate particle in boundary zone
-    transfer_particle!(system, fluid_system, particle, v, u, v_fluid, u_fluid)
+    transfer_particle!(system, fluid_system, particle, new_particle, v, u, v_fluid, u_fluid)
 
     # Deactivate particle in interior domain
     deactivate_particle!(fluid_system, particle, u_fluid)
@@ -375,9 +424,9 @@ end
     return fluid_system
 end
 
-@inline function transfer_particle!(system_new, system_old, particle_old,
+@inline function transfer_particle!(system_new, system_old, particle_old, particle_new,
                                     v_new, u_new, v_old, u_old)
-    particle_new = activate_next_particle(system_new)
+    system_new.buffer.active_particle[particle_new] = true
 
     # Transfer densities
     density = current_density(v_old, system_old, particle_old)

From 7cfe8b20c91f8429d94b4c77d3de399b54b3d5e6 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 13 May 2025 12:38:14 +0200
Subject: [PATCH 09/33] first working prototype on GPU

---
 Project.toml                                 | 4 +++-
 src/TrixiParticles.jl                        | 1 +
 src/general/buffer.jl                        | 6 +++---
 src/schemes/boundary/open_boundary/system.jl | 6 +++---
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/Project.toml b/Project.toml
index e8297cd12d..169c95e339 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,9 +1,10 @@
 name = "TrixiParticles"
 uuid = "66699cd8-9c01-4e9d-a059-b96c86d16b3a"
 authors = ["erik.faulhaber <44124897+efaulhaber@users.noreply.github.com>"]
-version = "0.3-dev"
+version = "0.3.0-dev"
 
 [deps]
+AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
@@ -40,6 +41,7 @@ OrdinaryDiffEqCore = "bbf590c4-e513-4bbe-9b18-05decba2e5d8"
 TrixiParticlesOrdinaryDiffEqExt = ["OrdinaryDiffEq", "OrdinaryDiffEqCore"]
 
 [compat]
+AcceleratedKernels = "0.3.3"
 Adapt = "4"
 CSV = "0.10"
 DataFrames = "1.6"
diff --git a/src/TrixiParticles.jl b/src/TrixiParticles.jl
index 694347e35e..4b2917fbee 100644
--- a/src/TrixiParticles.jl
+++ b/src/TrixiParticles.jl
@@ -2,6 +2,7 @@ module TrixiParticles
 
 using Reexport: @reexport
 
+using AcceleratedKernels: AcceleratedKernels
 using Adapt: Adapt
 using Base: @propagate_inbounds
 using CSV: CSV
diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 2fd34188a0..d398a1771b 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -1,10 +1,10 @@
-struct SystemBuffer{VB, RI, VI}
+struct SystemBuffer{VB, RI, VI, NP}
     active_particle       :: VB  # Vector{Bool}
     active_particle_count :: RI  # Ref{Int}
     candidates            :: VI  # Vector{Int}
     particle_outside      :: VB  # Vector{Bool}
     available_particles   :: VI  # Vector{Int}
-    next_particle         :: VI  # Vector{Int}
+    next_particle         :: NP  # Vector{Int32}
     eachparticle          :: VI  # Vector{Int}
     buffer_size           :: Int
 end
@@ -18,7 +18,7 @@ function SystemBuffer(active_size, buffer_size::Integer)
     eachparticle = collect(eachindex(active_particle))
 
     return SystemBuffer(active_particle, Ref(active_size), candidates, particle_outside,
-                        available_particles, Int[1], eachparticle, buffer_size)
+                        available_particles, Int32[1], eachparticle, buffer_size)
 end
 
 allocate_buffer(initial_condition, ::Nothing) = initial_condition
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index 7f36ba3df9..c713f93b26 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -284,7 +284,7 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
     function map_sort!(a, b) # (a::Vector{Int}, b::Vector{Bool})
         map!((true_value, idx) -> true_value ? -1 : idx, a, b, eachindex(b))
 
-        sort!(a, rev=true)
+        AcceleratedKernels.sort!(a; rev=true)
 
         return a # [idx_1, idx_2, ..., -1, -1]
     end
@@ -307,7 +307,7 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
     # Basically: `available_particles = [inactive_1, inactive_2, ..., -1, -1]`
     map_sort!(fluid_system.buffer.available_particles, fluid_system.buffer.active_particle)
 
-    fluid_system.buffer.next_particle[1] = 0
+    fluid_system.buffer.next_particle .= 0
 
     # Copy buffer particle information to the fluid system
     @threaded semi for buffer_id in 1:count(system.buffer.particle_outside)
@@ -341,7 +341,7 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
     # Basically: `available_particles = [inactive_1, inactive_2, ..., -1, -1]`
     map_sort!(system.buffer.available_particles, system.buffer.active_particle)
 
-    system.buffer.next_particle[1] = 0
+    system.buffer.next_particle .= 0
 
     # Copy fluid particle information to the buffer
     @threaded semi for fluid_id in 1:count(fluid_system.buffer.particle_outside)

From 278e5d956d86b5f6cbe444b7556afdb7641f92c8 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 13 May 2025 21:18:13 +0200
Subject: [PATCH 10/33] rm ambiguous adapt

---
 src/general/gpu.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/general/gpu.jl b/src/general/gpu.jl
index 1b371ad5f1..cc1cab3ecd 100644
--- a/src/general/gpu.jl
+++ b/src/general/gpu.jl
@@ -17,7 +17,6 @@ Adapt.@adapt_structure BoundaryModelDummyParticles
 Adapt.@adapt_structure BoundaryModelMonaghanKajtar
 Adapt.@adapt_structure BoundaryMovement
 Adapt.@adapt_structure TotalLagrangianSPHSystem
-Adapt.@adapt_structure OpenBoundarySPHSystem
 Adapt.@adapt_structure BoundaryZone
 Adapt.@adapt_structure SystemBuffer
 

From a4cac23bdd5eb2f2bf9a4cc94b62acf9cf08a34d Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 13 May 2025 22:06:08 +0200
Subject: [PATCH 11/33] undo avoiding atomic operation

---
 Project.toml                                 |  2 -
 src/TrixiParticles.jl                        |  1 -
 src/general/buffer.jl                        | 62 +++++++++------
 src/schemes/boundary/open_boundary/system.jl | 81 ++++----------------
 4 files changed, 54 insertions(+), 92 deletions(-)

diff --git a/Project.toml b/Project.toml
index d3ed917747..f6bfce2770 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,7 +4,6 @@ authors = ["erik.faulhaber <44124897+efaulhaber@users.noreply.github.com>"]
 version = "0.3.1-dev"
 
 [deps]
-AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
@@ -41,7 +40,6 @@ OrdinaryDiffEqCore = "bbf590c4-e513-4bbe-9b18-05decba2e5d8"
 TrixiParticlesOrdinaryDiffEqExt = ["OrdinaryDiffEq", "OrdinaryDiffEqCore"]
 
 [compat]
-AcceleratedKernels = "0.3.3"
 Adapt = "4"
 CSV = "0.10"
 DataFrames = "1.6"
diff --git a/src/TrixiParticles.jl b/src/TrixiParticles.jl
index b68b545cfa..aaf299b5fb 100644
--- a/src/TrixiParticles.jl
+++ b/src/TrixiParticles.jl
@@ -2,7 +2,6 @@ module TrixiParticles
 
 using Reexport: @reexport
 
-using AcceleratedKernels: AcceleratedKernels
 using Adapt: Adapt
 using Base: @propagate_inbounds
 using CSV: CSV
diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index d398a1771b..64168cfb34 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -1,24 +1,20 @@
-struct SystemBuffer{VB, RI, VI, NP}
-    active_particle       :: VB  # Vector{Bool}
-    active_particle_count :: RI  # Ref{Int}
-    candidates            :: VI  # Vector{Int}
-    particle_outside      :: VB  # Vector{Bool}
-    available_particles   :: VI  # Vector{Int}
-    next_particle         :: NP  # Vector{Int32}
-    eachparticle          :: VI  # Vector{Int}
+struct SystemBuffer{AP, APC, EP}
+    active_particle       :: AP # Vector{Bool}
+    active_particle_count :: APC
+    eachparticle          :: EP # Vector{Int}
     buffer_size           :: Int
 end
 
 function SystemBuffer(active_size, buffer_size::Integer)
-    # We cannot use a `BitVector` here, as writing to a `BitVector` is not thread-safe
-    active_particle = vcat(fill(true, active_size), fill(false, buffer_size))
-    candidates = collect(eachindex(active_particle))
-    particle_outside = vcat(fill(false, active_size), fill(true, buffer_size))
-    available_particles = collect(eachindex(active_particle))
+    # Using a `BitVector` is not an option as writing to it is not thread-safe.
+    # Also, to ensure thread-safe particle activation, we use an `atomic_cas` operation.
+    # Thus, `active_particle` is defined as a `Vector{UInt32}` because CUDA.jl
+    # does not support atomic operations on `Bool`.
+    # https://github.com/JuliaGPU/CUDA.jl/blob/2cc9285676a4cd28d0846ca62f0300c56d281d38/src/device/intrinsics/atomics.jl#L243
+    active_particle = vcat(fill(UInt32(1), active_size), fill(UInt32(0), buffer_size))
     eachparticle = collect(eachindex(active_particle))
 
-    return SystemBuffer(active_particle, Ref(active_size), candidates, particle_outside,
-                        available_particles, Int32[1], eachparticle, buffer_size)
+    return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)
 end
 
 allocate_buffer(initial_condition, ::Nothing) = initial_condition
@@ -50,13 +46,13 @@ end
 @inline function update_system_buffer!(buffer::SystemBuffer, semi)
     (; active_particle) = buffer
 
-    buffer.active_particle_count[] = count(active_particle)
+    buffer.active_particle_count[] = sum(active_particle)
     buffer.eachparticle .= -1
 
     @threaded semi for i in 1:buffer.active_particle_count[]
         active = 0
         for j in eachindex(active_particle)
-            if active_particle[j]
+            if active_particle[j] == true
                 active += 1
                 if active == i
                     buffer.eachparticle[i] = j
@@ -69,15 +65,33 @@ end
     return buffer
 end
 
-@inline each_moving_particle(system,
-                             buffer) = view(buffer.eachparticle,
-                                            1:buffer.active_particle_count[])
+@inline each_moving_particle(system, buffer) = active_particles(system, buffer)
 
-@inline active_coordinates(u, system, buffer) = view(u, :, buffer.active_particle)
+@inline active_coordinates(u, system, buffer) = view(u, :, active_particles(system, buffer))
 
-@inline active_particles(system,
-                         buffer) = view(buffer.eachparticle,
-                                        1:buffer.active_particle_count[])
+@inline function active_particles(system, buffer)
+    return view(buffer.eachparticle, 1:buffer.active_particle_count[])
+end
+
+@inline function activate_next_particle(system)
+    (; active_particle) = system.buffer
+
+    for particle in eachindex(active_particle)
+        if PointNeighbors.Atomix.@atomic(active_particle[particle]) == false
+            # Activate this particle. The return value is the old value.
+            # If this is `true`, the particle was active before and we need to continue.
+            # This happens because a particle might have been activated by another thread
+            # between the condition and the line below.
+            was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true
+
+            if was_active == false
+                return particle
+            end
+        end
+    end
+
+    error("No buffer particles available")
+end
 
 @inline function deactivate_particle!(system, particle, u)
     (; active_particle) = system.buffer
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index c713f93b26..70257d3a6e 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -281,79 +281,31 @@ function check_domain!(system, v, u, v_ode, u_ode, semi)
     u_fluid = wrap_u(u_ode, fluid_system, semi)
     v_fluid = wrap_v(v_ode, fluid_system, semi)
 
-    function map_sort!(a, b) # (a::Vector{Int}, b::Vector{Bool})
-        map!((true_value, idx) -> true_value ? -1 : idx, a, b, eachindex(b))
-
-        AcceleratedKernels.sort!(a; rev=true)
-
-        return a # [idx_1, idx_2, ..., -1, -1]
-    end
-
-    system.buffer.particle_outside .= false
-
     # Check the boundary particles whether they're leaving the boundary zone
     @threaded semi for particle in each_moving_particle(system)
         particle_coords = current_coords(u, system, particle)
 
         # Check if boundary particle is outside the boundary zone
         if !is_in_boundary_zone(boundary_zone, particle_coords)
-            system.buffer.particle_outside[particle] = true
+            convert_particle!(system, fluid_system, boundary_zone, particle,
+                              v, u, v_fluid, u_fluid)
         end
     end
 
-    # Basically: # candidates = [candidate_1, candidate_2, ..., -1, -1]
-    map_sort!(system.buffer.candidates, .!(system.buffer.particle_outside))
-
-    # Basically: `available_particles = [inactive_1, inactive_2, ..., -1, -1]`
-    map_sort!(fluid_system.buffer.available_particles, fluid_system.buffer.active_particle)
-
-    fluid_system.buffer.next_particle .= 0
-
-    # Copy buffer particle information to the fluid system
-    @threaded semi for buffer_id in 1:count(system.buffer.particle_outside)
-        particle = system.buffer.candidates[buffer_id]
-
-        fluid_id = PointNeighbors.Atomix.@atomic fluid_system.buffer.next_particle[1] += 1
-        new_particle = fluid_system.buffer.available_particles[fluid_id]
-
-        convert_particle!(system, fluid_system, boundary_zone, particle, new_particle,
-                          v, u, v_fluid, u_fluid)
-    end
-
     update_system_buffer!(system.buffer, semi)
     update_system_buffer!(fluid_system.buffer, semi)
 
-    fluid_system.buffer.particle_outside .= false
-
     # Check the fluid particles whether they're entering the boundary zone
     @threaded semi for fluid_particle in each_moving_particle(fluid_system)
         fluid_coords = current_coords(u_fluid, fluid_system, fluid_particle)
 
         # Check if fluid particle is in boundary zone
         if is_in_boundary_zone(boundary_zone, fluid_coords)
-            fluid_system.buffer.particle_outside[fluid_particle] = true
+            convert_particle!(fluid_system, system, boundary_zone, fluid_particle,
+                              v, u, v_fluid, u_fluid)
         end
     end
 
-    # Basically: # candidates = [candidate_1, candidate_2, ..., -1, -1]
-    map_sort!(fluid_system.buffer.candidates, .!(fluid_system.buffer.particle_outside))
-
-    # Basically: `available_particles = [inactive_1, inactive_2, ..., -1, -1]`
-    map_sort!(system.buffer.available_particles, system.buffer.active_particle)
-
-    system.buffer.next_particle .= 0
-
-    # Copy fluid particle information to the buffer
-    @threaded semi for fluid_id in 1:count(fluid_system.buffer.particle_outside)
-        particle = fluid_system.buffer.candidates[fluid_id]
-
-        buffer_id = PointNeighbors.Atomix.@atomic system.buffer.next_particle[1] += 1
-        new_particle = system.buffer.available_particles[buffer_id]
-
-        convert_particle!(fluid_system, system, boundary_zone, particle, new_particle,
-                          v, u, v_fluid, u_fluid)
-    end
-
     update_system_buffer!(system.buffer, semi)
     update_system_buffer!(fluid_system.buffer, semi)
 
@@ -362,8 +314,8 @@ end
 
 # Outflow particle is outside the boundary zone
 @inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system,
-                                   boundary_zone::BoundaryZone{OutFlow},
-                                   particle, new_partcile, v, u, v_fluid, u_fluid)
+                                   boundary_zone::BoundaryZone{OutFlow}, particle, v, u,
+                                   v_fluid, u_fluid)
     deactivate_particle!(system, particle, u)
 
     return system
@@ -371,12 +323,12 @@ end
 
 # Inflow particle is outside the boundary zone
 @inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system,
-                                   boundary_zone::BoundaryZone{InFlow},
-                                   particle, new_partcile, v, u, v_fluid, u_fluid)
+                                   boundary_zone::BoundaryZone{InFlow}, particle, v, u,
+                                   v_fluid, u_fluid)
     (; spanning_set) = boundary_zone
 
     # Activate a new particle in simulation domain
-    transfer_particle!(fluid_system, system, particle, new_partcile, v_fluid, u_fluid, v, u)
+    transfer_particle!(fluid_system, system, particle, v_fluid, u_fluid, v, u)
 
     # Reset position of boundary particle
     for dim in 1:ndims(system)
@@ -388,8 +340,8 @@ end
 
 # Buffer particle is outside the boundary zone
 @inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system,
-                                   boundary_zone::BoundaryZone{BidirectionalFlow},
-                                   particle, new_particle, v, u, v_fluid, u_fluid)
+                                   boundary_zone::BoundaryZone{BidirectionalFlow}, particle,
+                                   v, u, v_fluid, u_fluid)
     relative_position = current_coords(u, system, particle) - boundary_zone.zone_origin
 
     # Check if particle is in- or outside the fluid domain.
@@ -401,7 +353,7 @@ end
     end
 
     # Activate a new particle in simulation domain
-    transfer_particle!(fluid_system, system, particle, new_particle, v_fluid, u_fluid, v, u)
+    transfer_particle!(fluid_system, system, particle, v_fluid, u_fluid, v, u)
 
     # Reset position of boundary particle
     for dim in 1:ndims(system)
@@ -413,10 +365,9 @@ end
 
 # Fluid particle is in boundary zone
 @inline function convert_particle!(fluid_system::FluidSystem, system,
-                                   boundary_zone,
-                                   particle, new_particle, v, u, v_fluid, u_fluid)
+                                   boundary_zone, particle, v, u, v_fluid, u_fluid)
     # Activate particle in boundary zone
-    transfer_particle!(system, fluid_system, particle, new_particle, v, u, v_fluid, u_fluid)
+    transfer_particle!(system, fluid_system, particle, v, u, v_fluid, u_fluid)
 
     # Deactivate particle in interior domain
     deactivate_particle!(fluid_system, particle, u_fluid)
@@ -424,9 +375,9 @@ end
     return fluid_system
 end
 
-@inline function transfer_particle!(system_new, system_old, particle_old, particle_new,
+@inline function transfer_particle!(system_new, system_old, particle_old,
                                     v_new, u_new, v_old, u_old)
-    system_new.buffer.active_particle[particle_new] = true
+    particle_new = activate_next_particle(system_new)
 
     # Transfer densities
     density = current_density(v_old, system_old, particle_old)

From d0a70af816063f8b4788a552e1c8f18c3febc798 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 13 May 2025 23:03:16 +0200
Subject: [PATCH 12/33] fix tests

---
 examples/fluid/pipe_flow_2d.jl       |  8 +++++---
 examples/fluid/pipe_flow_3d.jl       |  3 +--
 test/general/buffer.jl               | 10 +++++++---
 test/general/semidiscretization.jl   |  3 ++-
 test/systems/open_boundary_system.jl |  1 +
 5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/examples/fluid/pipe_flow_2d.jl b/examples/fluid/pipe_flow_2d.jl
index 218ce023ac..74ab74ef8a 100644
--- a/examples/fluid/pipe_flow_2d.jl
+++ b/examples/fluid/pipe_flow_2d.jl
@@ -54,7 +54,9 @@ n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(ndims(pipe.fluid) -
 wcsph = false
 
 smoothing_length = 1.5 * particle_spacing
-smoothing_kernel = WendlandC2Kernel{2}()
+
+NDIMS = ndims(pipe.fluid)
+smoothing_kernel = WendlandC2Kernel{NDIMS}()
 
 fluid_density_calculator = ContinuityDensity()
 
@@ -139,8 +141,8 @@ boundary_system = BoundarySPHSystem(pipe.boundary, boundary_model)
 min_corner = minimum(pipe.boundary.coordinates .- particle_spacing, dims=2)
 max_corner = maximum(pipe.boundary.coordinates .+ particle_spacing, dims=2)
 
-nhs = GridNeighborhoodSearch{2}(; cell_list=FullGridCellList(; min_corner, max_corner),
-                                update_strategy=ParallelUpdate())
+nhs = GridNeighborhoodSearch{NDIMS}(; cell_list=FullGridCellList(; min_corner, max_corner),
+                                    update_strategy=ParallelUpdate())
 
 semi = Semidiscretization(fluid_system, open_boundary_in, open_boundary_out,
                           boundary_system, neighborhood_search=nhs,
diff --git a/examples/fluid/pipe_flow_3d.jl b/examples/fluid/pipe_flow_3d.jl
index fb2cc7f0aa..93b8ed5a30 100644
--- a/examples/fluid/pipe_flow_3d.jl
+++ b/examples/fluid/pipe_flow_3d.jl
@@ -37,8 +37,7 @@ flow_direction = [1.0, 0.0, 0.0]
 trixi_include(@__MODULE__, joinpath(examples_dir(), "fluid", "pipe_flow_2d.jl"),
               domain_size=domain_size, boundary_size=boundary_size,
               flow_direction=flow_direction, faces=(false, false, true, true, true, true),
-              tspan=tspan, smoothing_kernel=WendlandC2Kernel{3}(),
-              reference_velocity=velocity_function3d,
+              tspan=tspan, reference_velocity=velocity_function3d,
               plane_in=([0.0, 0.0, 0.0], [0.0, domain_size[2], 0.0],
                         [0.0, 0.0, domain_size[3]]),
               plane_out=([domain_size[1], 0.0, 0.0], [domain_size[1], domain_size[2], 0.0],
diff --git a/test/general/buffer.jl b/test/general/buffer.jl
index 0a5fae26a8..bf02bebc1c 100644
--- a/test/general/buffer.jl
+++ b/test/general/buffer.jl
@@ -1,6 +1,7 @@
 @testset verbose=true "`SystemBuffer`" begin
     # Mock fluid system
     struct FluidSystemMock3 <: TrixiParticles.FluidSystem{2} end
+    TrixiParticles.initial_smoothing_length(system::FluidSystemMock3) = 1.0
 
     zone = BoundaryZone(; plane=([0.0, 0.0], [0.0, 1.0]), particle_spacing=0.2,
                         open_boundary_layers=2, density=1.0, plane_normal=[1.0, 0.0],
@@ -24,14 +25,16 @@
 
         particle_id = TrixiParticles.activate_next_particle(system_buffer)
 
-        TrixiParticles.update_system_buffer!(system_buffer.buffer)
+        TrixiParticles.update_system_buffer!(system_buffer.buffer,
+                                             DummySemidiscretization())
 
         @test TrixiParticles.each_moving_particle(system_buffer) == 1:(n_particles + 1)
 
         TrixiParticles.deactivate_particle!(system_buffer, particle_id,
                                             ones(2, particle_id))
 
-        TrixiParticles.update_system_buffer!(system_buffer.buffer)
+        TrixiParticles.update_system_buffer!(system_buffer.buffer,
+                                             DummySemidiscretization())
 
         @test TrixiParticles.each_moving_particle(system_buffer) == 1:n_particles
 
@@ -39,7 +42,8 @@
         TrixiParticles.deactivate_particle!(system_buffer, particle_id,
                                             ones(2, particle_id))
 
-        TrixiParticles.update_system_buffer!(system_buffer.buffer)
+        TrixiParticles.update_system_buffer!(system_buffer.buffer,
+                                             DummySemidiscretization())
 
         @test TrixiParticles.each_moving_particle(system_buffer) ==
               setdiff(1:n_particles, particle_id)
diff --git a/test/general/semidiscretization.jl b/test/general/semidiscretization.jl
index aaae518877..dea8c356ab 100644
--- a/test/general/semidiscretization.jl
+++ b/test/general/semidiscretization.jl
@@ -72,7 +72,8 @@
             solid_system2 = TotalLagrangianSPHSystem(ic, kernel, 1.0, 1.0, 1.0,
                                                      boundary_model=model_a)
 
-            @test_nowarn TrixiParticles.check_configuration((solid_system2, fluid_system))
+            @test_nowarn TrixiParticles.check_configuration((solid_system2, fluid_system),
+                                                            nothing)
 
             # FSI with wrong boundary model
             solid_system3 = TotalLagrangianSPHSystem(ic, kernel, 1.0, 1.0, 1.0,
diff --git a/test/systems/open_boundary_system.jl b/test/systems/open_boundary_system.jl
index 21e29efd9b..c728432a1c 100644
--- a/test/systems/open_boundary_system.jl
+++ b/test/systems/open_boundary_system.jl
@@ -5,6 +5,7 @@
 
         # Mock fluid system
         struct FluidSystemMock2 <: TrixiParticles.FluidSystem{2} end
+        TrixiParticles.initial_smoothing_length(system::FluidSystemMock2) = 1.0
 
         inflow = BoundaryZone(; plane, particle_spacing=0.1,
                               plane_normal=flow_direction, density=1.0,

From 562f1e5357d909699a8e8c00843f856112e26076 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 14 May 2025 15:04:07 +0200
Subject: [PATCH 13/33] fix

---
 examples/fluid/pipe_flow_2d.jl                          | 7 ++++---
 examples/fluid/pipe_flow_3d.jl                          | 2 ++
 src/schemes/boundary/dummy_particles/dummy_particles.jl | 8 ++++----
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/examples/fluid/pipe_flow_2d.jl b/examples/fluid/pipe_flow_2d.jl
index 74ab74ef8a..8380555d56 100644
--- a/examples/fluid/pipe_flow_2d.jl
+++ b/examples/fluid/pipe_flow_2d.jl
@@ -47,7 +47,9 @@ pipe = RectangularTank(particle_spacing, domain_size, boundary_size, fluid_densi
 # Shift pipe walls in negative x-direction for the inflow
 pipe.boundary.coordinates[1, :] .-= particle_spacing * open_boundary_layers
 
-n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(ndims(pipe.fluid) - 1)
+NDIMS = ndims(pipe.fluid)
+
+n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(NDIMS - 1)
 
 # ==========================================================================================
 # ==== Fluid
@@ -55,7 +57,6 @@ wcsph = false
 
 smoothing_length = 1.5 * particle_spacing
 
-NDIMS = ndims(pipe.fluid)
 smoothing_kernel = WendlandC2Kernel{NDIMS}()
 
 fluid_density_calculator = ContinuityDensity()
@@ -129,7 +130,7 @@ open_boundary_out = OpenBoundarySPHSystem(outflow; fluid_system,
 # ==== Boundary
 viscosity_boundary = ViscosityAdami(nu=1e-4)
 boundary_model = BoundaryModelDummyParticles(pipe.boundary.density, pipe.boundary.mass,
-                                             AdamiPressureExtrapolation(),
+                                              AdamiPressureExtrapolation(),
                                              state_equation=state_equation,
                                              viscosity=viscosity_boundary,
                                              smoothing_kernel, smoothing_length)
diff --git a/examples/fluid/pipe_flow_3d.jl b/examples/fluid/pipe_flow_3d.jl
index 93b8ed5a30..a898acb97b 100644
--- a/examples/fluid/pipe_flow_3d.jl
+++ b/examples/fluid/pipe_flow_3d.jl
@@ -38,6 +38,8 @@ trixi_include(@__MODULE__, joinpath(examples_dir(), "fluid", "pipe_flow_2d.jl"),
               domain_size=domain_size, boundary_size=boundary_size,
               flow_direction=flow_direction, faces=(false, false, true, true, true, true),
               tspan=tspan, reference_velocity=velocity_function3d,
+              min_corner=.-(domain_size ./ 2) .- open_boundary_layers * particle_spacing,
+              max_corner=domain_size .+ open_boundary_layers * particle_spacing,
               plane_in=([0.0, 0.0, 0.0], [0.0, domain_size[2], 0.0],
                         [0.0, 0.0, domain_size[3]]),
               plane_out=([domain_size[1], 0.0, 0.0], [domain_size[1], domain_size[2], 0.0],
diff --git a/src/schemes/boundary/dummy_particles/dummy_particles.jl b/src/schemes/boundary/dummy_particles/dummy_particles.jl
index d1ca06d542..7e8a0c9ffc 100644
--- a/src/schemes/boundary/dummy_particles/dummy_particles.jl
+++ b/src/schemes/boundary/dummy_particles/dummy_particles.jl
@@ -441,8 +441,7 @@ function compute_pressure!(boundary_model,
         end
     end
 
-    @trixi_timeit timer() "inverse state equation" @threaded semi for particle in
-                                                                      eachparticle(system)
+    @trixi_timeit timer() "inverse state equation" @threaded semi for particle in eachparticle(system)
         compute_adami_density!(boundary_model, system, system_coords, particle)
     end
 end
@@ -490,6 +489,7 @@ end
 
     # This needs to be serial to avoid race conditions when writing into `system`
     foreach_point_neighbor(neighbor_system, system, neighbor_coords, system_coords, semi;
+                           points=each_moving_particle(neighbor_system),
                            parallelization_backend=SerialBackend()) do neighbor, particle,
                                                                        pos_diff, distance
         # Since neighbor and particle are switched
@@ -620,8 +620,8 @@ end
     for dim in 1:ndims(system)
         # The second term is the precalculated smoothed velocity field of the fluid.
         wall_velocity[dim,
-                      particle] = 2 * v_boundary[dim] -
-                                  wall_velocity[dim, particle] / volume[particle]
+        particle] = 2 * v_boundary[dim] -
+                    wall_velocity[dim, particle] / volume[particle]
     end
     return viscosity
 end

From 53b2f698cba8cd2fde85b2145c619be194c98bfd Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 14 May 2025 15:14:31 +0200
Subject: [PATCH 14/33] formatting

---
 examples/fluid/pipe_flow_2d.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/fluid/pipe_flow_2d.jl b/examples/fluid/pipe_flow_2d.jl
index 8380555d56..3a65f4b81c 100644
--- a/examples/fluid/pipe_flow_2d.jl
+++ b/examples/fluid/pipe_flow_2d.jl
@@ -56,7 +56,6 @@ n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(NDIMS - 1)
 wcsph = false
 
 smoothing_length = 1.5 * particle_spacing
-
 smoothing_kernel = WendlandC2Kernel{NDIMS}()
 
 fluid_density_calculator = ContinuityDensity()
@@ -130,7 +129,7 @@ open_boundary_out = OpenBoundarySPHSystem(outflow; fluid_system,
 # ==== Boundary
 viscosity_boundary = ViscosityAdami(nu=1e-4)
 boundary_model = BoundaryModelDummyParticles(pipe.boundary.density, pipe.boundary.mass,
-                                              AdamiPressureExtrapolation(),
+                                             AdamiPressureExtrapolation(),
                                              state_equation=state_equation,
                                              viscosity=viscosity_boundary,
                                              smoothing_kernel, smoothing_length)

From e50f6b80469faf272d48a6d62db38d3865825e08 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 14 May 2025 15:15:55 +0200
Subject: [PATCH 15/33] formatting again

---
 src/schemes/boundary/dummy_particles/dummy_particles.jl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/schemes/boundary/dummy_particles/dummy_particles.jl b/src/schemes/boundary/dummy_particles/dummy_particles.jl
index 7e8a0c9ffc..4dc4d7f272 100644
--- a/src/schemes/boundary/dummy_particles/dummy_particles.jl
+++ b/src/schemes/boundary/dummy_particles/dummy_particles.jl
@@ -441,7 +441,8 @@ function compute_pressure!(boundary_model,
         end
     end
 
-    @trixi_timeit timer() "inverse state equation" @threaded semi for particle in eachparticle(system)
+    @trixi_timeit timer() "inverse state equation" @threaded semi for particle in
+                                                                      eachparticle(system)
         compute_adami_density!(boundary_model, system, system_coords, particle)
     end
 end
@@ -620,8 +621,8 @@ end
     for dim in 1:ndims(system)
         # The second term is the precalculated smoothed velocity field of the fluid.
         wall_velocity[dim,
-        particle] = 2 * v_boundary[dim] -
-                    wall_velocity[dim, particle] / volume[particle]
+                      particle] = 2 * v_boundary[dim] -
+                                  wall_velocity[dim, particle] / volume[particle]
     end
     return viscosity
 end

From 4b1a442cd1cd473fb3181cfaa127e4305d111963 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 15 May 2025 23:01:28 +0200
Subject: [PATCH 16/33] fix slow buffer update

---
 src/general/buffer.jl | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 64168cfb34..cb49d7b7b4 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -47,20 +47,8 @@ end
     (; active_particle) = buffer
 
     buffer.active_particle_count[] = sum(active_particle)
-    buffer.eachparticle .= -1
-
-    @threaded semi for i in 1:buffer.active_particle_count[]
-        active = 0
-        for j in eachindex(active_particle)
-            if active_particle[j] == true
-                active += 1
-                if active == i
-                    buffer.eachparticle[i] = j
-                    break
-                end
-            end
-        end
-    end
+    buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(x -> x == true,
+                                                                     active_particle)
 
     return buffer
 end

From edd1a91d3d0af4b05bdb793ace918cb71de14347 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Sat, 17 May 2025 18:01:56 +0200
Subject: [PATCH 17/33] fix mirroring

---
 .../boundary/open_boundary/mirroring.jl       | 47 ++++++++++++-------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/src/schemes/boundary/open_boundary/mirroring.jl b/src/schemes/boundary/open_boundary/mirroring.jl
index f8474c875d..970d1ce6ed 100644
--- a/src/schemes/boundary/open_boundary/mirroring.jl
+++ b/src/schemes/boundary/open_boundary/mirroring.jl
@@ -12,10 +12,12 @@ struct BoundaryModelTafuni end
 function update_boundary_quantities!(system, ::BoundaryModelTafuni, v, u, v_ode, u_ode,
                                      semi, t)
     @trixi_timeit timer() "extrapolate and correct values" begin
+        fluid_system = corresponding_fluid_system(system, semi)
+
         v_open_boundary = wrap_v(v_ode, system, semi)
-        v_fluid = wrap_v(v_ode, system.fluid_system, semi)
+        v_fluid = wrap_v(v_ode, fluid_system, semi)
         u_open_boundary = wrap_u(u_ode, system, semi)
-        u_fluid = wrap_u(u_ode, system.fluid_system, semi)
+        u_fluid = wrap_u(u_ode, fluid_system, semi)
 
         extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary, u_fluid,
                             semi, t; system.cache...)
@@ -30,7 +32,9 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
     (; pressure, density, fluid_system, boundary_zone, reference_density,
      reference_velocity, reference_pressure) = system
 
-    state_equation = system_state_equation(system.fluid_system)
+    fluid_system = corresponding_fluid_system(system, semi)
+
+    state_equation = system_state_equation(fluid_system)
 
     # Static indices to avoid allocations
     two_to_end = SVector{ndims(system)}(2:(ndims(system) + 1))
@@ -47,6 +51,9 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
         # Set zero
         correction_matrix = zero(SMatrix{ndims(system) + 1, ndims(system) + 1,
                                          eltype(system)})
+
+        extrapolated_density_correction = zero(SVector{ndims(system) + 1, eltype(system)})
+
         extrapolated_pressure_correction = zero(SVector{ndims(system) + 1, eltype(system)})
 
         extrapolated_velocity_correction = zero(SMatrix{ndims(system), ndims(system) + 1,
@@ -75,13 +82,17 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
 
             correction_matrix += L
 
-            if !prescribed_pressure
+            if !prescribed_pressure && fluid_system isa EntropicallyDampedSPHSystem
                 extrapolated_pressure_correction += pressure_b * R
             end
 
             if !prescribed_velocity
                 extrapolated_velocity_correction += v_b * R'
             end
+
+            if !prescribed_density
+                extrapolated_density_correction += rho_b * R
+            end
         end
 
         # See also `correction_matrix_inversion_step!` for an explanation
@@ -100,16 +111,6 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
         #
         # in order to get zero gradient at the outlet interface.
         # Note: This modification is mentioned here for reference only and is NOT applied in this implementation.
-        if prescribed_pressure
-            pressure[particle] = reference_value(reference_pressure, pressure[particle],
-                                                 particle_coords, t)
-        else
-            f_p = L_inv * extrapolated_pressure_correction
-            df_p = f_p[two_to_end]
-
-            pressure[particle] = f_p[1] + dot(pos_diff, df_p)
-        end
-
         if prescribed_velocity
             v_particle = current_velocity(v_open_boundary, system, particle)
             v_ref = reference_value(reference_velocity, v_particle, particle_coords, t)
@@ -125,12 +126,26 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
             end
         end
 
-        # Unlike Tafuni et al. (2018), we calculate the density using the inverse state-equation
         if prescribed_density
             density[particle] = reference_value(reference_density, density[particle],
                                                 particle_coords, t)
         else
-            inverse_state_equation!(density, state_equation, pressure, particle)
+            f_d = L_inv * extrapolated_density_correction
+            df_d = f_d[two_to_end]
+
+            density[particle] = f_d[1] + dot(pos_diff, df_d)
+        end
+
+        if prescribed_pressure
+            pressure[particle] = reference_value(reference_pressure, pressure[particle],
+                                                 particle_coords, t)
+        elseif fluid_system isa WeaklyCompressibleSPHSystem
+            pressure[particle] = state_equation(density[particle])
+        else
+            f_d = L_inv * extrapolated_pressure_correction
+            df_d = f_d[two_to_end]
+
+            pressure[particle] = f_d[1] + dot(pos_diff, df_d)
         end
     end
 

From e3401a3d81492c8af7fda48e599b883dd2cbbbfa Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Sat, 17 May 2025 18:39:55 +0200
Subject: [PATCH 18/33] fix mirroring again

---
 src/schemes/boundary/open_boundary/mirroring.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/schemes/boundary/open_boundary/mirroring.jl b/src/schemes/boundary/open_boundary/mirroring.jl
index 970d1ce6ed..a44adebf0c 100644
--- a/src/schemes/boundary/open_boundary/mirroring.jl
+++ b/src/schemes/boundary/open_boundary/mirroring.jl
@@ -29,7 +29,7 @@ update_final!(system, ::BoundaryModelTafuni, v, u, v_ode, u_ode, semi, t) = syst
 function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary, u_fluid,
                              semi, t; prescribed_density=false,
                              prescribed_pressure=false, prescribed_velocity=false)
-    (; pressure, density, fluid_system, boundary_zone, reference_density,
+    (; pressure, density, boundary_zone, reference_density,
      reference_velocity, reference_pressure) = system
 
     fluid_system = corresponding_fluid_system(system, semi)

From e2059a64092beb0b3eddf76fcd99541b4f84437b Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Sat, 17 May 2025 18:40:06 +0200
Subject: [PATCH 19/33] add important TODOs

---
 src/general/buffer.jl                           | 1 +
 src/schemes/boundary/open_boundary/mirroring.jl | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index cb49d7b7b4..c1e12d106a 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -46,6 +46,7 @@ end
 @inline function update_system_buffer!(buffer::SystemBuffer, semi)
     (; active_particle) = buffer
 
+    # TODO: Parallelize (see https://github.com/trixi-framework/TrixiParticles.jl/issues/810)
     buffer.active_particle_count[] = sum(active_particle)
     buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(x -> x == true,
                                                                      active_particle)
diff --git a/src/schemes/boundary/open_boundary/mirroring.jl b/src/schemes/boundary/open_boundary/mirroring.jl
index a44adebf0c..9fcc447269 100644
--- a/src/schemes/boundary/open_boundary/mirroring.jl
+++ b/src/schemes/boundary/open_boundary/mirroring.jl
@@ -80,6 +80,7 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
 
             L, R = correction_arrays(kernel_value, grad_kernel, pos_diff, rho_b, m_b)
 
+            # TODO: On GPU "unsupported dynamic function invocation (call to +)"
             correction_matrix += L
 
             if !prescribed_pressure && fluid_system isa EntropicallyDampedSPHSystem
@@ -96,6 +97,7 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
         end
 
         # See also `correction_matrix_inversion_step!` for an explanation
+        # TODO: On GPU "unsupported dynamic function invocation (call to det)" (also abs and <)
         if abs(det(correction_matrix)) < 1.0f-9
             L_inv = I
         else
@@ -130,7 +132,9 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
             density[particle] = reference_value(reference_density, density[particle],
                                                 particle_coords, t)
         else
+            # TODO: On GPU "unsupported dynamic function invocation (call to *)"
             f_d = L_inv * extrapolated_density_correction
+            # TODO: On GPU "unsupported dynamic function invocation (call to getindex)"
             df_d = f_d[two_to_end]
 
             density[particle] = f_d[1] + dot(pos_diff, df_d)
@@ -145,6 +149,7 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
             f_d = L_inv * extrapolated_pressure_correction
             df_d = f_d[two_to_end]
 
+            # TODO: On GPU "unsupported dynamic function invocation (call to dot)"
             pressure[particle] = f_d[1] + dot(pos_diff, df_d)
         end
     end

From 30dfec0d78e0b81a5b88e981cfd03d35103a4cfd Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 20 May 2025 14:06:57 +0200
Subject: [PATCH 20/33] pinv instead of I

---
 examples/fluid/pipe_flow_2d.jl                  | 17 +++++++++--------
 src/schemes/boundary/open_boundary/mirroring.jl |  2 +-
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/examples/fluid/pipe_flow_2d.jl b/examples/fluid/pipe_flow_2d.jl
index 3a65f4b81c..a80af242ce 100644
--- a/examples/fluid/pipe_flow_2d.jl
+++ b/examples/fluid/pipe_flow_2d.jl
@@ -53,7 +53,7 @@ n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(NDIMS - 1)
 
 # ==========================================================================================
 # ==== Fluid
-wcsph = false
+wcsph = true
 
 smoothing_length = 1.5 * particle_spacing
 smoothing_kernel = WendlandC2Kernel{NDIMS}()
@@ -92,7 +92,7 @@ function velocity_function2d(pos, t)
     return SVector(prescribed_velocity, 0.0)
 end
 
-open_boundary_model = BoundaryModelLastiwka()
+open_boundary_model = BoundaryModelTafuni()
 
 boundary_type_in = InFlow()
 plane_in = ([0.0, 0.0], [0.0, domain_size[2]])
@@ -101,8 +101,8 @@ inflow = BoundaryZone(; plane=plane_in, plane_normal=flow_direction, open_bounda
                       boundary_type=boundary_type_in)
 
 reference_velocity_in = velocity_function2d
-reference_pressure_in = pressure
-reference_density_in = fluid_density
+reference_pressure_in = nothing
+reference_density_in = nothing
 open_boundary_in = OpenBoundarySPHSystem(inflow; fluid_system,
                                          boundary_model=open_boundary_model,
                                          buffer_size=n_buffer_particles,
@@ -116,9 +116,9 @@ outflow = BoundaryZone(; plane=plane_out, plane_normal=(-flow_direction),
                        open_boundary_layers, density=fluid_density, particle_spacing,
                        boundary_type=boundary_type_out)
 
-reference_velocity_out = velocity_function2d
-reference_pressure_out = pressure
-reference_density_out = fluid_density
+reference_velocity_out = nothing
+reference_pressure_out = nothing
+reference_density_out = nothing
 open_boundary_out = OpenBoundarySPHSystem(outflow; fluid_system,
                                           boundary_model=open_boundary_model,
                                           buffer_size=n_buffer_particles,
@@ -155,7 +155,8 @@ saving_callback = SolutionSavingCallback(dt=0.02, prefix="")
 
 extra_callback = nothing
 
-callbacks = CallbackSet(info_callback, saving_callback, UpdateCallback(), extra_callback)
+callbacks = CallbackSet(info_callback, saving_callback, UpdateCallback(),
+                        ParticleShiftingCallback(), extra_callback)
 
 sol = solve(ode, RDPK3SpFSAL35(),
             abstol=1e-5, # Default abstol is 1e-6 (may need to be tuned to prevent boundary penetration)
diff --git a/src/schemes/boundary/open_boundary/mirroring.jl b/src/schemes/boundary/open_boundary/mirroring.jl
index 9fcc447269..29a823e733 100644
--- a/src/schemes/boundary/open_boundary/mirroring.jl
+++ b/src/schemes/boundary/open_boundary/mirroring.jl
@@ -99,7 +99,7 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
         # See also `correction_matrix_inversion_step!` for an explanation
         # TODO: On GPU "unsupported dynamic function invocation (call to det)" (also abs and <)
         if abs(det(correction_matrix)) < 1.0f-9
-            L_inv = I
+            L_inv = pinv(correction_matrix)
         else
             L_inv = inv(correction_matrix)
         end

From a9af3682990c33081c7d5c2668fd3e8e52813fd1 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 20 May 2025 19:27:03 +0200
Subject: [PATCH 21/33] use Ref() for the closure

---
 .../boundary/open_boundary/mirroring.jl       | 35 ++++++++++---------
 src/schemes/boundary/open_boundary/system.jl  |  8 -----
 2 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/src/schemes/boundary/open_boundary/mirroring.jl b/src/schemes/boundary/open_boundary/mirroring.jl
index 29a823e733..d1dac1b126 100644
--- a/src/schemes/boundary/open_boundary/mirroring.jl
+++ b/src/schemes/boundary/open_boundary/mirroring.jl
@@ -49,15 +49,18 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
         ghost_node_position = mirror_position(particle_coords, boundary_zone)
 
         # Set zero
-        correction_matrix = zero(SMatrix{ndims(system) + 1, ndims(system) + 1,
-                                         eltype(system)})
+        correction_matrix = Ref(zero(SMatrix{ndims(system) + 1, ndims(system) + 1,
+                                             eltype(system)}))
 
-        extrapolated_density_correction = zero(SVector{ndims(system) + 1, eltype(system)})
+        extrapolated_density_correction = Ref(zero(SVector{ndims(system) + 1,
+                                                           eltype(system)}))
 
-        extrapolated_pressure_correction = zero(SVector{ndims(system) + 1, eltype(system)})
+        extrapolated_pressure_correction = Ref(zero(SVector{ndims(system) + 1,
+                                                            eltype(system)}))
 
-        extrapolated_velocity_correction = zero(SMatrix{ndims(system), ndims(system) + 1,
-                                                        eltype(system)})
+        extrapolated_velocity_correction = Ref(zero(SMatrix{ndims(system),
+                                                            ndims(system) + 1,
+                                                            eltype(system)}))
 
         # TODO: Not public API
         PointNeighbors.foreach_neighbor(fluid_coords, nhs, particle, ghost_node_position,
@@ -81,27 +84,27 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
             L, R = correction_arrays(kernel_value, grad_kernel, pos_diff, rho_b, m_b)
 
             # TODO: On GPU "unsupported dynamic function invocation (call to +)"
-            correction_matrix += L
+            correction_matrix[] += L
 
             if !prescribed_pressure && fluid_system isa EntropicallyDampedSPHSystem
-                extrapolated_pressure_correction += pressure_b * R
+                extrapolated_pressure_correction[] += pressure_b * R
             end
 
             if !prescribed_velocity
-                extrapolated_velocity_correction += v_b * R'
+                extrapolated_velocity_correction[] += v_b * R'
             end
 
             if !prescribed_density
-                extrapolated_density_correction += rho_b * R
+                extrapolated_density_correction[] += rho_b * R
             end
         end
 
         # See also `correction_matrix_inversion_step!` for an explanation
         # TODO: On GPU "unsupported dynamic function invocation (call to det)" (also abs and <)
-        if abs(det(correction_matrix)) < 1.0f-9
-            L_inv = pinv(correction_matrix)
+        if abs(det(correction_matrix[])) < 1.0f-9
+            L_inv = typeof(correction_matrix[])(I)
         else
-            L_inv = inv(correction_matrix)
+            L_inv = inv(correction_matrix[])
         end
 
         pos_diff = particle_coords - ghost_node_position
@@ -121,7 +124,7 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
             end
         else
             @inbounds for dim in eachindex(pos_diff)
-                f_v = L_inv * extrapolated_velocity_correction[dim, :]
+                f_v = L_inv * extrapolated_velocity_correction[][dim, :]
                 df_v = f_v[two_to_end]
 
                 v_open_boundary[dim, particle] = f_v[1] + dot(pos_diff, df_v)
@@ -133,7 +136,7 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
                                                 particle_coords, t)
         else
             # TODO: On GPU "unsupported dynamic function invocation (call to *)"
-            f_d = L_inv * extrapolated_density_correction
+            f_d = L_inv * extrapolated_density_correction[]
             # TODO: On GPU "unsupported dynamic function invocation (call to getindex)"
             df_d = f_d[two_to_end]
 
@@ -146,7 +149,7 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
         elseif fluid_system isa WeaklyCompressibleSPHSystem
             pressure[particle] = state_equation(density[particle])
         else
-            f_d = L_inv * extrapolated_pressure_correction
+            f_d = L_inv * extrapolated_pressure_correction[]
             df_d = f_d[two_to_end]
 
             # TODO: On GPU "unsupported dynamic function invocation (call to dot)"
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index 70257d3a6e..4691e72e8f 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -471,14 +471,6 @@ end
 # When the neighbor is an open boundary system, just use the viscosity of the fluid `system` instead
 @inline viscosity_model(system, neighbor_system::OpenBoundarySPHSystem) = system.viscosity
 
-# This type of viscosity depends on the system, so we need to use the fluid system
-function kinematic_viscosity(system::OpenBoundarySPHSystem,
-                             viscosity::ArtificialViscosityMonaghan, smoothing_length,
-                             sound_speed)
-    return kinematic_viscosity(system.fluid_system, viscosity, smoothing_length,
-                               sound_speed)
-end
-
 function system_data(system::OpenBoundarySPHSystem, v_ode, u_ode, semi)
     v = wrap_v(v_ode, system, semi)
     u = wrap_u(u_ode, system, semi)

From bc869bd7dfa79462b129da3c5316d0562c54eecb Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 20 May 2025 20:09:30 +0200
Subject: [PATCH 22/33] fix tests

---
 examples/fluid/pipe_flow_2d.jl                | 17 ++++++++--------
 .../boundary/open_boundary/mirroring.jl       |  7 ++-----
 .../boundary/open_boundary/mirroring.jl       | 20 ++++++++-----------
 3 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/examples/fluid/pipe_flow_2d.jl b/examples/fluid/pipe_flow_2d.jl
index a80af242ce..3a65f4b81c 100644
--- a/examples/fluid/pipe_flow_2d.jl
+++ b/examples/fluid/pipe_flow_2d.jl
@@ -53,7 +53,7 @@ n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(NDIMS - 1)
 
 # ==========================================================================================
 # ==== Fluid
-wcsph = true
+wcsph = false
 
 smoothing_length = 1.5 * particle_spacing
 smoothing_kernel = WendlandC2Kernel{NDIMS}()
@@ -92,7 +92,7 @@ function velocity_function2d(pos, t)
     return SVector(prescribed_velocity, 0.0)
 end
 
-open_boundary_model = BoundaryModelTafuni()
+open_boundary_model = BoundaryModelLastiwka()
 
 boundary_type_in = InFlow()
 plane_in = ([0.0, 0.0], [0.0, domain_size[2]])
@@ -101,8 +101,8 @@ inflow = BoundaryZone(; plane=plane_in, plane_normal=flow_direction, open_bounda
                       boundary_type=boundary_type_in)
 
 reference_velocity_in = velocity_function2d
-reference_pressure_in = nothing
-reference_density_in = nothing
+reference_pressure_in = pressure
+reference_density_in = fluid_density
 open_boundary_in = OpenBoundarySPHSystem(inflow; fluid_system,
                                          boundary_model=open_boundary_model,
                                          buffer_size=n_buffer_particles,
@@ -116,9 +116,9 @@ outflow = BoundaryZone(; plane=plane_out, plane_normal=(-flow_direction),
                        open_boundary_layers, density=fluid_density, particle_spacing,
                        boundary_type=boundary_type_out)
 
-reference_velocity_out = nothing
-reference_pressure_out = nothing
-reference_density_out = nothing
+reference_velocity_out = velocity_function2d
+reference_pressure_out = pressure
+reference_density_out = fluid_density
 open_boundary_out = OpenBoundarySPHSystem(outflow; fluid_system,
                                           boundary_model=open_boundary_model,
                                           buffer_size=n_buffer_particles,
@@ -155,8 +155,7 @@ saving_callback = SolutionSavingCallback(dt=0.02, prefix="")
 
 extra_callback = nothing
 
-callbacks = CallbackSet(info_callback, saving_callback, UpdateCallback(),
-                        ParticleShiftingCallback(), extra_callback)
+callbacks = CallbackSet(info_callback, saving_callback, UpdateCallback(), extra_callback)
 
 sol = solve(ode, RDPK3SpFSAL35(),
             abstol=1e-5, # Default abstol is 1e-6 (may need to be tuned to prevent boundary penetration)
diff --git a/src/schemes/boundary/open_boundary/mirroring.jl b/src/schemes/boundary/open_boundary/mirroring.jl
index d1dac1b126..0b5b371e50 100644
--- a/src/schemes/boundary/open_boundary/mirroring.jl
+++ b/src/schemes/boundary/open_boundary/mirroring.jl
@@ -49,6 +49,8 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
         ghost_node_position = mirror_position(particle_coords, boundary_zone)
 
         # Set zero
+        # Use `Ref` to ensure the variables are accessible and mutable within the closure
+        # (see https://docs.julialang.org/en/v1/manual/performance-tips/#man-performance-captured)
         correction_matrix = Ref(zero(SMatrix{ndims(system) + 1, ndims(system) + 1,
                                              eltype(system)}))
 
@@ -83,7 +85,6 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
 
             L, R = correction_arrays(kernel_value, grad_kernel, pos_diff, rho_b, m_b)
 
-            # TODO: On GPU "unsupported dynamic function invocation (call to +)"
             correction_matrix[] += L
 
             if !prescribed_pressure && fluid_system isa EntropicallyDampedSPHSystem
@@ -100,7 +101,6 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
         end
 
         # See also `correction_matrix_inversion_step!` for an explanation
-        # TODO: On GPU "unsupported dynamic function invocation (call to det)" (also abs and <)
         if abs(det(correction_matrix[])) < 1.0f-9
             L_inv = typeof(correction_matrix[])(I)
         else
@@ -135,9 +135,7 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
             density[particle] = reference_value(reference_density, density[particle],
                                                 particle_coords, t)
         else
-            # TODO: On GPU "unsupported dynamic function invocation (call to *)"
             f_d = L_inv * extrapolated_density_correction[]
-            # TODO: On GPU "unsupported dynamic function invocation (call to getindex)"
             df_d = f_d[two_to_end]
 
             density[particle] = f_d[1] + dot(pos_diff, df_d)
@@ -152,7 +150,6 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
             f_d = L_inv * extrapolated_pressure_correction[]
             df_d = f_d[two_to_end]
 
-            # TODO: On GPU "unsupported dynamic function invocation (call to dot)"
             pressure[particle] = f_d[1] + dot(pos_diff, df_d)
         end
     end
diff --git a/test/schemes/boundary/open_boundary/mirroring.jl b/test/schemes/boundary/open_boundary/mirroring.jl
index 729fa18502..0d996aeb03 100644
--- a/test/schemes/boundary/open_boundary/mirroring.jl
+++ b/test/schemes/boundary/open_boundary/mirroring.jl
@@ -49,12 +49,10 @@
 
         smoothing_length = 1.5 * particle_spacing
         smoothing_kernel = WendlandC2Kernel{ndims(domain_fluid)}()
-        fluid_system = WeaklyCompressibleSPHSystem(domain_fluid, SummationDensity(),
-                                                   nothing, smoothing_kernel,
-                                                   smoothing_length)
+        fluid_system = EntropicallyDampedSPHSystem(domain_fluid,smoothing_kernel,
+                                                   smoothing_length, 1.0)
 
         fluid_system.cache.density .= domain_fluid.density
-        fluid_system.pressure .= domain_fluid.pressure
 
         @testset verbose=true "plane normal $i" for i in eachindex(files)
             inflow = BoundaryZone(; plane=plane_boundary[i],
@@ -69,11 +67,11 @@
             TrixiParticles.initialize_neighborhood_searches!(semi)
 
             v_open_boundary = zero(inflow.initial_condition.velocity)
+            v_fluid = vcat(domain_fluid.velocity, domain_fluid.pressure')
 
             TrixiParticles.set_zero!(open_boundary.pressure)
 
-            TrixiParticles.extrapolate_values!(open_boundary, v_open_boundary,
-                                               domain_fluid.velocity,
+            TrixiParticles.extrapolate_values!(open_boundary, v_open_boundary, v_fluid,
                                                inflow.initial_condition.coordinates,
                                                domain_fluid.coordinates, semi, 0.0;
                                                prescribed_pressure=false,
@@ -146,12 +144,10 @@
 
         smoothing_length = 1.5 * particle_spacing
         smoothing_kernel = WendlandC2Kernel{ndims(domain_fluid)}()
-        fluid_system = WeaklyCompressibleSPHSystem(domain_fluid, SummationDensity(),
-                                                   nothing, smoothing_kernel,
-                                                   smoothing_length)
+        fluid_system = EntropicallyDampedSPHSystem(domain_fluid, smoothing_kernel,
+                                                   smoothing_length, 1.0)
 
         fluid_system.cache.density .= domain_fluid.density
-        fluid_system.pressure .= domain_fluid.pressure
 
         @testset verbose=true "plane normal $i" for i in eachindex(files)
             inflow = BoundaryZone(; plane=plane_boundary[i],
@@ -166,11 +162,11 @@
             TrixiParticles.initialize_neighborhood_searches!(semi)
 
             v_open_boundary = zero(inflow.initial_condition.velocity)
+            v_fluid = vcat(domain_fluid.velocity, domain_fluid.pressure')
 
             TrixiParticles.set_zero!(open_boundary.pressure)
 
-            TrixiParticles.extrapolate_values!(open_boundary, v_open_boundary,
-                                               domain_fluid.velocity,
+            TrixiParticles.extrapolate_values!(open_boundary, v_open_boundary, v_fluid,
                                                inflow.initial_condition.coordinates,
                                                domain_fluid.coordinates, semi, 0.0;
                                                prescribed_pressure=false,

From f506bf3bff609770d99df32e7c91e41b5ed27455 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 20 May 2025 20:13:37 +0200
Subject: [PATCH 23/33] apply formatter

---
 test/schemes/boundary/open_boundary/mirroring.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/schemes/boundary/open_boundary/mirroring.jl b/test/schemes/boundary/open_boundary/mirroring.jl
index 0d996aeb03..0865a440a0 100644
--- a/test/schemes/boundary/open_boundary/mirroring.jl
+++ b/test/schemes/boundary/open_boundary/mirroring.jl
@@ -49,7 +49,7 @@
 
         smoothing_length = 1.5 * particle_spacing
         smoothing_kernel = WendlandC2Kernel{ndims(domain_fluid)}()
-        fluid_system = EntropicallyDampedSPHSystem(domain_fluid,smoothing_kernel,
+        fluid_system = EntropicallyDampedSPHSystem(domain_fluid, smoothing_kernel,
                                                    smoothing_length, 1.0)
 
         fluid_system.cache.density .= domain_fluid.density

From 1bae3ce2276a166f17138941a472e493f9e35a69 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 20 May 2025 20:24:36 +0200
Subject: [PATCH 24/33] fix

---
 test/examples/examples_fluid.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/examples/examples_fluid.jl b/test/examples/examples_fluid.jl
index a9633217a6..4c557ee465 100644
--- a/test/examples/examples_fluid.jl
+++ b/test/examples/examples_fluid.jl
@@ -332,6 +332,7 @@
                                          reference_density_in=nothing,
                                          reference_pressure_in=nothing,
                                          reference_density_out=nothing,
+                                         reference_pressure_out=nothing,
                                          reference_velocity_out=nothing)
         @test sol.retcode == ReturnCode.Success
         @test count_rhs_allocations(sol, semi) == 0

From 2255d87b297ff935d918156e3fe3a98b915a9bdf Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 22 May 2025 16:29:07 +0200
Subject: [PATCH 25/33] add comment

---
 src/general/buffer.jl                        | 3 +++
 src/schemes/boundary/open_boundary/system.jl | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index c1e12d106a..5c6234991d 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -71,6 +71,9 @@ end
             # If this is `true`, the particle was active before and we need to continue.
             # This happens because a particle might have been activated by another thread
             # between the condition and the line below.
+            # Note: This doesn't work with Metal.jl. No error is thrown, but the operation is simply ignored.
+            # An atomic compare-and-swap operation is probably implemented for Metal.jl here:
+            # https://github.com/JuliaGPU/Metal.jl/blob/caf299690aa52448ee72ffc5688939b157fc1ba2/src/device/intrinsics/atomics.jl#L42
             was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true
 
             if was_active == false
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index 4691e72e8f..8a6df235fa 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -33,7 +33,8 @@ Open boundary system for in- and outflow particles.
     from the fluid domaim to the buffer zones (inflow and outflow) using ghost nodes.
 
 !!! warning "Experimental Implementation"
-	This is an experimental feature and may change in any future releases.
+    This is an experimental feature and may change in future releases.
+    It is GPU-compatible (e.g., with CUDA.jl and AMDGPU.jl), but currently **not** supported with Metal.jl.
 """
 struct OpenBoundarySPHSystem{BM, ELTYPE, NDIMS, IC, FS, FSI, ARRAY1D, BZ, RV,
                              RP, RD, B, UCU, C} <: System{NDIMS}

From e2bcba879d107d489456987abad7ac12951daa45 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 15:04:51 +0200
Subject: [PATCH 26/33] add comment

---
 src/general/buffer.jl                           | 1 +
 src/schemes/boundary/open_boundary/mirroring.jl | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 5c6234991d..305f381c92 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -47,6 +47,7 @@ end
     (; active_particle) = buffer
 
     # TODO: Parallelize (see https://github.com/trixi-framework/TrixiParticles.jl/issues/810)
+    # Update the number of active particles and the active particle indices
     buffer.active_particle_count[] = sum(active_particle)
     buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(x -> x == true,
                                                                      active_particle)
diff --git a/src/schemes/boundary/open_boundary/mirroring.jl b/src/schemes/boundary/open_boundary/mirroring.jl
index 0b5b371e50..cd7b890405 100644
--- a/src/schemes/boundary/open_boundary/mirroring.jl
+++ b/src/schemes/boundary/open_boundary/mirroring.jl
@@ -87,7 +87,8 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
 
             correction_matrix[] += L
 
-            if !prescribed_pressure && fluid_system isa EntropicallyDampedSPHSystem
+            # For a WCSPH system, the pressure is determined by the state equation if it is not prescribed
+            if !prescribed_pressure && !(fluid_system isa WeaklyCompressibleSPHSystem)
                 extrapolated_pressure_correction[] += pressure_b * R
             end
 
@@ -145,6 +146,8 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
             pressure[particle] = reference_value(reference_pressure, pressure[particle],
                                                  particle_coords, t)
         elseif fluid_system isa WeaklyCompressibleSPHSystem
+            # For a WCSPH system, the pressure is determined by the state equation
+            # if it is not prescribed
             pressure[particle] = state_equation(density[particle])
         else
             f_d = L_inv * extrapolated_pressure_correction[]

From 395071d92f770c2a0abe852159d910e77de23b48 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 19:27:15 +0200
Subject: [PATCH 27/33] fix allocation bug

---
 src/general/gpu.jl                           | 20 +----------
 src/general/semidiscretization.jl            | 38 +++++++++++++++++++-
 src/schemes/boundary/open_boundary/system.jl |  2 +-
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/src/general/gpu.jl b/src/general/gpu.jl
index cc1cab3ecd..3e5d93e5b6 100644
--- a/src/general/gpu.jl
+++ b/src/general/gpu.jl
@@ -19,25 +19,7 @@ Adapt.@adapt_structure BoundaryMovement
 Adapt.@adapt_structure TotalLagrangianSPHSystem
 Adapt.@adapt_structure BoundaryZone
 Adapt.@adapt_structure SystemBuffer
-
-function Adapt.adapt_structure(to, system::OpenBoundarySPHSystem)
-    return OpenBoundarySPHSystem(Adapt.adapt(to, system.boundary_model),
-                                 Adapt.adapt(to, system.initial_condition),
-                                 nothing,  # Do not adapt `fluid_system``
-                                 Adapt.adapt(to, system.fluid_system_index),
-                                 Adapt.adapt(to, system.smoothing_length),
-                                 Adapt.adapt(to, system.mass),
-                                 Adapt.adapt(to, system.density),
-                                 Adapt.adapt(to, system.volume),
-                                 Adapt.adapt(to, system.pressure),
-                                 Adapt.adapt(to, system.boundary_zone),
-                                 Adapt.adapt(to, system.reference_velocity),
-                                 Adapt.adapt(to, system.reference_pressure),
-                                 Adapt.adapt(to, system.reference_density),
-                                 Adapt.adapt(to, system.buffer),
-                                 Adapt.adapt(to, system.update_callback_used),
-                                 Adapt.adapt(to, system.cache))
-end
+Adapt.@adapt_structure OpenBoundarySPHSystem
 
 KernelAbstractions.get_backend(::PtrArray) = KernelAbstractions.CPU()
 KernelAbstractions.get_backend(system::System) = KernelAbstractions.get_backend(system.mass)
diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
index 55f329da6e..839ed1790d 100644
--- a/src/general/semidiscretization.jl
+++ b/src/general/semidiscretization.jl
@@ -319,7 +319,16 @@ function semidiscretize(semi, tspan; reset_threads=true)
         # When e.g. `parallelization_backend=CUDABackend()`, this will convert all `Array`s
         # to `CuArray`s, moving data to the GPU.
         # See the comments in general/gpu.jl for more details.
-        semi_new = Adapt.adapt(semi.parallelization_backend, semi)
+        semi_ = Adapt.adapt(semi.parallelization_backend, semi)
+
+        # After `adapt`, the system type information may change.
+        # As a result, systems can no longer be found using `system_indices`.
+        # For systems that have a corresponding linked system, we need to relink them
+        # after adaptation to ensure correct references.
+        semi_new = Semidiscretization(set_system_links.(semi_.systems, Ref(semi_)),
+                                      semi_.ranges_u, semi_.ranges_v,
+                                      semi_.neighborhood_searches,
+                                      semi_.parallelization_backend)
     else
         semi_new = semi
     end
@@ -936,3 +945,30 @@ function check_configuration(system::OpenBoundarySPHSystem, systems,
                             "See the PointNeighbors.jl documentation for more details."))
     end
 end
+
+# After `adapt`, the system type information may change.
+# As a result, systems can no longer be found using `system_indices`.
+# For systems that have a corresponding linked system, we need to relink them
+# after adaptation to ensure correct references.
+set_system_links(system, semi) = system
+
+function set_system_links(system::OpenBoundarySPHSystem, semi)
+    fluid_system = semi.systems[system.fluid_system_index[]]
+
+    return OpenBoundarySPHSystem(system.boundary_model,
+                                 system.initial_condition,
+                                 fluid_system, # link to fluid system
+                                 system.fluid_system_index,
+                                 system.smoothing_length,
+                                 system.mass,
+                                 system.density,
+                                 system.volume,
+                                 system.pressure,
+                                 system.boundary_zone,
+                                 system.reference_velocity,
+                                 system.reference_pressure,
+                                 system.reference_density,
+                                 system.buffer,
+                                 system.update_callback_used,
+                                 system.cache)
+end
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index 8a6df235fa..e3047c038a 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -227,7 +227,7 @@ end
 update_callback_used!(system::OpenBoundarySPHSystem) = system.update_callback_used[] = true
 
 function corresponding_fluid_system(system::OpenBoundarySPHSystem, semi)
-    return semi.systems[system.fluid_system_index[]]
+    return system.fluid_system
 end
 
 function smoothing_length(system::OpenBoundarySPHSystem, particle)

From 6aca12b42814f63fab593eb3006975908d32924c Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Tue, 3 Jun 2025 19:27:35 +0200
Subject: [PATCH 28/33] update PointNeighbors version bound

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index f6bfce2770..a797bd3196 100644
--- a/Project.toml
+++ b/Project.toml
@@ -54,7 +54,7 @@ KernelAbstractions = "0.9"
 MuladdMacro = "0.2"
 OrdinaryDiffEq = "6.91"
 OrdinaryDiffEqCore = "1"
-PointNeighbors = "0.6.1"
+PointNeighbors = "0.6.3"
 Polyester = "0.7.10"
 ReadVTK = "0.2"
 RecipesBase = "1"

From ddf1538860b1064b7f93688b32520ec62186b54f Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 4 Jun 2025 21:47:32 +0200
Subject: [PATCH 29/33] implement suggestion

---
 examples/fluid/pipe_flow_3d.jl                |  3 +--
 src/general/buffer.jl                         | 10 +++++-----
 src/general/semidiscretization.jl             | 19 ++++++++++---------
 .../boundary/open_boundary/boundary_zones.jl  |  2 --
 .../method_of_characteristics.jl              |  6 +++---
 .../boundary/open_boundary/mirroring.jl       |  9 ++++++---
 src/schemes/boundary/open_boundary/system.jl  | 14 ++++++++------
 7 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/examples/fluid/pipe_flow_3d.jl b/examples/fluid/pipe_flow_3d.jl
index a898acb97b..e7bb72923e 100644
--- a/examples/fluid/pipe_flow_3d.jl
+++ b/examples/fluid/pipe_flow_3d.jl
@@ -38,8 +38,7 @@ trixi_include(@__MODULE__, joinpath(examples_dir(), "fluid", "pipe_flow_2d.jl"),
               domain_size=domain_size, boundary_size=boundary_size,
               flow_direction=flow_direction, faces=(false, false, true, true, true, true),
               tspan=tspan, reference_velocity=velocity_function3d,
-              min_corner=.-(domain_size ./ 2) .- open_boundary_layers * particle_spacing,
-              max_corner=domain_size .+ open_boundary_layers * particle_spacing,
+              open_boundary_layers=open_boundary_layers,
               plane_in=([0.0, 0.0, 0.0], [0.0, domain_size[2], 0.0],
                         [0.0, 0.0, domain_size[3]]),
               plane_out=([domain_size[1], 0.0, 0.0], [domain_size[1], domain_size[2], 0.0],
diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 305f381c92..43a04662be 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -11,7 +11,7 @@ function SystemBuffer(active_size, buffer_size::Integer)
     # Thus, `active_particle` is defined as a `Vector{UInt32}` because CUDA.jl
     # does not support atomic operations on `Bool`.
     # https://github.com/JuliaGPU/CUDA.jl/blob/2cc9285676a4cd28d0846ca62f0300c56d281d38/src/device/intrinsics/atomics.jl#L243
-    active_particle = vcat(fill(UInt32(1), active_size), fill(UInt32(0), buffer_size))
+    active_particle = vcat(fill(UInt32(true), active_size), fill(UInt32(false), buffer_size))
     eachparticle = collect(eachindex(active_particle))
 
     return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)
@@ -68,10 +68,10 @@ end
 
     for particle in eachindex(active_particle)
         if PointNeighbors.Atomix.@atomic(active_particle[particle]) == false
-            # Activate this particle. The return value is the old value.
-            # If this is `true`, the particle was active before and we need to continue.
-            # This happens because a particle might have been activated by another thread
-            # between the condition and the line below.
+            # After we go into this condition, another thread might still activate this particle
+            # before we do. Therefore, we use an atomic swap, which activates the particle,
+            # but also returns the old value.
+            # If the old value is `true`, the particle was active before and we need to continue.
             # Note: This doesn't work with Metal.jl. No error is thrown, but the operation is simply ignored.
             # An atomic compare-and-swap operation is probably implemented for Metal.jl here:
             # https://github.com/JuliaGPU/Metal.jl/blob/caf299690aa52448ee72ffc5688939b157fc1ba2/src/device/intrinsics/atomics.jl#L42
diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
index 839ed1790d..7610ec29cc 100644
--- a/src/general/semidiscretization.jl
+++ b/src/general/semidiscretization.jl
@@ -321,10 +321,11 @@ function semidiscretize(semi, tspan; reset_threads=true)
         # See the comments in general/gpu.jl for more details.
         semi_ = Adapt.adapt(semi.parallelization_backend, semi)
 
-        # After `adapt`, the system type information may change.
-        # As a result, systems can no longer be found using `system_indices`.
-        # For systems that have a corresponding linked system, we need to relink them
-        # after adaptation to ensure correct references.
+        # We now have a new `Semidiscretization` with new systems.
+        # This means that systems linking to other systems still point to old systems.
+        # Therefore, we have to re-link them, which yields yet another `Semidiscretization`.
+        # Note that this re-creates systems containing links, so it only works as long
+        # as systems don't link to other systems containing links.
         semi_new = Semidiscretization(set_system_links.(semi_.systems, Ref(semi_)),
                                       semi_.ranges_u, semi_.ranges_v,
                                       semi_.neighborhood_searches,
@@ -929,8 +930,9 @@ function check_configuration(system::OpenBoundarySPHSystem, systems,
                              neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
     (; boundary_model, boundary_zone) = system
 
+    # Store index of the fluid system. This is necessary for re-linking
+    # in case we use Adapt.jl to create a new semidiscretization.
     fluid_system_index = findfirst(==(system.fluid_system), systems)
-
     system.fluid_system_index[] = fluid_system_index
 
     if boundary_model isa BoundaryModelLastiwka &&
@@ -941,15 +943,14 @@ function check_configuration(system::OpenBoundarySPHSystem, systems,
 
     if first(PointNeighbors.requires_update(neighborhood_search))
         throw(ArgumentError("`OpenBoundarySPHSystem` requires a neighborhood search " *
-                            "that does not need an update for the `x` coordinates (e.g. `GridNeighborhoodSearch`). " *
+                            "that does not require an update for the first set of coordinates (e.g. `GridNeighborhoodSearch`). " *
                             "See the PointNeighbors.jl documentation for more details."))
     end
 end
 
 # After `adapt`, the system type information may change.
-# As a result, systems can no longer be found using `system_indices`.
-# For systems that have a corresponding linked system, we need to relink them
-# after adaptation to ensure correct references.
+# This means that systems linking to other systems still point to old systems.
+# Therefore, we have to re-link them based on the stored system index.
 set_system_links(system, semi) = system
 
 function set_system_links(system::OpenBoundarySPHSystem, semi)
diff --git a/src/schemes/boundary/open_boundary/boundary_zones.jl b/src/schemes/boundary/open_boundary/boundary_zones.jl
index 90d587bb36..6739f80f9f 100644
--- a/src/schemes/boundary/open_boundary/boundary_zones.jl
+++ b/src/schemes/boundary/open_boundary/boundary_zones.jl
@@ -123,8 +123,6 @@ function BoundaryZone(; plane, plane_normal, density, particle_spacing,
                         flow_direction, plane_normal_, boundary_type)
 end
 
-@inline Base.ndims(::BoundaryZone) = error("`ndims` not defined for `BoundaryZone`")
-
 function set_up_boundary_zone(plane, plane_normal, flow_direction, density,
                               particle_spacing, initial_condition, extrude_geometry,
                               open_boundary_layers; boundary_type)
diff --git a/src/schemes/boundary/open_boundary/method_of_characteristics.jl b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
index a1f0c01ed6..8dacc446e7 100644
--- a/src/schemes/boundary/open_boundary/method_of_characteristics.jl
+++ b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
@@ -115,9 +115,9 @@ function evaluate_characteristics!(system, v, u, v_ode, u_ode, semi, t)
 
             # Using the average of the values at the previous time step for particles which
             # are outside of the influence of fluid particles.
-            avg_J1 = zero(volume[particle])
-            avg_J2 = zero(volume[particle])
-            avg_J3 = zero(volume[particle])
+            avg_J1 = zero(eltype(volume))
+            avg_J2 = zero(eltype(volume))
+            avg_J3 = zero(eltype(volume))
             counter = 0
 
             for neighbor in each_moving_particle(system)
diff --git a/src/schemes/boundary/open_boundary/mirroring.jl b/src/schemes/boundary/open_boundary/mirroring.jl
index cd7b890405..52d4d5c0d8 100644
--- a/src/schemes/boundary/open_boundary/mirroring.jl
+++ b/src/schemes/boundary/open_boundary/mirroring.jl
@@ -44,13 +44,16 @@ function extrapolate_values!(system, v_open_boundary, v_fluid, u_open_boundary,
 
     fluid_coords = current_coordinates(u_fluid, fluid_system)
 
+    # We cannot use `foreach_point_neighbor` here because we are looking for neighbors
+    # of the ghost node positions of each particle.
+    # We can do this because we require the neighborhood search to support querying neighbors
+    # of arbitrary positions (see `PointNeighbors.requires_update`).
     @threaded semi for particle in each_moving_particle(system)
         particle_coords = current_coords(u_open_boundary, system, particle)
         ghost_node_position = mirror_position(particle_coords, boundary_zone)
 
-        # Set zero
-        # Use `Ref` to ensure the variables are accessible and mutable within the closure
-        # (see https://docs.julialang.org/en/v1/manual/performance-tips/#man-performance-captured)
+        # Use `Ref` to ensure the variables are accessible and mutable within the closure below
+        # (see https://docs.julialang.org/en/v1/manual/performance-tips/#man-performance-captured).
         correction_matrix = Ref(zero(SMatrix{ndims(system) + 1, ndims(system) + 1,
                                              eltype(system)}))
 
diff --git a/src/schemes/boundary/open_boundary/system.jl b/src/schemes/boundary/open_boundary/system.jl
index e3047c038a..f299fd9286 100644
--- a/src/schemes/boundary/open_boundary/system.jl
+++ b/src/schemes/boundary/open_boundary/system.jl
@@ -147,15 +147,17 @@ function OpenBoundarySPHSystem(boundary_zone::BoundaryZone;
                                        reference_density, reference_velocity,
                                        reference_pressure)
 
-    ucu = Ref(false) # Reference to check if the update callback is used
-    fsi = Ref(0) # Reference to the fluid system index
+    # These will be set later
+    update_callback_used = Ref(false)
+    fluid_system_index = Ref(0)
 
     smoothing_length = initial_smoothing_length(fluid_system)
 
-    return OpenBoundarySPHSystem(boundary_model, initial_condition, fluid_system, fsi,
-                                 smoothing_length, mass, density, volume, pressure,
-                                 boundary_zone, reference_velocity_, reference_pressure_,
-                                 reference_density_, buffer, ucu, cache)
+    return OpenBoundarySPHSystem(boundary_model, initial_condition, fluid_system,
+                                 fluid_system_index, smoothing_length, mass, density,
+                                 volume, pressure, boundary_zone, reference_velocity_,
+                                 reference_pressure_, reference_density_, buffer,
+                                 update_callback_used, cache)
 end
 
 function create_cache_open_boundary(boundary_model, initial_condition,

From d71e14a452434532371e89409fd40efeacf055fb Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Wed, 4 Jun 2025 21:50:28 +0200
Subject: [PATCH 30/33] apply formatter

---
 src/general/buffer.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 43a04662be..e6a2c0fffd 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -11,7 +11,8 @@ function SystemBuffer(active_size, buffer_size::Integer)
     # Thus, `active_particle` is defined as a `Vector{UInt32}` because CUDA.jl
     # does not support atomic operations on `Bool`.
     # https://github.com/JuliaGPU/CUDA.jl/blob/2cc9285676a4cd28d0846ca62f0300c56d281d38/src/device/intrinsics/atomics.jl#L243
-    active_particle = vcat(fill(UInt32(true), active_size), fill(UInt32(false), buffer_size))
+    active_particle = vcat(fill(UInt32(true), active_size),
+                           fill(UInt32(false), buffer_size))
     eachparticle = collect(eachindex(active_particle))
 
     return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)

From 9663d5890e3f8d24916d50b52230bced8ee185f2 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 5 Jun 2025 11:56:28 +0200
Subject: [PATCH 31/33] add comment

---
 src/schemes/boundary/open_boundary/method_of_characteristics.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/schemes/boundary/open_boundary/method_of_characteristics.jl b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
index 8dacc446e7..db6eedc83b 100644
--- a/src/schemes/boundary/open_boundary/method_of_characteristics.jl
+++ b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
@@ -102,6 +102,7 @@ function evaluate_characteristics!(system, v, u, v_ode, u_ode, semi, t)
     set_zero!(volume)
 
     # Evaluate the characteristic variables with the fluid system
+    # The following function acts as a wrapper to keep the code organized and modular.
     evaluate_characteristics!(system, fluid_system, v, u, v_ode, u_ode, semi, t)
 
     # Only some of the in-/outlet particles are in the influence of the fluid particles.

From de26d028922f6e72d6c21ca3059512c9aa9cd286 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 5 Jun 2025 15:21:26 +0200
Subject: [PATCH 32/33] add comment

---
 src/general/buffer.jl | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index e6a2c0fffd..15a7bc4579 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -96,10 +96,8 @@ end
         u[dim, particle] = eltype(system)(1e16)
     end
 
-    # To ensure thread safety, the buffer particle is only released for reuse
-    # after the write operation (`u`) has been completed.
-    # This guarantees that no other thread can access the active particle prematurely,
-    # avoiding race conditions.
+    # `activate_next_particle!` and `deactivate_particle!` are never called on the same buffer inside a kernel,
+    # so we don't have any race conditions on this `active_particle` vector.
     active_particle[particle] = false
 
     return system

From 5fc3c7a2dabef38238f347d8f4e3398b1c6744f5 Mon Sep 17 00:00:00 2001
From: LasNikas <niklas.nehe@web.de>
Date: Thu, 5 Jun 2025 16:09:35 +0200
Subject: [PATCH 33/33] revert comment

---
 src/schemes/boundary/open_boundary/method_of_characteristics.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/schemes/boundary/open_boundary/method_of_characteristics.jl b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
index db6eedc83b..8dacc446e7 100644
--- a/src/schemes/boundary/open_boundary/method_of_characteristics.jl
+++ b/src/schemes/boundary/open_boundary/method_of_characteristics.jl
@@ -102,7 +102,6 @@ function evaluate_characteristics!(system, v, u, v_ode, u_ode, semi, t)
     set_zero!(volume)
 
     # Evaluate the characteristic variables with the fluid system
-    # The following function acts as a wrapper to keep the code organized and modular.
     evaluate_characteristics!(system, fluid_system, v, u, v_ode, u_ode, semi, t)
 
     # Only some of the in-/outlet particles are in the influence of the fluid particles.