trixi-framework · LasNikas · Jun 6, 2025 · Apr 16, 2025 · Apr 16, 2025 · Apr 16, 2025
diff --git a/examples/fluid/pipe_flow_2d.jl b/examples/fluid/pipe_flow_2d.jl
@@ -47,14 +47,16 @@ pipe = RectangularTank(particle_spacing, domain_size, boundary_size, fluid_densi
 # Shift pipe walls in negative x-direction for the inflow
 pipe.boundary.coordinates[1, :] .-= particle_spacing * open_boundary_layers
 
-n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(ndims(pipe.fluid) - 1)
+NDIMS = ndims(pipe.fluid)
+
+n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(NDIMS - 1)
 
 # ==========================================================================================
 # ==== Fluid
 wcsph = false
 
 smoothing_length = 1.5 * particle_spacing
-smoothing_kernel = WendlandC2Kernel{2}()
+smoothing_kernel = WendlandC2Kernel{NDIMS}()
 
 fluid_density_calculator = ContinuityDensity()
 
@@ -136,8 +138,15 @@ boundary_system = BoundarySPHSystem(pipe.boundary, boundary_model)
 
 # ==========================================================================================
 # ==== Simulation
+min_corner = minimum(pipe.boundary.coordinates .- particle_spacing, dims=2)
+max_corner = maximum(pipe.boundary.coordinates .+ particle_spacing, dims=2)
+
+nhs = GridNeighborhoodSearch{NDIMS}(; cell_list=FullGridCellList(; min_corner, max_corner),
+                                    update_strategy=ParallelUpdate())
+
 semi = Semidiscretization(fluid_system, open_boundary_in, open_boundary_out,
-                          boundary_system, parallelization_backend=PolyesterBackend())
+                          boundary_system, neighborhood_search=nhs,
+                          parallelization_backend=PolyesterBackend())
 
 ode = semidiscretize(semi, tspan)
 

diff --git a/examples/fluid/pipe_flow_3d.jl b/examples/fluid/pipe_flow_3d.jl
@@ -37,8 +37,9 @@ flow_direction = [1.0, 0.0, 0.0]
 trixi_include(@__MODULE__, joinpath(examples_dir(), "fluid", "pipe_flow_2d.jl"),
               domain_size=domain_size, boundary_size=boundary_size,
               flow_direction=flow_direction, faces=(false, false, true, true, true, true),
-              tspan=tspan, smoothing_kernel=WendlandC2Kernel{3}(),
-              reference_velocity=velocity_function3d,
+              tspan=tspan, reference_velocity=velocity_function3d,
+              min_corner=.-(domain_size ./ 2) .- open_boundary_layers * particle_spacing,
+              max_corner=domain_size .+ open_boundary_layers * particle_spacing,
               plane_in=([0.0, 0.0, 0.0], [0.0, domain_size[2], 0.0],
                         [0.0, 0.0, domain_size[3]]),
               plane_out=([domain_size[1], 0.0, 0.0], [domain_size[1], domain_size[2], 0.0],

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
@@ -1,15 +1,20 @@
-struct SystemBuffer{V}
-    active_particle :: Vector{Bool}
-    eachparticle    :: V # Vector{Int}
-    buffer_size     :: Int
+struct SystemBuffer{AP, APC, EP}
+    active_particle       :: AP # Vector{Bool}
+    active_particle_count :: APC
+    eachparticle          :: EP # Vector{Int}
+    buffer_size           :: Int
+end
 
-    function SystemBuffer(active_size, buffer_size::Integer)
-        # We cannot use a `BitVector` here, as writing to a `BitVector` is not thread-safe
-        active_particle = vcat(fill(true, active_size), fill(false, buffer_size))
-        eachparticle = collect(1:active_size)
+function SystemBuffer(active_size, buffer_size::Integer)
+    # Using a `BitVector` is not an option as writing to it is not thread-safe.
+    # Also, to ensure thread-safe particle activation, we use an `atomic_cas` operation.
+    # Thus, `active_particle` is defined as a `Vector{UInt32}` because CUDA.jl
+    # does not support atomic operations on `Bool`.
+    # https://github.com/JuliaGPU/CUDA.jl/blob/2cc9285676a4cd28d0846ca62f0300c56d281d38/src/device/intrinsics/atomics.jl#L243
+    active_particle = vcat(fill(UInt32(1), active_size), fill(UInt32(0), buffer_size))
+    eachparticle = collect(eachindex(active_particle))
 
-        return new{typeof(eachparticle)}(active_particle, eachparticle, buffer_size)
-    end
+    return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)
 end
 
 allocate_buffer(initial_condition, ::Nothing) = initial_condition
@@ -18,7 +23,8 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
     (; buffer_size) = buffer
 
     # Initialize particles far away from simulation domain
-    coordinates = fill(1e16, ndims(initial_condition), buffer_size)
+    coordinates = fill(eltype(initial_condition)(1e16), ndims(initial_condition),
+                       buffer_size)
 
     if all(rho -> isapprox(rho, first(initial_condition.density), atol=eps(), rtol=eps()),
            initial_condition.density)
@@ -34,55 +40,65 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
     return union(initial_condition, buffer_ic)
 end
 
-@inline update_system_buffer!(buffer::Nothing) = buffer
+@inline update_system_buffer!(buffer::Nothing, semi) = buffer
 
 # TODO `resize` allocates. Find a non-allocating version
-@inline function update_system_buffer!(buffer::SystemBuffer)
+@inline function update_system_buffer!(buffer::SystemBuffer, semi)
     (; active_particle) = buffer
 
-    resize!(buffer.eachparticle, count(active_particle))
-
-    i = 1
-    for j in eachindex(active_particle)
-        if active_particle[j]
-            buffer.eachparticle[i] = j
-            i += 1
-        end
-    end
+    # TODO: Parallelize (see https://github.com/trixi-framework/TrixiParticles.jl/issues/810)
+    buffer.active_particle_count[] = sum(active_particle)
+    buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(x -> x == true,
+                                                                     active_particle)
 
     return buffer
 end
 
-@inline each_moving_particle(system, buffer) = buffer.eachparticle
+@inline each_moving_particle(system, buffer) = active_particles(system, buffer)
 
-@inline active_coordinates(u, system, buffer) = view(u, :, buffer.active_particle)
+@inline active_coordinates(u, system, buffer) = view(u, :, active_particles(system, buffer))
 
-@inline active_particles(system, buffer) = buffer.eachparticle
+@inline function active_particles(system, buffer)
+    return view(buffer.eachparticle, 1:buffer.active_particle_count[])
+end
 
 @inline function activate_next_particle(system)
     (; active_particle) = system.buffer
 
-    next_particle = findfirst(x -> !x, active_particle)
-
-    if isnothing(next_particle)
-        error("0 out of $(system.buffer.buffer_size) buffer particles available")
+    for particle in eachindex(active_particle)
+        if PointNeighbors.Atomix.@atomic(active_particle[particle]) == false
+            # Activate this particle. The return value is the old value.
+            # If this is `true`, the particle was active before and we need to continue.
+            # This happens because a particle might have been activated by another thread
+            # between the condition and the line below.
+            # Note: This doesn't work with Metal.jl. No error is thrown, but the operation is simply ignored.
+            # An atomic compare-and-swap operation is probably implemented for Metal.jl here:
+            # https://github.com/JuliaGPU/Metal.jl/blob/caf299690aa52448ee72ffc5688939b157fc1ba2/src/device/intrinsics/atomics.jl#L42
+            was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true
+
+            if was_active == false
+                return particle
+            end
+        end
     end
 
-    active_particle[next_particle] = true
-
-    return next_particle
+    error("No buffer particles available")
 end
 
 @inline function deactivate_particle!(system, particle, u)
     (; active_particle) = system.buffer
 
-    active_particle[particle] = false
-
     # Set particle far away from simulation domain
     for dim in 1:ndims(system)
         # Inf or NaN causes instability outcome.
-        u[dim, particle] = 1e16
+        u[dim, particle] = eltype(system)(1e16)
     end
 
+    # To ensure thread safety, the buffer particle is only released for reuse
+    # after the write operation (`u`) has been completed.
+    # This guarantees that no other thread can access the active particle prematurely,
+    # avoiding race conditions.
+    active_particle[particle] = false
+
     return system
 end
diff --git a/src/general/gpu.jl b/src/general/gpu.jl
@@ -17,6 +17,27 @@ Adapt.@adapt_structure BoundaryModelDummyParticles
 Adapt.@adapt_structure BoundaryModelMonaghanKajtar
 Adapt.@adapt_structure BoundaryMovement
 Adapt.@adapt_structure TotalLagrangianSPHSystem
+Adapt.@adapt_structure BoundaryZone
+Adapt.@adapt_structure SystemBuffer
+
+function Adapt.adapt_structure(to, system::OpenBoundarySPHSystem)
+    return OpenBoundarySPHSystem(Adapt.adapt(to, system.boundary_model),
+                                 Adapt.adapt(to, system.initial_condition),
+                                 nothing,  # Do not adapt `fluid_system``
+                                 Adapt.adapt(to, system.fluid_system_index),
+                                 Adapt.adapt(to, system.smoothing_length),
+                                 Adapt.adapt(to, system.mass),
+                                 Adapt.adapt(to, system.density),
+                                 Adapt.adapt(to, system.volume),
+                                 Adapt.adapt(to, system.pressure),
+                                 Adapt.adapt(to, system.boundary_zone),
+                                 Adapt.adapt(to, system.reference_velocity),
+                                 Adapt.adapt(to, system.reference_pressure),
+                                 Adapt.adapt(to, system.reference_density),
+                                 Adapt.adapt(to, system.buffer),
+                                 Adapt.adapt(to, system.update_callback_used),
+                                 Adapt.adapt(to, system.cache))
+end
 
 KernelAbstractions.get_backend(::PtrArray) = KernelAbstractions.CPU()
 KernelAbstractions.get_backend(system::System) = KernelAbstractions.get_backend(system.mass)

diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
@@ -74,7 +74,7 @@ function Semidiscretization(systems::Union{System, Nothing}...;
 
     # Check e.g. that the boundary systems are using a state equation if EDAC is not used.
     # Other checks might be added here later.
-    check_configuration(systems)
+    check_configuration(systems, neighborhood_search)
 
     sizes_u = [u_nvariables(system) * n_moving_particles(system)
                for system in systems]
@@ -376,7 +376,8 @@ function initialize_neighborhood_searches!(semi)
         foreach_system(semi) do neighbor
             PointNeighbors.initialize!(get_neighborhood_search(system, neighbor, semi),
                                        initial_coordinates(system),
-                                       initial_coordinates(neighbor))
+                                       initial_coordinates(neighbor),
+                                       eachindex_y=active_particles(neighbor))
         end
     end
 
@@ -669,7 +670,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -692,7 +693,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -705,7 +706,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -729,7 +730,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -764,7 +765,8 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(system.ismoving[], true))
+            semi, points_moving=(system.ismoving[], true),
+            eachindex_y=active_particles(neighbor))
 end
 
 # This function is the same as the one above to avoid ambiguous dispatch when using `Union`
@@ -841,20 +843,22 @@ function update_nhs!(neighborhood_search,
 end
 
 # Forward to PointNeighbors.jl
-function update!(neighborhood_search, x, y, semi; points_moving=(true, false))
-    PointNeighbors.update!(neighborhood_search, x, y; points_moving,
+function update!(neighborhood_search, x, y, semi; points_moving=(true, false),
+                 eachindex_y=axes(y, 2))
+    PointNeighbors.update!(neighborhood_search, x, y; points_moving, eachindex_y,
                            parallelization_backend=semi.parallelization_backend)
 end
 
-function check_configuration(systems)
+function check_configuration(systems,
+                             nhs::Union{Nothing, PointNeighbors.AbstractNeighborhoodSearch})
     foreach_system(systems) do system
-        check_configuration(system, systems)
+        check_configuration(system, systems, nhs)
     end
 
     check_system_color(systems)
 end
 
-check_configuration(system, systems) = nothing
+check_configuration(system::System, systems, nhs) = nothing
 
 function check_system_color(systems)
     if any(system isa FluidSystem && !(system isa ParticlePackingSystem) &&
@@ -871,7 +875,7 @@ function check_system_color(systems)
     end
 end
 
-function check_configuration(fluid_system::FluidSystem, systems)
+function check_configuration(fluid_system::FluidSystem, systems, nhs)
     if !(fluid_system isa ParticlePackingSystem) && !isnothing(fluid_system.surface_tension)
         foreach_system(systems) do neighbor
             if neighbor isa FluidSystem && isnothing(fluid_system.surface_tension) &&
@@ -882,8 +886,8 @@ function check_configuration(fluid_system::FluidSystem, systems)
     end
 end
 
-function check_configuration(boundary_system::BoundarySPHSystem, systems)
-    (; boundary_model) = boundary_system
+function check_configuration(system::BoundarySPHSystem, systems, nhs)
+    (; boundary_model) = system
 
     foreach_system(systems) do neighbor
         if neighbor isa WeaklyCompressibleSPHSystem &&
@@ -895,7 +899,7 @@ function check_configuration(boundary_system::BoundarySPHSystem, systems)
     end
 end
 
-function check_configuration(system::TotalLagrangianSPHSystem, systems)
+function check_configuration(system::TotalLagrangianSPHSystem, systems, nhs)
     (; boundary_model) = system
 
     foreach_system(systems) do neighbor
@@ -912,12 +916,23 @@ function check_configuration(system::TotalLagrangianSPHSystem, systems)
     end
 end
 
-function check_configuration(system::OpenBoundarySPHSystem, systems)
+function check_configuration(system::OpenBoundarySPHSystem, systems,
+                             neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
     (; boundary_model, boundary_zone) = system
 
+    fluid_system_index = findfirst(==(system.fluid_system), systems)
+
+    system.fluid_system_index[] = fluid_system_index
+
     if boundary_model isa BoundaryModelLastiwka &&
        boundary_zone isa BoundaryZone{BidirectionalFlow}
         throw(ArgumentError("`BoundaryModelLastiwka` needs a specific flow direction. " *
                             "Please specify inflow and outflow."))
     end
+
+    if first(PointNeighbors.requires_update(neighborhood_search))
+        throw(ArgumentError("`OpenBoundarySPHSystem` requires a neighborhood search " *
+                            "that does not need an update for the `x` coordinates (e.g. `GridNeighborhoodSearch`). " *
+                            "See the PointNeighbors.jl documentation for more details."))
+    end
 end
diff --git a/src/schemes/boundary/dummy_particles/dummy_particles.jl b/src/schemes/boundary/dummy_particles/dummy_particles.jl
@@ -502,6 +502,7 @@ end
 
     # This needs to be serial to avoid race conditions when writing into `system`
     foreach_point_neighbor(neighbor_system, system, neighbor_coords, system_coords, semi;
+                           points=each_moving_particle(neighbor_system),
                            parallelization_backend=SerialBackend()) do neighbor, particle,
                                                                        pos_diff, distance
         # Since neighbor and particle are switched