trixi-framework · LasNikas · Jun 6, 2025 · Apr 16, 2025 · Apr 16, 2025 · Apr 16, 2025
diff --git a/Project.toml b/Project.toml
@@ -54,7 +54,7 @@ KernelAbstractions = "0.9"
 MuladdMacro = "0.2"
 OrdinaryDiffEq = "6.91"
 OrdinaryDiffEqCore = "1"
-PointNeighbors = "0.6.1"
+PointNeighbors = "0.6.3"
 Polyester = "0.7.10"
 ReadVTK = "0.2"
 RecipesBase = "1"

diff --git a/examples/fluid/pipe_flow_2d.jl b/examples/fluid/pipe_flow_2d.jl
@@ -47,14 +47,16 @@ pipe = RectangularTank(particle_spacing, domain_size, boundary_size, fluid_densi
 # Shift pipe walls in negative x-direction for the inflow
 pipe.boundary.coordinates[1, :] .-= particle_spacing * open_boundary_layers
 
-n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(ndims(pipe.fluid) - 1)
+NDIMS = ndims(pipe.fluid)
+
+n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(NDIMS - 1)
 
 # ==========================================================================================
 # ==== Fluid
 wcsph = false
 
 smoothing_length = 1.5 * particle_spacing
-smoothing_kernel = WendlandC2Kernel{2}()
+smoothing_kernel = WendlandC2Kernel{NDIMS}()
 
 fluid_density_calculator = ContinuityDensity()
 
@@ -136,8 +138,15 @@ boundary_system = BoundarySPHSystem(pipe.boundary, boundary_model)
 
 # ==========================================================================================
 # ==== Simulation
+min_corner = minimum(pipe.boundary.coordinates .- particle_spacing, dims=2)
+max_corner = maximum(pipe.boundary.coordinates .+ particle_spacing, dims=2)
+
+nhs = GridNeighborhoodSearch{NDIMS}(; cell_list=FullGridCellList(; min_corner, max_corner),
+                                    update_strategy=ParallelUpdate())
+
 semi = Semidiscretization(fluid_system, open_boundary_in, open_boundary_out,
-                          boundary_system, parallelization_backend=PolyesterBackend())
+                          boundary_system, neighborhood_search=nhs,
+                          parallelization_backend=PolyesterBackend())
 
 ode = semidiscretize(semi, tspan)
 

diff --git a/examples/fluid/pipe_flow_3d.jl b/examples/fluid/pipe_flow_3d.jl
@@ -37,8 +37,8 @@ flow_direction = [1.0, 0.0, 0.0]
 trixi_include(@__MODULE__, joinpath(examples_dir(), "fluid", "pipe_flow_2d.jl"),
               domain_size=domain_size, boundary_size=boundary_size,
               flow_direction=flow_direction, faces=(false, false, true, true, true, true),
-              tspan=tspan, smoothing_kernel=WendlandC2Kernel{3}(),
-              reference_velocity=velocity_function3d,
+              tspan=tspan, reference_velocity=velocity_function3d,
+              open_boundary_layers=open_boundary_layers,
               plane_in=([0.0, 0.0, 0.0], [0.0, domain_size[2], 0.0],
                         [0.0, 0.0, domain_size[3]]),
               plane_out=([domain_size[1], 0.0, 0.0], [domain_size[1], domain_size[2], 0.0],

diff --git a/src/general/buffer.jl b/src/general/buffer.jl
@@ -1,15 +1,21 @@
-struct SystemBuffer{V}
-    active_particle :: Vector{Bool}
-    eachparticle    :: V # Vector{Int}
-    buffer_size     :: Int
-
-    function SystemBuffer(active_size, buffer_size::Integer)
-        # We cannot use a `BitVector` here, as writing to a `BitVector` is not thread-safe
-        active_particle = vcat(fill(true, active_size), fill(false, buffer_size))
-        eachparticle = collect(1:active_size)
+struct SystemBuffer{AP, APC, EP}
+    active_particle       :: AP # Vector{Bool}
+    active_particle_count :: APC
+    eachparticle          :: EP # Vector{Int}
+    buffer_size           :: Int
+end
 
-        return new{typeof(eachparticle)}(active_particle, eachparticle, buffer_size)
-    end
+function SystemBuffer(active_size, buffer_size::Integer)
+    # Using a `BitVector` is not an option as writing to it is not thread-safe.
+    # Also, to ensure thread-safe particle activation, we use an `atomic_cas` operation.
+    # Thus, `active_particle` is defined as a `Vector{UInt32}` because CUDA.jl
+    # does not support atomic operations on `Bool`.
+    # https://github.com/JuliaGPU/CUDA.jl/blob/2cc9285676a4cd28d0846ca62f0300c56d281d38/src/device/intrinsics/atomics.jl#L243
+    active_particle = vcat(fill(UInt32(true), active_size),
+                           fill(UInt32(false), buffer_size))
+    eachparticle = collect(eachindex(active_particle))
+
+    return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)
 end
 
 allocate_buffer(initial_condition, ::Nothing) = initial_condition
@@ -18,7 +24,8 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
     (; buffer_size) = buffer
 
     # Initialize particles far away from simulation domain
-    coordinates = fill(1e16, ndims(initial_condition), buffer_size)
+    coordinates = fill(eltype(initial_condition)(1e16), ndims(initial_condition),
+                       buffer_size)
 
     if all(rho -> isapprox(rho, first(initial_condition.density), atol=eps(), rtol=eps()),
            initial_condition.density)
@@ -34,55 +41,64 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
     return union(initial_condition, buffer_ic)
 end
 
-@inline update_system_buffer!(buffer::Nothing) = buffer
+@inline update_system_buffer!(buffer::Nothing, semi) = buffer
 
 # TODO `resize` allocates. Find a non-allocating version
-@inline function update_system_buffer!(buffer::SystemBuffer)
+@inline function update_system_buffer!(buffer::SystemBuffer, semi)
     (; active_particle) = buffer
 
-    resize!(buffer.eachparticle, count(active_particle))
-
-    i = 1
-    for j in eachindex(active_particle)
-        if active_particle[j]
-            buffer.eachparticle[i] = j
-            i += 1
-        end
-    end
+    # TODO: Parallelize (see https://github.com/trixi-framework/TrixiParticles.jl/issues/810)
+    # Update the number of active particles and the active particle indices
+    buffer.active_particle_count[] = sum(active_particle)
+    buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(x -> x == true,
+                                                                     active_particle)
 
     return buffer
 end
 
-@inline each_moving_particle(system, buffer) = buffer.eachparticle
+@inline each_moving_particle(system, buffer) = active_particles(system, buffer)
 
-@inline active_coordinates(u, system, buffer) = view(u, :, buffer.active_particle)
+@inline active_coordinates(u, system, buffer) = view(u, :, active_particles(system, buffer))
 
-@inline active_particles(system, buffer) = buffer.eachparticle
+@inline function active_particles(system, buffer)
+    return view(buffer.eachparticle, 1:buffer.active_particle_count[])
+end
 
 @inline function activate_next_particle(system)
     (; active_particle) = system.buffer
 
-    next_particle = findfirst(x -> !x, active_particle)
-
-    if isnothing(next_particle)
-        error("0 out of $(system.buffer.buffer_size) buffer particles available")
+    for particle in eachindex(active_particle)
+        if PointNeighbors.Atomix.@atomic(active_particle[particle]) == false
+            # After we go into this condition, another thread might still activate this particle
+            # before we do. Therefore, we use an atomic swap, which activates the particle,
+            # but also returns the old value.
+            # If the old value is `true`, the particle was active before and we need to continue.
+            # Note: This doesn't work with Metal.jl. No error is thrown, but the operation is simply ignored.
+            # An atomic compare-and-swap operation is probably implemented for Metal.jl here:
+            # https://github.com/JuliaGPU/Metal.jl/blob/caf299690aa52448ee72ffc5688939b157fc1ba2/src/device/intrinsics/atomics.jl#L42
+            was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true
+
+            if was_active == false
+                return particle
+            end
+        end
     end
 
-    active_particle[next_particle] = true
-
-    return next_particle
+    error("No buffer particles available")
 end
 
 @inline function deactivate_particle!(system, particle, u)
     (; active_particle) = system.buffer
 
-    active_particle[particle] = false
-
     # Set particle far away from simulation domain
     for dim in 1:ndims(system)
         # Inf or NaN causes instability outcome.
-        u[dim, particle] = 1e16
+        u[dim, particle] = eltype(system)(1e16)
     end
 
+    # `activate_next_particle!` and `deactivate_particle!` are never called on the same buffer inside a kernel,
+    # so we don't have any race conditions on this `active_particle` vector.
+    active_particle[particle] = false
+
     return system
 end
diff --git a/src/general/gpu.jl b/src/general/gpu.jl
@@ -17,6 +17,9 @@ Adapt.@adapt_structure BoundaryModelDummyParticles
 Adapt.@adapt_structure BoundaryModelMonaghanKajtar
 Adapt.@adapt_structure BoundaryMovement
 Adapt.@adapt_structure TotalLagrangianSPHSystem
+Adapt.@adapt_structure BoundaryZone
+Adapt.@adapt_structure SystemBuffer
+Adapt.@adapt_structure OpenBoundarySPHSystem
 
 KernelAbstractions.get_backend(::PtrArray) = KernelAbstractions.CPU()
 KernelAbstractions.get_backend(system::System) = KernelAbstractions.get_backend(system.mass)

diff --git a/src/general/semidiscretization.jl b/src/general/semidiscretization.jl
@@ -74,7 +74,7 @@ function Semidiscretization(systems::Union{System, Nothing}...;
 
     # Check e.g. that the boundary systems are using a state equation if EDAC is not used.
     # Other checks might be added here later.
-    check_configuration(systems)
+    check_configuration(systems, neighborhood_search)
 
     sizes_u = [u_nvariables(system) * n_moving_particles(system)
                for system in systems]
@@ -318,7 +318,17 @@ function semidiscretize(semi, tspan; reset_threads=true)
         # When e.g. `parallelization_backend=CUDABackend()`, this will convert all `Array`s
         # to `CuArray`s, moving data to the GPU.
         # See the comments in general/gpu.jl for more details.
-        semi_new = Adapt.adapt(semi.parallelization_backend, semi)
+        semi_ = Adapt.adapt(semi.parallelization_backend, semi)
+
+        # We now have a new `Semidiscretization` with new systems.
+        # This means that systems linking to other systems still point to old systems.
+        # Therefore, we have to re-link them, which yields yet another `Semidiscretization`.
+        # Note that this re-creates systems containing links, so it only works as long
+        # as systems don't link to other systems containing links.
+        semi_new = Semidiscretization(set_system_links.(semi_.systems, Ref(semi_)),
+                                      semi_.ranges_u, semi_.ranges_v,
+                                      semi_.neighborhood_searches,
+                                      semi_.parallelization_backend)
     else
         semi_new = semi
     end
@@ -375,7 +385,8 @@ function initialize_neighborhood_searches!(semi)
         foreach_system(semi) do neighbor
             PointNeighbors.initialize!(get_neighborhood_search(system, neighbor, semi),
                                        initial_coordinates(system),
-                                       initial_coordinates(neighbor))
+                                       initial_coordinates(neighbor),
+                                       eachindex_y=active_particles(neighbor))
         end
     end
 
@@ -668,7 +679,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -691,7 +702,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -704,7 +715,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -728,7 +739,7 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(true, true))
+            semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
 end
 
 function update_nhs!(neighborhood_search,
@@ -763,7 +774,8 @@ function update_nhs!(neighborhood_search,
     update!(neighborhood_search,
             current_coordinates(u_system, system),
             current_coordinates(u_neighbor, neighbor),
-            semi, points_moving=(system.ismoving[], true))
+            semi, points_moving=(system.ismoving[], true),
+            eachindex_y=active_particles(neighbor))
 end
 
 # This function is the same as the one above to avoid ambiguous dispatch when using `Union`
@@ -840,20 +852,22 @@ function update_nhs!(neighborhood_search,
 end
 
 # Forward to PointNeighbors.jl
-function update!(neighborhood_search, x, y, semi; points_moving=(true, false))
-    PointNeighbors.update!(neighborhood_search, x, y; points_moving,
+function update!(neighborhood_search, x, y, semi; points_moving=(true, false),
+                 eachindex_y=axes(y, 2))
+    PointNeighbors.update!(neighborhood_search, x, y; points_moving, eachindex_y,
                            parallelization_backend=semi.parallelization_backend)
 end
 
-function check_configuration(systems)
+function check_configuration(systems,
+                             nhs::Union{Nothing, PointNeighbors.AbstractNeighborhoodSearch})
     foreach_system(systems) do system
-        check_configuration(system, systems)
+        check_configuration(system, systems, nhs)
     end
 
     check_system_color(systems)
 end
 
-check_configuration(system, systems) = nothing
+check_configuration(system::System, systems, nhs) = nothing
 
 function check_system_color(systems)
     if any(system isa FluidSystem && !(system isa ParticlePackingSystem) &&
@@ -870,7 +884,7 @@ function check_system_color(systems)
     end
 end
 
-function check_configuration(fluid_system::FluidSystem, systems)
+function check_configuration(fluid_system::FluidSystem, systems, nhs)
     if !(fluid_system isa ParticlePackingSystem) && !isnothing(fluid_system.surface_tension)
         foreach_system(systems) do neighbor
             if neighbor isa FluidSystem && isnothing(fluid_system.surface_tension) &&
@@ -881,8 +895,8 @@ function check_configuration(fluid_system::FluidSystem, systems)
     end
 end
 
-function check_configuration(boundary_system::BoundarySPHSystem, systems)
-    (; boundary_model) = boundary_system
+function check_configuration(system::BoundarySPHSystem, systems, nhs)
+    (; boundary_model) = system
 
     foreach_system(systems) do neighbor
         if neighbor isa WeaklyCompressibleSPHSystem &&
@@ -894,7 +908,7 @@ function check_configuration(boundary_system::BoundarySPHSystem, systems)
     end
 end
 
-function check_configuration(system::TotalLagrangianSPHSystem, systems)
+function check_configuration(system::TotalLagrangianSPHSystem, systems, nhs)
     (; boundary_model) = system
 
     foreach_system(systems) do neighbor
@@ -911,12 +925,50 @@ function check_configuration(system::TotalLagrangianSPHSystem, systems)
     end
 end
 
-function check_configuration(system::OpenBoundarySPHSystem, systems)
+function check_configuration(system::OpenBoundarySPHSystem, systems,
+                             neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
     (; boundary_model, boundary_zone) = system
 
+    # Store index of the fluid system. This is necessary for re-linking
+    # in case we use Adapt.jl to create a new semidiscretization.
+    fluid_system_index = findfirst(==(system.fluid_system), systems)
+    system.fluid_system_index[] = fluid_system_index
+
     if boundary_model isa BoundaryModelLastiwka &&
        boundary_zone isa BoundaryZone{BidirectionalFlow}
         throw(ArgumentError("`BoundaryModelLastiwka` needs a specific flow direction. " *
                             "Please specify inflow and outflow."))
     end
+
+    if first(PointNeighbors.requires_update(neighborhood_search))
+        throw(ArgumentError("`OpenBoundarySPHSystem` requires a neighborhood search " *
+                            "that does not require an update for the first set of coordinates (e.g. `GridNeighborhoodSearch`). " *
+                            "See the PointNeighbors.jl documentation for more details."))
+    end
+end
+
+# After `adapt`, the system type information may change.
+# This means that systems linking to other systems still point to old systems.
+# Therefore, we have to re-link them based on the stored system index.
+set_system_links(system, semi) = system
+
+function set_system_links(system::OpenBoundarySPHSystem, semi)
+    fluid_system = semi.systems[system.fluid_system_index[]]
+
+    return OpenBoundarySPHSystem(system.boundary_model,
+                                 system.initial_condition,
+                                 fluid_system, # link to fluid system
+                                 system.fluid_system_index,
+                                 system.smoothing_length,
+                                 system.mass,
+                                 system.density,
+                                 system.volume,
+                                 system.pressure,
+                                 system.boundary_zone,
+                                 system.reference_velocity,
+                                 system.reference_pressure,
+                                 system.reference_density,
+                                 system.buffer,
+                                 system.update_callback_used,
+                                 system.cache)
 end
diff --git a/src/schemes/boundary/dummy_particles/dummy_particles.jl b/src/schemes/boundary/dummy_particles/dummy_particles.jl
@@ -502,6 +502,7 @@ end
 
     # This needs to be serial to avoid race conditions when writing into `system`
     foreach_point_neighbor(neighbor_system, system, neighbor_coords, system_coords, semi;
+                           points=each_moving_particle(neighbor_system),
                            parallelization_backend=SerialBackend()) do neighbor, particle,
                                                                        pos_diff, distance
         # Since neighbor and particle are switched