Skip to content

Open boundaries - NHS fix and thread supported loop #773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
bb14bb4
fix
Apr 16, 2025
ead8aa2
threaded loop
Apr 16, 2025
1e8fe3d
revise mirroring
Apr 16, 2025
5ece6cb
adapt example
Apr 16, 2025
7fea2d7
make gpu compatible
Apr 17, 2025
d9736bc
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 7, 2025
fba3ee0
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 8, 2025
03fe742
fix check configuration
May 8, 2025
63b60a1
adapt open boundaries
May 9, 2025
1de092a
works on CPU but not on GPU
May 12, 2025
7cfe8b2
first working prototype on GPU
May 13, 2025
7933680
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 13, 2025
278e5d9
rm ambiguous adapt
May 13, 2025
a4cac23
undo avoiding atomic operation
May 13, 2025
d0a70af
fix tests
May 13, 2025
b7f17c8
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 14, 2025
562f1e5
fix
May 14, 2025
53b2f69
formatting
May 14, 2025
e50f6b8
formatting again
May 14, 2025
cb9a3a8
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 15, 2025
4b1a442
fix slow buffer update
May 15, 2025
edd1a91
fix mirroring
May 17, 2025
e3401a3
fix mirroring again
May 17, 2025
e2059a6
add important TODOs
May 17, 2025
40d9bdf
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 19, 2025
30dfec0
pinv instead of I
May 20, 2025
a9af368
use Ref() for the closure
May 20, 2025
bc869bd
fix tests
May 20, 2025
f506bf3
apply formatter
May 20, 2025
1bae3ce
fix
May 20, 2025
c6b7e41
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 21, 2025
2255d87
add comment
May 22, 2025
e9874a5
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 22, 2025
8ff54d5
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
Jun 2, 2025
e2bcba8
add comment
Jun 3, 2025
395071d
fix allocation bug
Jun 3, 2025
6aca12b
update PointNeighbors version bound
Jun 3, 2025
ddf1538
implement suggestion
Jun 4, 2025
d71e14a
apply formatter
Jun 4, 2025
8903c5f
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
LasNikas Jun 5, 2025
9663d58
add comment
Jun 5, 2025
de26d02
add comment
Jun 5, 2025
5fc3c7a
revert comment
Jun 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions examples/fluid/pipe_flow_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,16 @@ pipe = RectangularTank(particle_spacing, domain_size, boundary_size, fluid_densi
# Shift pipe walls in negative x-direction for the inflow
pipe.boundary.coordinates[1, :] .-= particle_spacing * open_boundary_layers

n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(ndims(pipe.fluid) - 1)
NDIMS = ndims(pipe.fluid)

n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(NDIMS - 1)

# ==========================================================================================
# ==== Fluid
wcsph = false

smoothing_length = 1.5 * particle_spacing
smoothing_kernel = WendlandC2Kernel{2}()
smoothing_kernel = WendlandC2Kernel{NDIMS}()

fluid_density_calculator = ContinuityDensity()

Expand Down Expand Up @@ -136,8 +138,15 @@ boundary_system = BoundarySPHSystem(pipe.boundary, boundary_model)

# ==========================================================================================
# ==== Simulation
min_corner = minimum(pipe.boundary.coordinates .- particle_spacing, dims=2)
max_corner = maximum(pipe.boundary.coordinates .+ particle_spacing, dims=2)

nhs = GridNeighborhoodSearch{NDIMS}(; cell_list=FullGridCellList(; min_corner, max_corner),
update_strategy=ParallelUpdate())

semi = Semidiscretization(fluid_system, open_boundary_in, open_boundary_out,
boundary_system, parallelization_backend=PolyesterBackend())
boundary_system, neighborhood_search=nhs,
parallelization_backend=PolyesterBackend())

ode = semidiscretize(semi, tspan)

Expand Down
5 changes: 3 additions & 2 deletions examples/fluid/pipe_flow_3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ flow_direction = [1.0, 0.0, 0.0]
trixi_include(@__MODULE__, joinpath(examples_dir(), "fluid", "pipe_flow_2d.jl"),
domain_size=domain_size, boundary_size=boundary_size,
flow_direction=flow_direction, faces=(false, false, true, true, true, true),
tspan=tspan, smoothing_kernel=WendlandC2Kernel{3}(),
reference_velocity=velocity_function3d,
tspan=tspan, reference_velocity=velocity_function3d,
min_corner=.-(domain_size ./ 2) .- open_boundary_layers * particle_spacing,
max_corner=domain_size .+ open_boundary_layers * particle_spacing,
plane_in=([0.0, 0.0, 0.0], [0.0, domain_size[2], 0.0],
[0.0, 0.0, domain_size[3]]),
plane_out=([domain_size[1], 0.0, 0.0], [domain_size[1], domain_size[2], 0.0],
Expand Down
86 changes: 51 additions & 35 deletions src/general/buffer.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
struct SystemBuffer{V}
active_particle :: Vector{Bool}
eachparticle :: V # Vector{Int}
buffer_size :: Int
struct SystemBuffer{AP, APC, EP}
active_particle :: AP # Vector{Bool}
active_particle_count :: APC
eachparticle :: EP # Vector{Int}
buffer_size :: Int
end

function SystemBuffer(active_size, buffer_size::Integer)
# We cannot use a `BitVector` here, as writing to a `BitVector` is not thread-safe
active_particle = vcat(fill(true, active_size), fill(false, buffer_size))
eachparticle = collect(1:active_size)
function SystemBuffer(active_size, buffer_size::Integer)
# Using a `BitVector` is not an option as writing to it is not thread-safe.
# Also, to ensure thread-safe particle activation, we use an `atomic_cas` operation.
# Thus, `active_particle` is defined as a `Vector{UInt32}` because CUDA.jl
# does not support atomic operations on `Bool`.
# https://github.com/JuliaGPU/CUDA.jl/blob/2cc9285676a4cd28d0846ca62f0300c56d281d38/src/device/intrinsics/atomics.jl#L243
active_particle = vcat(fill(UInt32(1), active_size), fill(UInt32(0), buffer_size))
eachparticle = collect(eachindex(active_particle))

return new{typeof(eachparticle)}(active_particle, eachparticle, buffer_size)
end
return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)
end

allocate_buffer(initial_condition, ::Nothing) = initial_condition
Expand All @@ -18,7 +23,8 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
(; buffer_size) = buffer

# Initialize particles far away from simulation domain
coordinates = fill(1e16, ndims(initial_condition), buffer_size)
coordinates = fill(eltype(initial_condition)(1e16), ndims(initial_condition),
buffer_size)

if all(rho -> isapprox(rho, first(initial_condition.density), atol=eps(), rtol=eps()),
initial_condition.density)
Expand All @@ -34,55 +40,65 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
return union(initial_condition, buffer_ic)
end

@inline update_system_buffer!(buffer::Nothing) = buffer
@inline update_system_buffer!(buffer::Nothing, semi) = buffer

# TODO `resize` allocates. Find a non-allocating version
@inline function update_system_buffer!(buffer::SystemBuffer)
@inline function update_system_buffer!(buffer::SystemBuffer, semi)
(; active_particle) = buffer

resize!(buffer.eachparticle, count(active_particle))

i = 1
for j in eachindex(active_particle)
if active_particle[j]
buffer.eachparticle[i] = j
i += 1
end
end
# TODO: Parallelize (see https://github.com/trixi-framework/TrixiParticles.jl/issues/810)
buffer.active_particle_count[] = sum(active_particle)
buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(x -> x == true,
active_particle)

return buffer
end

@inline each_moving_particle(system, buffer) = buffer.eachparticle
@inline each_moving_particle(system, buffer) = active_particles(system, buffer)

@inline active_coordinates(u, system, buffer) = view(u, :, buffer.active_particle)
@inline active_coordinates(u, system, buffer) = view(u, :, active_particles(system, buffer))

@inline active_particles(system, buffer) = buffer.eachparticle
@inline function active_particles(system, buffer)
return view(buffer.eachparticle, 1:buffer.active_particle_count[])
end

@inline function activate_next_particle(system)
(; active_particle) = system.buffer

next_particle = findfirst(x -> !x, active_particle)

if isnothing(next_particle)
error("0 out of $(system.buffer.buffer_size) buffer particles available")
for particle in eachindex(active_particle)
if PointNeighbors.Atomix.@atomic(active_particle[particle]) == false
# Activate this particle. The return value is the old value.
# If this is `true`, the particle was active before and we need to continue.
# This happens because a particle might have been activated by another thread
# between the condition and the line below.
# Note: This doesn't work with Metal.jl. No error is thrown, but the operation is simply ignored.
# An atomic compare-and-swap operation is probably implemented for Metal.jl here:
# https://github.com/JuliaGPU/Metal.jl/blob/caf299690aa52448ee72ffc5688939b157fc1ba2/src/device/intrinsics/atomics.jl#L42
was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true

if was_active == false
return particle
end
end
end

active_particle[next_particle] = true

return next_particle
error("No buffer particles available")
end

@inline function deactivate_particle!(system, particle, u)
(; active_particle) = system.buffer

active_particle[particle] = false

# Set particle far away from simulation domain
for dim in 1:ndims(system)
# Inf or NaN causes instability outcome.
u[dim, particle] = 1e16
u[dim, particle] = eltype(system)(1e16)
end

# To ensure thread safety, the buffer particle is only released for reuse
# after the write operation (`u`) has been completed.
# This guarantees that no other thread can access the active particle prematurely,
# avoiding race conditions.
active_particle[particle] = false

return system
end
21 changes: 21 additions & 0 deletions src/general/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,27 @@ Adapt.@adapt_structure BoundaryModelDummyParticles
Adapt.@adapt_structure BoundaryModelMonaghanKajtar
Adapt.@adapt_structure BoundaryMovement
Adapt.@adapt_structure TotalLagrangianSPHSystem
Adapt.@adapt_structure BoundaryZone
Adapt.@adapt_structure SystemBuffer

function Adapt.adapt_structure(to, system::OpenBoundarySPHSystem)
return OpenBoundarySPHSystem(Adapt.adapt(to, system.boundary_model),
Adapt.adapt(to, system.initial_condition),
nothing, # Do not adapt `fluid_system``
Adapt.adapt(to, system.fluid_system_index),
Adapt.adapt(to, system.smoothing_length),
Adapt.adapt(to, system.mass),
Adapt.adapt(to, system.density),
Adapt.adapt(to, system.volume),
Adapt.adapt(to, system.pressure),
Adapt.adapt(to, system.boundary_zone),
Adapt.adapt(to, system.reference_velocity),
Adapt.adapt(to, system.reference_pressure),
Adapt.adapt(to, system.reference_density),
Adapt.adapt(to, system.buffer),
Adapt.adapt(to, system.update_callback_used),
Adapt.adapt(to, system.cache))
end

KernelAbstractions.get_backend(::PtrArray) = KernelAbstractions.CPU()
KernelAbstractions.get_backend(system::System) = KernelAbstractions.get_backend(system.mass)
Expand Down
49 changes: 32 additions & 17 deletions src/general/semidiscretization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ function Semidiscretization(systems::Union{System, Nothing}...;

# Check e.g. that the boundary systems are using a state equation if EDAC is not used.
# Other checks might be added here later.
check_configuration(systems)
check_configuration(systems, neighborhood_search)

sizes_u = [u_nvariables(system) * n_moving_particles(system)
for system in systems]
Expand Down Expand Up @@ -376,7 +376,8 @@ function initialize_neighborhood_searches!(semi)
foreach_system(semi) do neighbor
PointNeighbors.initialize!(get_neighborhood_search(system, neighbor, semi),
initial_coordinates(system),
initial_coordinates(neighbor))
initial_coordinates(neighbor),
eachindex_y=active_particles(neighbor))
end
end

Expand Down Expand Up @@ -669,7 +670,7 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(true, true))
semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
end

function update_nhs!(neighborhood_search,
Expand All @@ -692,7 +693,7 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(true, true))
semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
end

function update_nhs!(neighborhood_search,
Expand All @@ -705,7 +706,7 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(true, true))
semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
end

function update_nhs!(neighborhood_search,
Expand All @@ -729,7 +730,7 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(true, true))
semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
end

function update_nhs!(neighborhood_search,
Expand Down Expand Up @@ -764,7 +765,8 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(system.ismoving[], true))
semi, points_moving=(system.ismoving[], true),
eachindex_y=active_particles(neighbor))
end

# This function is the same as the one above to avoid ambiguous dispatch when using `Union`
Expand Down Expand Up @@ -841,20 +843,22 @@ function update_nhs!(neighborhood_search,
end

# Forward to PointNeighbors.jl
function update!(neighborhood_search, x, y, semi; points_moving=(true, false))
PointNeighbors.update!(neighborhood_search, x, y; points_moving,
function update!(neighborhood_search, x, y, semi; points_moving=(true, false),
eachindex_y=axes(y, 2))
PointNeighbors.update!(neighborhood_search, x, y; points_moving, eachindex_y,
parallelization_backend=semi.parallelization_backend)
end

function check_configuration(systems)
function check_configuration(systems,
nhs::Union{Nothing, PointNeighbors.AbstractNeighborhoodSearch})
foreach_system(systems) do system
check_configuration(system, systems)
check_configuration(system, systems, nhs)
end

check_system_color(systems)
end

check_configuration(system, systems) = nothing
check_configuration(system::System, systems, nhs) = nothing

function check_system_color(systems)
if any(system isa FluidSystem && !(system isa ParticlePackingSystem) &&
Expand All @@ -871,7 +875,7 @@ function check_system_color(systems)
end
end

function check_configuration(fluid_system::FluidSystem, systems)
function check_configuration(fluid_system::FluidSystem, systems, nhs)
if !(fluid_system isa ParticlePackingSystem) && !isnothing(fluid_system.surface_tension)
foreach_system(systems) do neighbor
if neighbor isa FluidSystem && isnothing(fluid_system.surface_tension) &&
Expand All @@ -882,8 +886,8 @@ function check_configuration(fluid_system::FluidSystem, systems)
end
end

function check_configuration(boundary_system::BoundarySPHSystem, systems)
(; boundary_model) = boundary_system
function check_configuration(system::BoundarySPHSystem, systems, nhs)
(; boundary_model) = system

foreach_system(systems) do neighbor
if neighbor isa WeaklyCompressibleSPHSystem &&
Expand All @@ -895,7 +899,7 @@ function check_configuration(boundary_system::BoundarySPHSystem, systems)
end
end

function check_configuration(system::TotalLagrangianSPHSystem, systems)
function check_configuration(system::TotalLagrangianSPHSystem, systems, nhs)
(; boundary_model) = system

foreach_system(systems) do neighbor
Expand All @@ -912,12 +916,23 @@ function check_configuration(system::TotalLagrangianSPHSystem, systems)
end
end

function check_configuration(system::OpenBoundarySPHSystem, systems)
function check_configuration(system::OpenBoundarySPHSystem, systems,
neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
(; boundary_model, boundary_zone) = system

fluid_system_index = findfirst(==(system.fluid_system), systems)

system.fluid_system_index[] = fluid_system_index

if boundary_model isa BoundaryModelLastiwka &&
boundary_zone isa BoundaryZone{BidirectionalFlow}
throw(ArgumentError("`BoundaryModelLastiwka` needs a specific flow direction. " *
"Please specify inflow and outflow."))
end

if first(PointNeighbors.requires_update(neighborhood_search))
throw(ArgumentError("`OpenBoundarySPHSystem` requires a neighborhood search " *
"that does not need an update for the `x` coordinates (e.g. `GridNeighborhoodSearch`). " *
"See the PointNeighbors.jl documentation for more details."))
end
end
1 change: 1 addition & 0 deletions src/schemes/boundary/dummy_particles/dummy_particles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ end

# This needs to be serial to avoid race conditions when writing into `system`
foreach_point_neighbor(neighbor_system, system, neighbor_coords, system_coords, semi;
points=each_moving_particle(neighbor_system),
parallelization_backend=SerialBackend()) do neighbor, particle,
pos_diff, distance
# Since neighbor and particle are switched
Expand Down
Loading
Loading