Skip to content

Open boundaries - NHS fix and thread supported loop #773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
bb14bb4
fix
Apr 16, 2025
ead8aa2
threaded loop
Apr 16, 2025
1e8fe3d
revise mirroring
Apr 16, 2025
5ece6cb
adapt example
Apr 16, 2025
7fea2d7
make gpu compatible
Apr 17, 2025
d9736bc
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 7, 2025
fba3ee0
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 8, 2025
03fe742
fix check configuration
May 8, 2025
63b60a1
adapt open boundaries
May 9, 2025
1de092a
works on CPU but not on GPU
May 12, 2025
7cfe8b2
first working prototype on GPU
May 13, 2025
7933680
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 13, 2025
278e5d9
rm ambiguous adapt
May 13, 2025
a4cac23
undo avoiding atomic operation
May 13, 2025
d0a70af
fix tests
May 13, 2025
b7f17c8
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 14, 2025
562f1e5
fix
May 14, 2025
53b2f69
formatting
May 14, 2025
e50f6b8
formatting again
May 14, 2025
cb9a3a8
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 15, 2025
4b1a442
fix slow buffer update
May 15, 2025
edd1a91
fix mirroring
May 17, 2025
e3401a3
fix mirroring again
May 17, 2025
e2059a6
add important TODOs
May 17, 2025
40d9bdf
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 19, 2025
30dfec0
pinv instead of I
May 20, 2025
a9af368
use Ref() for the closure
May 20, 2025
bc869bd
fix tests
May 20, 2025
f506bf3
apply formatter
May 20, 2025
1bae3ce
fix
May 20, 2025
c6b7e41
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 21, 2025
2255d87
add comment
May 22, 2025
e9874a5
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
May 22, 2025
8ff54d5
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
Jun 2, 2025
e2bcba8
add comment
Jun 3, 2025
395071d
fix allocation bug
Jun 3, 2025
6aca12b
update PointNeighbors version bound
Jun 3, 2025
ddf1538
implement suggestion
Jun 4, 2025
d71e14a
apply formatter
Jun 4, 2025
8903c5f
Merge branch 'main' into open-boundaries-nhs-fix-patch-2
LasNikas Jun 5, 2025
9663d58
add comment
Jun 5, 2025
de26d02
add comment
Jun 5, 2025
5fc3c7a
revert comment
Jun 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ KernelAbstractions = "0.9"
MuladdMacro = "0.2"
OrdinaryDiffEq = "6.91"
OrdinaryDiffEqCore = "1"
PointNeighbors = "0.6.1"
PointNeighbors = "0.6.3"
Polyester = "0.7.10"
ReadVTK = "0.2"
RecipesBase = "1"
Expand Down
15 changes: 12 additions & 3 deletions examples/fluid/pipe_flow_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,16 @@ pipe = RectangularTank(particle_spacing, domain_size, boundary_size, fluid_densi
# Shift pipe walls in negative x-direction for the inflow
pipe.boundary.coordinates[1, :] .-= particle_spacing * open_boundary_layers

n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(ndims(pipe.fluid) - 1)
NDIMS = ndims(pipe.fluid)

n_buffer_particles = 4 * pipe.n_particles_per_dimension[2]^(NDIMS - 1)

# ==========================================================================================
# ==== Fluid
wcsph = false

smoothing_length = 1.5 * particle_spacing
smoothing_kernel = WendlandC2Kernel{2}()
smoothing_kernel = WendlandC2Kernel{NDIMS}()

fluid_density_calculator = ContinuityDensity()

Expand Down Expand Up @@ -136,8 +138,15 @@ boundary_system = BoundarySPHSystem(pipe.boundary, boundary_model)

# ==========================================================================================
# ==== Simulation
min_corner = minimum(pipe.boundary.coordinates .- particle_spacing, dims=2)
max_corner = maximum(pipe.boundary.coordinates .+ particle_spacing, dims=2)

nhs = GridNeighborhoodSearch{NDIMS}(; cell_list=FullGridCellList(; min_corner, max_corner),
update_strategy=ParallelUpdate())

semi = Semidiscretization(fluid_system, open_boundary_in, open_boundary_out,
boundary_system, parallelization_backend=PolyesterBackend())
boundary_system, neighborhood_search=nhs,
parallelization_backend=PolyesterBackend())

ode = semidiscretize(semi, tspan)

Expand Down
4 changes: 2 additions & 2 deletions examples/fluid/pipe_flow_3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ flow_direction = [1.0, 0.0, 0.0]
trixi_include(@__MODULE__, joinpath(examples_dir(), "fluid", "pipe_flow_2d.jl"),
domain_size=domain_size, boundary_size=boundary_size,
flow_direction=flow_direction, faces=(false, false, true, true, true, true),
tspan=tspan, smoothing_kernel=WendlandC2Kernel{3}(),
reference_velocity=velocity_function3d,
tspan=tspan, reference_velocity=velocity_function3d,
open_boundary_layers=open_boundary_layers,
plane_in=([0.0, 0.0, 0.0], [0.0, domain_size[2], 0.0],
[0.0, 0.0, domain_size[3]]),
plane_out=([domain_size[1], 0.0, 0.0], [domain_size[1], domain_size[2], 0.0],
Expand Down
88 changes: 52 additions & 36 deletions src/general/buffer.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
struct SystemBuffer{V}
active_particle :: Vector{Bool}
eachparticle :: V # Vector{Int}
buffer_size :: Int

function SystemBuffer(active_size, buffer_size::Integer)
# We cannot use a `BitVector` here, as writing to a `BitVector` is not thread-safe
active_particle = vcat(fill(true, active_size), fill(false, buffer_size))
eachparticle = collect(1:active_size)
struct SystemBuffer{AP, APC, EP}
active_particle :: AP # Vector{Bool}
active_particle_count :: APC
eachparticle :: EP # Vector{Int}
buffer_size :: Int
end

return new{typeof(eachparticle)}(active_particle, eachparticle, buffer_size)
end
function SystemBuffer(active_size, buffer_size::Integer)
# Using a `BitVector` is not an option as writing to it is not thread-safe.
# Also, to ensure thread-safe particle activation, we use an `atomic_cas` operation.
# Thus, `active_particle` is defined as a `Vector{UInt32}` because CUDA.jl
# does not support atomic operations on `Bool`.
# https://github.com/JuliaGPU/CUDA.jl/blob/2cc9285676a4cd28d0846ca62f0300c56d281d38/src/device/intrinsics/atomics.jl#L243
active_particle = vcat(fill(UInt32(true), active_size),
fill(UInt32(false), buffer_size))
eachparticle = collect(eachindex(active_particle))

return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)
end

allocate_buffer(initial_condition, ::Nothing) = initial_condition
Expand All @@ -18,7 +24,8 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
(; buffer_size) = buffer

# Initialize particles far away from simulation domain
coordinates = fill(1e16, ndims(initial_condition), buffer_size)
coordinates = fill(eltype(initial_condition)(1e16), ndims(initial_condition),
buffer_size)

if all(rho -> isapprox(rho, first(initial_condition.density), atol=eps(), rtol=eps()),
initial_condition.density)
Expand All @@ -34,55 +41,64 @@ function allocate_buffer(initial_condition, buffer::SystemBuffer)
return union(initial_condition, buffer_ic)
end

@inline update_system_buffer!(buffer::Nothing) = buffer
@inline update_system_buffer!(buffer::Nothing, semi) = buffer

# TODO `resize` allocates. Find a non-allocating version
@inline function update_system_buffer!(buffer::SystemBuffer)
@inline function update_system_buffer!(buffer::SystemBuffer, semi)
(; active_particle) = buffer

resize!(buffer.eachparticle, count(active_particle))

i = 1
for j in eachindex(active_particle)
if active_particle[j]
buffer.eachparticle[i] = j
i += 1
end
end
# TODO: Parallelize (see https://github.com/trixi-framework/TrixiParticles.jl/issues/810)
# Update the number of active particles and the active particle indices
buffer.active_particle_count[] = sum(active_particle)
buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(x -> x == true,
active_particle)

return buffer
end

@inline each_moving_particle(system, buffer) = buffer.eachparticle
@inline each_moving_particle(system, buffer) = active_particles(system, buffer)

@inline active_coordinates(u, system, buffer) = view(u, :, buffer.active_particle)
@inline active_coordinates(u, system, buffer) = view(u, :, active_particles(system, buffer))

@inline active_particles(system, buffer) = buffer.eachparticle
@inline function active_particles(system, buffer)
return view(buffer.eachparticle, 1:buffer.active_particle_count[])
end

@inline function activate_next_particle(system)
(; active_particle) = system.buffer

next_particle = findfirst(x -> !x, active_particle)

if isnothing(next_particle)
error("0 out of $(system.buffer.buffer_size) buffer particles available")
for particle in eachindex(active_particle)
if PointNeighbors.Atomix.@atomic(active_particle[particle]) == false
# After we go into this condition, another thread might still activate this particle
# before we do. Therefore, we use an atomic swap, which activates the particle,
# but also returns the old value.
# If the old value is `true`, the particle was active before and we need to continue.
# Note: This doesn't work with Metal.jl. No error is thrown, but the operation is simply ignored.
# An atomic compare-and-swap operation is probably implemented for Metal.jl here:
# https://github.com/JuliaGPU/Metal.jl/blob/caf299690aa52448ee72ffc5688939b157fc1ba2/src/device/intrinsics/atomics.jl#L42
was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true

if was_active == false
return particle
end
end
end

active_particle[next_particle] = true

return next_particle
error("No buffer particles available")
end

@inline function deactivate_particle!(system, particle, u)
(; active_particle) = system.buffer

active_particle[particle] = false

# Set particle far away from simulation domain
for dim in 1:ndims(system)
# Inf or NaN causes instability outcome.
u[dim, particle] = 1e16
u[dim, particle] = eltype(system)(1e16)
end

# `activate_next_particle!` and `deactivate_particle!` are never called on the same buffer inside a kernel,
# so we don't have any race conditions on this `active_particle` vector.
active_particle[particle] = false

return system
end
3 changes: 3 additions & 0 deletions src/general/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ Adapt.@adapt_structure BoundaryModelDummyParticles
Adapt.@adapt_structure BoundaryModelMonaghanKajtar
Adapt.@adapt_structure BoundaryMovement
Adapt.@adapt_structure TotalLagrangianSPHSystem
Adapt.@adapt_structure BoundaryZone
Adapt.@adapt_structure SystemBuffer
Adapt.@adapt_structure OpenBoundarySPHSystem

KernelAbstractions.get_backend(::PtrArray) = KernelAbstractions.CPU()
KernelAbstractions.get_backend(system::System) = KernelAbstractions.get_backend(system.mass)
Expand Down
88 changes: 70 additions & 18 deletions src/general/semidiscretization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ function Semidiscretization(systems::Union{System, Nothing}...;

# Check e.g. that the boundary systems are using a state equation if EDAC is not used.
# Other checks might be added here later.
check_configuration(systems)
check_configuration(systems, neighborhood_search)

sizes_u = [u_nvariables(system) * n_moving_particles(system)
for system in systems]
Expand Down Expand Up @@ -318,7 +318,17 @@ function semidiscretize(semi, tspan; reset_threads=true)
# When e.g. `parallelization_backend=CUDABackend()`, this will convert all `Array`s
# to `CuArray`s, moving data to the GPU.
# See the comments in general/gpu.jl for more details.
semi_new = Adapt.adapt(semi.parallelization_backend, semi)
semi_ = Adapt.adapt(semi.parallelization_backend, semi)

# We now have a new `Semidiscretization` with new systems.
# This means that systems linking to other systems still point to old systems.
# Therefore, we have to re-link them, which yields yet another `Semidiscretization`.
# Note that this re-creates systems containing links, so it only works as long
# as systems don't link to other systems containing links.
semi_new = Semidiscretization(set_system_links.(semi_.systems, Ref(semi_)),
semi_.ranges_u, semi_.ranges_v,
semi_.neighborhood_searches,
semi_.parallelization_backend)
else
semi_new = semi
end
Expand Down Expand Up @@ -375,7 +385,8 @@ function initialize_neighborhood_searches!(semi)
foreach_system(semi) do neighbor
PointNeighbors.initialize!(get_neighborhood_search(system, neighbor, semi),
initial_coordinates(system),
initial_coordinates(neighbor))
initial_coordinates(neighbor),
eachindex_y=active_particles(neighbor))
end
end

Expand Down Expand Up @@ -668,7 +679,7 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(true, true))
semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
end

function update_nhs!(neighborhood_search,
Expand All @@ -691,7 +702,7 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(true, true))
semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
end

function update_nhs!(neighborhood_search,
Expand All @@ -704,7 +715,7 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(true, true))
semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
end

function update_nhs!(neighborhood_search,
Expand All @@ -728,7 +739,7 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(true, true))
semi, points_moving=(true, true), eachindex_y=active_particles(neighbor))
end

function update_nhs!(neighborhood_search,
Expand Down Expand Up @@ -763,7 +774,8 @@ function update_nhs!(neighborhood_search,
update!(neighborhood_search,
current_coordinates(u_system, system),
current_coordinates(u_neighbor, neighbor),
semi, points_moving=(system.ismoving[], true))
semi, points_moving=(system.ismoving[], true),
eachindex_y=active_particles(neighbor))
end

# This function is the same as the one above to avoid ambiguous dispatch when using `Union`
Expand Down Expand Up @@ -840,20 +852,22 @@ function update_nhs!(neighborhood_search,
end

# Forward to PointNeighbors.jl
function update!(neighborhood_search, x, y, semi; points_moving=(true, false))
PointNeighbors.update!(neighborhood_search, x, y; points_moving,
function update!(neighborhood_search, x, y, semi; points_moving=(true, false),
eachindex_y=axes(y, 2))
PointNeighbors.update!(neighborhood_search, x, y; points_moving, eachindex_y,
parallelization_backend=semi.parallelization_backend)
end

function check_configuration(systems)
function check_configuration(systems,
nhs::Union{Nothing, PointNeighbors.AbstractNeighborhoodSearch})
foreach_system(systems) do system
check_configuration(system, systems)
check_configuration(system, systems, nhs)
end

check_system_color(systems)
end

check_configuration(system, systems) = nothing
check_configuration(system::System, systems, nhs) = nothing

function check_system_color(systems)
if any(system isa FluidSystem && !(system isa ParticlePackingSystem) &&
Expand All @@ -870,7 +884,7 @@ function check_system_color(systems)
end
end

function check_configuration(fluid_system::FluidSystem, systems)
function check_configuration(fluid_system::FluidSystem, systems, nhs)
if !(fluid_system isa ParticlePackingSystem) && !isnothing(fluid_system.surface_tension)
foreach_system(systems) do neighbor
if neighbor isa FluidSystem && isnothing(fluid_system.surface_tension) &&
Expand All @@ -881,8 +895,8 @@ function check_configuration(fluid_system::FluidSystem, systems)
end
end

function check_configuration(boundary_system::BoundarySPHSystem, systems)
(; boundary_model) = boundary_system
function check_configuration(system::BoundarySPHSystem, systems, nhs)
(; boundary_model) = system

foreach_system(systems) do neighbor
if neighbor isa WeaklyCompressibleSPHSystem &&
Expand All @@ -894,7 +908,7 @@ function check_configuration(boundary_system::BoundarySPHSystem, systems)
end
end

function check_configuration(system::TotalLagrangianSPHSystem, systems)
function check_configuration(system::TotalLagrangianSPHSystem, systems, nhs)
(; boundary_model) = system

foreach_system(systems) do neighbor
Expand All @@ -911,12 +925,50 @@ function check_configuration(system::TotalLagrangianSPHSystem, systems)
end
end

function check_configuration(system::OpenBoundarySPHSystem, systems)
function check_configuration(system::OpenBoundarySPHSystem, systems,
neighborhood_search::PointNeighbors.AbstractNeighborhoodSearch)
(; boundary_model, boundary_zone) = system

# Store index of the fluid system. This is necessary for re-linking
# in case we use Adapt.jl to create a new semidiscretization.
fluid_system_index = findfirst(==(system.fluid_system), systems)
system.fluid_system_index[] = fluid_system_index

if boundary_model isa BoundaryModelLastiwka &&
boundary_zone isa BoundaryZone{BidirectionalFlow}
throw(ArgumentError("`BoundaryModelLastiwka` needs a specific flow direction. " *
"Please specify inflow and outflow."))
end

if first(PointNeighbors.requires_update(neighborhood_search))
throw(ArgumentError("`OpenBoundarySPHSystem` requires a neighborhood search " *
"that does not require an update for the first set of coordinates (e.g. `GridNeighborhoodSearch`). " *
"See the PointNeighbors.jl documentation for more details."))
end
end

# After `adapt`, the system type information may change.
# This means that systems linking to other systems still point to old systems.
# Therefore, we have to re-link them based on the stored system index.
set_system_links(system, semi) = system

function set_system_links(system::OpenBoundarySPHSystem, semi)
fluid_system = semi.systems[system.fluid_system_index[]]

return OpenBoundarySPHSystem(system.boundary_model,
system.initial_condition,
fluid_system, # link to fluid system
system.fluid_system_index,
system.smoothing_length,
system.mass,
system.density,
system.volume,
system.pressure,
system.boundary_zone,
system.reference_velocity,
system.reference_pressure,
system.reference_density,
system.buffer,
system.update_callback_used,
system.cache)
end
1 change: 1 addition & 0 deletions src/schemes/boundary/dummy_particles/dummy_particles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ end

# This needs to be serial to avoid race conditions when writing into `system`
foreach_point_neighbor(neighbor_system, system, neighbor_coords, system_coords, semi;
points=each_moving_particle(neighbor_system),
parallelization_backend=SerialBackend()) do neighbor, particle,
pos_diff, distance
# Since neighbor and particle are switched
Expand Down
Loading
Loading