From c939eec0af21f0072b131f302da71a23549f25b6 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 14 May 2025 01:04:56 -0400 Subject: [PATCH 01/14] Commented out an example line and running unit test. Going to add subroutine and condense. --- src/simulation/m_time_steppers.fpp | 32 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index f8cdb7a7ac..1740a4cfb9 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -791,20 +791,6 @@ contains (3._wp*pb_ts(1)%sf(j, k, l, q, i) & + pb_ts(2)%sf(j, k, l, q, i) & + dt*rhs_pb(j, k, l, q, i))/4._wp - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode mv_ts(2)%sf(j, k, l, q, i) = & (3._wp*mv_ts(1)%sf(j, k, l, q, i) & + mv_ts(2)%sf(j, k, l, q, i) & @@ -816,6 +802,24 @@ contains end do end if + !! if (qbmm .and. (.not. polytropic)) then + !! !$acc parallel loop collapse(5) gang vector default(present) + !! do i = 1, nb + !! do l = 0, p + !! do k = 0, n + !! do j = 0, m + !! do q = 1, nnode + !! mv_ts(2)%sf(j, k, l, q, i) = & + !! (3._wp*mv_ts(1)%sf(j, k, l, q, i) & + !! + mv_ts(2)%sf(j, k, l, q, i) & + !! + dt*rhs_mv(j, k, l, q, i))/4._wp + !! end do + !! end do + !! end do + !! end do + !! end do + !! end if + if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) From d4ba8eaa5e8f89cdc35ff764f1be0849e57d3a37 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 14 May 2025 02:48:23 -0400 Subject: [PATCH 02/14] Moved the q, pb, mv evolution to a submodule that passes tests. --- src/simulation/m_time_steppers.fpp | 95 +++++++++++++++--------------- 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 1740a4cfb9..07041afbee 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -357,6 +357,52 @@ contains end subroutine s_initialize_time_steppers_module + subroutine s_evolve_q_pb_mv(index1, index2, scaler1, scaler2, scaler3, scaler4) !! TODO :: Get a better name for this + + integer, intent(in) :: index1, index2 !! TODO :: I have no idea what index is meant to represent. Rename this. + real(wp), intent(in) :: scaler1, scaler2, scaler3, scaler4 + integer :: i, j, k, l, q + + !$acc parallel loop collapse(4) gang vector default(present) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(index1)%vf(i)%sf(j, k, l) = & + (scaler1 * q_cons_ts(1)%vf(i)%sf(j, k, l) & + + scaler2 * q_cons_ts(index2)%vf(i)%sf(j, k, l) & + + scaler3 * dt * rhs_vf(i)%sf(j, k, l)) / scaler4 + end do + end do + end do + end do + + !Evolve pb and mv for non-polytropic qbmm + !! TODO :: It really feels like this loop should be separated from the above loop. Consider making two different subroutines + if (qbmm .and. (.not. polytropic)) then + !$acc parallel loop collapse(5) gang vector default(present) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(index1)%sf(j, k, l, q, i) = & + (scaler1 * pb_ts(1)%sf(j, k, l, q, i) & + + scaler2 * pb_ts(index2)%sf(j, k, l, q, i) & + + scaler3 * dt * rhs_pb(j, k, l, q, i)) / scaler4 + mv_ts(index1)%sf(j, k, l, q, i) = & + (scaler1 * mv_ts(1)%sf(j, k, l, q, i) & + + scaler2 * mv_ts(index2)%sf(j, k, l, q, i) & + + scaler3 * dt * rhs_mv(j, k, l, q, i)) / scaler4 + end do + end do + end do + end do + end do + end if + + end subroutine s_evolve_q_pb_mv + !> 1st order TVD RK time-stepping algorithm !! @param t_step Current time step subroutine s_1st_order_tvd_rk(t_step, time_avg) @@ -398,53 +444,8 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) - end do - end do - end do - end do - end do - end if + call s_evolve_q_pb_mv(1, 1, 1._wp, 1._wp, 1._wp, 1._wp) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, dt) From d0533651372906aeb30e51127111a2af566216a5 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 14 May 2025 03:11:37 -0400 Subject: [PATCH 03/14] Successfully wrapped in function --- src/simulation/m_time_steppers.fpp | 101 ++++++++++++++++------------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 07041afbee..b5477e70e2 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -357,51 +357,27 @@ contains end subroutine s_initialize_time_steppers_module - subroutine s_evolve_q_pb_mv(index1, index2, scaler1, scaler2, scaler3, scaler4) !! TODO :: Get a better name for this + subroutine s_evolve_q(index, scaler1, scaler2, scaler3) !! TODO :: Get a better name for this - integer, intent(in) :: index1, index2 !! TODO :: I have no idea what index is meant to represent. Rename this. - real(wp), intent(in) :: scaler1, scaler2, scaler3, scaler4 - integer :: i, j, k, l, q + integer, intent(in) :: index !! TODO :: I have no idea what index is meant to represent. Rename this. + real(wp), intent(in) :: scaler1, scaler2, scaler3 + integer :: i, j, k, l !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size do l = 0, p do k = 0, n do j = 0, m - q_cons_ts(index1)%vf(i)%sf(j, k, l) = & + q_cons_ts(index)%vf(i)%sf(j, k, l) = & (scaler1 * q_cons_ts(1)%vf(i)%sf(j, k, l) & - + scaler2 * q_cons_ts(index2)%vf(i)%sf(j, k, l) & - + scaler3 * dt * rhs_vf(i)%sf(j, k, l)) / scaler4 + + scaler2 * q_cons_ts(2)%vf(i)%sf(j, k, l) & + + scaler3 * dt * rhs_vf(i)%sf(j, k, l)) / (scaler1 + scaler2) end do end do end do end do - !Evolve pb and mv for non-polytropic qbmm - !! TODO :: It really feels like this loop should be separated from the above loop. Consider making two different subroutines - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(index1)%sf(j, k, l, q, i) = & - (scaler1 * pb_ts(1)%sf(j, k, l, q, i) & - + scaler2 * pb_ts(index2)%sf(j, k, l, q, i) & - + scaler3 * dt * rhs_pb(j, k, l, q, i)) / scaler4 - mv_ts(index1)%sf(j, k, l, q, i) = & - (scaler1 * mv_ts(1)%sf(j, k, l, q, i) & - + scaler2 * mv_ts(index2)%sf(j, k, l, q, i) & - + scaler3 * dt * rhs_mv(j, k, l, q, i)) / scaler4 - end do - end do - end do - end do - end do - end if - - end subroutine s_evolve_q_pb_mv + end subroutine s_evolve_q !> 1st order TVD RK time-stepping algorithm !! @param t_step Current time step @@ -444,7 +420,53 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - call s_evolve_q_pb_mv(1, 1, 1._wp, 1._wp, 1._wp, 1._wp) + !$acc parallel loop collapse(4) gang vector default(present) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l) + end do + end do + end do + end do + + !Evolve pb and mv for non-polytropic qbmm + if (qbmm .and. (.not. polytropic)) then + !$acc parallel loop collapse(5) gang vector default(present) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(1)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i) + end do + end do + end do + end do + end do + end if + + if (qbmm .and. (.not. polytropic)) then + !$acc parallel loop collapse(5) gang vector default(present) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(1)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i) + end do + end do + end do + end do + end do + end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, dt) @@ -692,18 +714,7 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do + call s_evolve_q(2, 1.0_wp, 0.0_wp, 1.0_wp) !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then From 776548d821203d60aa4f7b8fb9b42b02ee1ffe23 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 14 May 2025 03:17:15 -0400 Subject: [PATCH 04/14] Tests also pass with second loop. Now moving on to more tests --- src/simulation/m_time_steppers.fpp | 121 +++++++---------------------- 1 file changed, 29 insertions(+), 92 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index b5477e70e2..14dc231cae 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -357,11 +357,11 @@ contains end subroutine s_initialize_time_steppers_module - subroutine s_evolve_q(index, scaler1, scaler2, scaler3) !! TODO :: Get a better name for this + subroutine s_evolve_q_pb_mv(index, scaler1, scaler2, scaler3) !! TODO :: Get a better name for this integer, intent(in) :: index !! TODO :: I have no idea what index is meant to represent. Rename this. real(wp), intent(in) :: scaler1, scaler2, scaler3 - integer :: i, j, k, l + integer :: i, j, k, l, q !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -377,7 +377,31 @@ contains end do end do - end subroutine s_evolve_q + !Evolve pb and mv for non-polytropic qbmm + if (qbmm .and. (.not. polytropic)) then + !$acc parallel loop collapse(5) gang vector default(present) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(index)%sf(j, k, l, q, i) = & + (scaler1 * pb_ts(1)%sf(j, k, l, q, i) & + + scaler2 * pb_ts(2)%sf(j, k, l, q, i) & + + scaler3 * dt * rhs_pb(j, k, l, q, i)) / (scaler1 + scaler2) + + mv_ts(index)%sf(j, k, l, q, i) = & + (scaler1 * mv_ts(1)%sf(j, k, l, q, i) & + + scaler2 * mv_ts(2)%sf(j, k, l, q, i) & + + scaler3 * dt * rhs_mv(j, k, l, q, i)) / (scaler1 + scaler2) + end do + end do + end do + end do + end do + end if + + end subroutine s_evolve_q_pb_mv !> 1st order TVD RK time-stepping algorithm !! @param t_step Current time step @@ -714,42 +738,7 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - call s_evolve_q(2, 1.0_wp, 0.0_wp, 1.0_wp) - - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) - end do - end do - end do - end do - end do - end if + call s_evolve_q_pb_mv(2, 1.0_wp, 0.0_wp, 1.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -778,59 +767,7 @@ contains call s_update_lagrange_tdv_rk(stage=2) end if - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - (3._wp*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + q_cons_ts(2)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/4._wp - end do - end do - end do - end do - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - (3._wp*pb_ts(1)%sf(j, k, l, q, i) & - + pb_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i))/4._wp - mv_ts(2)%sf(j, k, l, q, i) = & - (3._wp*mv_ts(1)%sf(j, k, l, q, i) & - + mv_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i))/4._wp - end do - end do - end do - end do - end do - end if - - !! if (qbmm .and. (.not. polytropic)) then - !! !$acc parallel loop collapse(5) gang vector default(present) - !! do i = 1, nb - !! do l = 0, p - !! do k = 0, n - !! do j = 0, m - !! do q = 1, nnode - !! mv_ts(2)%sf(j, k, l, q, i) = & - !! (3._wp*mv_ts(1)%sf(j, k, l, q, i) & - !! + mv_ts(2)%sf(j, k, l, q, i) & - !! + dt*rhs_mv(j, k, l, q, i))/4._wp - !! end do - !! end do - !! end do - !! end do - !! end do - !! end if + call s_evolve_q_pb_mv(2, 3.0_wp, 1.0_wp, 1.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) From fa024feb6bee724e3702e46158ad90f4225e4d51 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 14 May 2025 03:35:04 -0400 Subject: [PATCH 05/14] Ran one last batch of unit tests and removed the final lines of code --- src/simulation/m_time_steppers.fpp | 203 +---------------------------- 1 file changed, 7 insertions(+), 196 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 14dc231cae..9bd4f8c513 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -359,8 +359,8 @@ contains subroutine s_evolve_q_pb_mv(index, scaler1, scaler2, scaler3) !! TODO :: Get a better name for this - integer, intent(in) :: index !! TODO :: I have no idea what index is meant to represent. Rename this. - real(wp), intent(in) :: scaler1, scaler2, scaler3 + integer, intent(in) :: index !! TODO :: Rename this + real(wp), intent(in) :: scaler1, scaler2, scaler3 !! TODO :: Rename these too integer :: i, j, k, l, q !$acc parallel loop collapse(4) gang vector default(present) @@ -371,7 +371,7 @@ contains q_cons_ts(index)%vf(i)%sf(j, k, l) = & (scaler1 * q_cons_ts(1)%vf(i)%sf(j, k, l) & + scaler2 * q_cons_ts(2)%vf(i)%sf(j, k, l) & - + scaler3 * dt * rhs_vf(i)%sf(j, k, l)) / (scaler1 + scaler2) + + scaler3 * dt * rhs_vf(i)%sf(j, k, l)) / (scaler1 + scaler2) !! TODO :: scaler1 + scaler2 should be called a normalization constant end do end do end do @@ -444,54 +444,7 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - + call s_evolve_q_pb_mv(1, 1.0_wp, 0.0_wp, 1.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, dt) @@ -550,53 +503,7 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) - end do - end do - end do - end do - end do - end if + call s_evolve_q_pb_mv(2, 1.0_wp, 0.0_wp, 1.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -625,55 +532,7 @@ contains call s_update_lagrange_tdv_rk(stage=2) end if - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(1)%vf(i)%sf(j, k, l) & - + q_cons_ts(2)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/2._wp - end do - end do - end do - end do - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - (pb_ts(1)%sf(j, k, l, q, i) & - + pb_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i))/2._wp - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - (mv_ts(1)%sf(j, k, l, q, i) & - + mv_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i))/2._wp - end do - end do - end do - end do - end do - end if + call s_evolve_q_pb_mv(1, 1.0_wp, 1.0_wp, 1.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) @@ -795,55 +654,7 @@ contains call s_update_lagrange_tdv_rk(stage=3) end if - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(1)%vf(i)%sf(j, k, l) & - + 2._wp*q_cons_ts(2)%vf(i)%sf(j, k, l) & - + 2._wp*dt*rhs_vf(i)%sf(j, k, l))/3._wp - end do - end do - end do - end do - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - (pb_ts(1)%sf(j, k, l, q, i) & - + 2._wp*pb_ts(2)%sf(j, k, l, q, i) & - + 2._wp*dt*rhs_pb(j, k, l, q, i))/3._wp - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - (mv_ts(1)%sf(j, k, l, q, i) & - + 2._wp*mv_ts(2)%sf(j, k, l, q, i) & - + 2._wp*dt*rhs_mv(j, k, l, q, i))/3._wp - end do - end do - end do - end do - end do - end if + call s_evolve_q_pb_mv(1, 1.0_wp, 2.0_wp, 2.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) From 1af301c088112b867aeda82ad106d98ae9574075 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 14 May 2025 20:34:29 -0400 Subject: [PATCH 06/14] I had to remove a bad memory access that I have in for the 1D case --- src/simulation/m_time_steppers.fpp | 34 +++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 9bd4f8c513..13207e9f59 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -444,7 +444,39 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - call s_evolve_q_pb_mv(1, 1.0_wp, 0.0_wp, 1.0_wp) + !$acc parallel loop collapse(4) gang vector default(present) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l) + end do + end do + end do + end do + + !Evolve pb and mv for non-polytropic qbmm + if (qbmm .and. (.not. polytropic)) then + !$acc parallel loop collapse(5) gang vector default(present) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(1)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i) + mv_ts(1)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i) + end do + end do + end do + end do + end do + end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, dt) From 5534f6c3e1700d6392370958d8085b71d454a75d Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 14 May 2025 20:37:43 -0400 Subject: [PATCH 07/14] Formatted with ./mfc.sh format functino call --- src/simulation/m_time_steppers.fpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 13207e9f59..6c6da6c4be 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -369,9 +369,9 @@ contains do k = 0, n do j = 0, m q_cons_ts(index)%vf(i)%sf(j, k, l) = & - (scaler1 * q_cons_ts(1)%vf(i)%sf(j, k, l) & - + scaler2 * q_cons_ts(2)%vf(i)%sf(j, k, l) & - + scaler3 * dt * rhs_vf(i)%sf(j, k, l)) / (scaler1 + scaler2) !! TODO :: scaler1 + scaler2 should be called a normalization constant + (scaler1*q_cons_ts(1)%vf(i)%sf(j, k, l) & + + scaler2*q_cons_ts(2)%vf(i)%sf(j, k, l) & + + scaler3*dt*rhs_vf(i)%sf(j, k, l))/(scaler1 + scaler2) !! TODO :: scaler1 + scaler2 should be called a normalization constant end do end do end do @@ -386,14 +386,14 @@ contains do j = 0, m do q = 1, nnode pb_ts(index)%sf(j, k, l, q, i) = & - (scaler1 * pb_ts(1)%sf(j, k, l, q, i) & - + scaler2 * pb_ts(2)%sf(j, k, l, q, i) & - + scaler3 * dt * rhs_pb(j, k, l, q, i)) / (scaler1 + scaler2) + (scaler1*pb_ts(1)%sf(j, k, l, q, i) & + + scaler2*pb_ts(2)%sf(j, k, l, q, i) & + + scaler3*dt*rhs_pb(j, k, l, q, i))/(scaler1 + scaler2) mv_ts(index)%sf(j, k, l, q, i) = & - (scaler1 * mv_ts(1)%sf(j, k, l, q, i) & - + scaler2 * mv_ts(2)%sf(j, k, l, q, i) & - + scaler3 * dt * rhs_mv(j, k, l, q, i)) / (scaler1 + scaler2) + (scaler1*mv_ts(1)%sf(j, k, l, q, i) & + + scaler2*mv_ts(2)%sf(j, k, l, q, i) & + + scaler3*dt*rhs_mv(j, k, l, q, i))/(scaler1 + scaler2) end do end do end do From 50904ff0bfafbb27a8a1229443b4354199de9039 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Thu, 15 May 2025 09:08:10 -0400 Subject: [PATCH 08/14] Trying to go back and separate the parallel loops, to test if that is the failyre on NHVPC --- src/simulation/m_time_steppers.fpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 6c6da6c4be..4dc6337fd7 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -389,7 +389,18 @@ contains (scaler1*pb_ts(1)%sf(j, k, l, q, i) & + scaler2*pb_ts(2)%sf(j, k, l, q, i) & + scaler3*dt*rhs_pb(j, k, l, q, i))/(scaler1 + scaler2) + end do + end do + end do + end do + end do + !$acc parallel loop collapse(5) gang vector default(present) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode mv_ts(index)%sf(j, k, l, q, i) = & (scaler1*mv_ts(1)%sf(j, k, l, q, i) & + scaler2*mv_ts(2)%sf(j, k, l, q, i) & From 9f72081fb465370298c9ff49983f9eb0596b2b60 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Fri, 16 May 2025 22:13:53 -0400 Subject: [PATCH 09/14] Small commit adding an additional scaler in case there was a precision issue for the intel compiler. I also have split out the loops in the subroutine to test if trying to block those together is causing an issue --- src/simulation/m_time_steppers.fpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 4dc6337fd7..4183f67aad 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -357,10 +357,10 @@ contains end subroutine s_initialize_time_steppers_module - subroutine s_evolve_q_pb_mv(index, scaler1, scaler2, scaler3) !! TODO :: Get a better name for this + subroutine s_evolve_q_pb_mv(index, scaler1, scaler2, scaler3, scaler4) !! TODO :: Get a better name for this integer, intent(in) :: index !! TODO :: Rename this - real(wp), intent(in) :: scaler1, scaler2, scaler3 !! TODO :: Rename these too + real(wp), intent(in) :: scaler1, scaler2, scaler3, scaler4 !! TODO :: Rename these too integer :: i, j, k, l, q !$acc parallel loop collapse(4) gang vector default(present) @@ -371,7 +371,7 @@ contains q_cons_ts(index)%vf(i)%sf(j, k, l) = & (scaler1*q_cons_ts(1)%vf(i)%sf(j, k, l) & + scaler2*q_cons_ts(2)%vf(i)%sf(j, k, l) & - + scaler3*dt*rhs_vf(i)%sf(j, k, l))/(scaler1 + scaler2) !! TODO :: scaler1 + scaler2 should be called a normalization constant + + scaler3*dt*rhs_vf(i)%sf(j, k, l))/scaler4 !! TODO :: scaler1 + scaler2 should be called a normalization constant end do end do end do @@ -388,7 +388,7 @@ contains pb_ts(index)%sf(j, k, l, q, i) = & (scaler1*pb_ts(1)%sf(j, k, l, q, i) & + scaler2*pb_ts(2)%sf(j, k, l, q, i) & - + scaler3*dt*rhs_pb(j, k, l, q, i))/(scaler1 + scaler2) + + scaler3*dt*rhs_pb(j, k, l, q, i))/scaler4 end do end do end do @@ -404,7 +404,7 @@ contains mv_ts(index)%sf(j, k, l, q, i) = & (scaler1*mv_ts(1)%sf(j, k, l, q, i) & + scaler2*mv_ts(2)%sf(j, k, l, q, i) & - + scaler3*dt*rhs_mv(j, k, l, q, i))/(scaler1 + scaler2) + + scaler3*dt*rhs_mv(j, k, l, q, i))/scaler4 end do end do end do @@ -479,6 +479,7 @@ contains pb_ts(1)%sf(j, k, l, q, i) = & pb_ts(1)%sf(j, k, l, q, i) & + dt*rhs_pb(j, k, l, q, i) + mv_ts(1)%sf(j, k, l, q, i) = & mv_ts(1)%sf(j, k, l, q, i) & + dt*rhs_mv(j, k, l, q, i) @@ -546,7 +547,7 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - call s_evolve_q_pb_mv(2, 1.0_wp, 0.0_wp, 1.0_wp) + call s_evolve_q_pb_mv(2, 1.0_wp, 0.0_wp, 1.0_wp, 1.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -575,7 +576,7 @@ contains call s_update_lagrange_tdv_rk(stage=2) end if - call s_evolve_q_pb_mv(1, 1.0_wp, 1.0_wp, 1.0_wp) + call s_evolve_q_pb_mv(1, 1.0_wp, 1.0_wp, 1.0_wp, 2.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) @@ -640,7 +641,7 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - call s_evolve_q_pb_mv(2, 1.0_wp, 0.0_wp, 1.0_wp) + call s_evolve_q_pb_mv(2, 1.0_wp, 0.0_wp, 1.0_wp, 1.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -669,7 +670,7 @@ contains call s_update_lagrange_tdv_rk(stage=2) end if - call s_evolve_q_pb_mv(2, 3.0_wp, 1.0_wp, 1.0_wp) + call s_evolve_q_pb_mv(2, 3.0_wp, 1.0_wp, 1.0_wp, 4.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) @@ -697,7 +698,7 @@ contains call s_update_lagrange_tdv_rk(stage=3) end if - call s_evolve_q_pb_mv(1, 1.0_wp, 2.0_wp, 2.0_wp) + call s_evolve_q_pb_mv(1, 1.0_wp, 2.0_wp, 2.0_wp, 3.0_wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) From ab2861d736bcb4b399a3979002850b08498037a0 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sat, 17 May 2025 06:26:07 -0400 Subject: [PATCH 10/14] Found the issue. Memory locality problem for intel GPU when trying to separate my for loops. Definately not desired behavior for ACC, but simple enough to work around. --- src/simulation/m_time_steppers.fpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 4183f67aad..497e990ec3 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -479,7 +479,17 @@ contains pb_ts(1)%sf(j, k, l, q, i) = & pb_ts(1)%sf(j, k, l, q, i) & + dt*rhs_pb(j, k, l, q, i) - + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m mv_ts(1)%sf(j, k, l, q, i) = & mv_ts(1)%sf(j, k, l, q, i) & + dt*rhs_mv(j, k, l, q, i) From e67e861df9043e6e1c22a3961678b5922c828577 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sat, 17 May 2025 06:31:14 -0400 Subject: [PATCH 11/14] Missed a do statement --- src/simulation/m_time_steppers.fpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 497e990ec3..1a5780c0df 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -490,6 +490,7 @@ contains do l = 0, p do k = 0, n do j = 0, m + do q = 1, nnode mv_ts(1)%sf(j, k, l, q, i) = & mv_ts(1)%sf(j, k, l, q, i) & + dt*rhs_mv(j, k, l, q, i) From e8041e07f01b46777f96a36aea41a6256a1e1f48 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sat, 17 May 2025 06:42:14 -0400 Subject: [PATCH 12/14] formatting --- src/simulation/m_time_steppers.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 1a5780c0df..3d41666620 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -484,7 +484,7 @@ contains end do end do end do - + !$acc parallel loop collapse(5) gang vector default(present) do i = 1, nb do l = 0, p From b1d7d04c57e996a627ac7f4a6255e0a4bddbda31 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sat, 17 May 2025 16:50:11 -0400 Subject: [PATCH 13/14] Intermittent commit after playing with running local tests with act --- src/simulation/m_time_steppers.fpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 3d41666620..dbd8ae9946 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -371,7 +371,7 @@ contains q_cons_ts(index)%vf(i)%sf(j, k, l) = & (scaler1*q_cons_ts(1)%vf(i)%sf(j, k, l) & + scaler2*q_cons_ts(2)%vf(i)%sf(j, k, l) & - + scaler3*dt*rhs_vf(i)%sf(j, k, l))/scaler4 !! TODO :: scaler1 + scaler2 should be called a normalization constant + + scaler3*dt*rhs_vf(i)%sf(j, k, l))/scaler4 !! TODO :: scaler4 should be called a normalization constant end do end do end do @@ -394,7 +394,9 @@ contains end do end do end do + end if + if (qbmm .and. (.not. polytropic)) then !$acc parallel loop collapse(5) gang vector default(present) do i = 1, nb do l = 0, p @@ -484,7 +486,9 @@ contains end do end do end do + end if + if (qbmm .and. (.not. polytropic)) then !$acc parallel loop collapse(5) gang vector default(present) do i = 1, nb do l = 0, p @@ -558,7 +562,7 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - call s_evolve_q_pb_mv(2, 1.0_wp, 0.0_wp, 1.0_wp, 1.0_wp) + call s_evolve_q_pb_mv(2, 1._wp, 0._wp, 1._wp, 1._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -587,7 +591,7 @@ contains call s_update_lagrange_tdv_rk(stage=2) end if - call s_evolve_q_pb_mv(1, 1.0_wp, 1.0_wp, 1.0_wp, 2.0_wp) + call s_evolve_q_pb_mv(1, 1._wp, 1._wp, 1._wp, 2._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) @@ -652,7 +656,7 @@ contains call s_update_lagrange_tdv_rk(stage=1) end if - call s_evolve_q_pb_mv(2, 1.0_wp, 0.0_wp, 1.0_wp, 1.0_wp) + call s_evolve_q_pb_mv(2, 1._wp, 0._wp, 1._wp, 1._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -681,7 +685,7 @@ contains call s_update_lagrange_tdv_rk(stage=2) end if - call s_evolve_q_pb_mv(2, 3.0_wp, 1.0_wp, 1.0_wp, 4.0_wp) + call s_evolve_q_pb_mv(2, 3._wp, 1._wp, 1._wp, 4._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) @@ -709,7 +713,7 @@ contains call s_update_lagrange_tdv_rk(stage=3) end if - call s_evolve_q_pb_mv(1, 1.0_wp, 2.0_wp, 2.0_wp, 3.0_wp) + call s_evolve_q_pb_mv(1, 1._wp, 2._wp, 2._wp, 3._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) From 75d69a2b2f21410d12690781be135e3fbb9bd8ee Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sun, 1 Jun 2025 22:08:13 -0400 Subject: [PATCH 14/14] Testing out forcing an inline to resolve the intel compiler issue --- src/simulation/m_time_steppers.fpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index fa75c951e0..e246fd7c89 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -538,6 +538,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + !DIR$ FORCEINLINE call s_evolve_q_pb_mv(2, 1._wp, 0._wp, 1._wp, 1._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -564,6 +565,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + !DIR$ FORCEINLINE call s_evolve_q_pb_mv(1, 1._wp, 1._wp, 1._wp, 2._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) @@ -626,6 +628,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + !DIR$ FORCEINLINE call s_evolve_q_pb_mv(2, 1._wp, 0._wp, 1._wp, 1._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -652,6 +655,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + !DIR$ FORCEINLINE call s_evolve_q_pb_mv(2, 3._wp, 1._wp, 1._wp, 4._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) @@ -677,6 +681,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) + !DIR$ FORCEINLINE call s_evolve_q_pb_mv(1, 1._wp, 2._wp, 2._wp, 3._wp) if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp)