MFlowCode
diff --git a/‎.github/pull_request_template.md
Lines changed: 1 addition & 1 deletion b/‎.github/pull_request_template.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/frontier/build.sh
Lines changed: 6 additions & 1 deletion b/‎.github/workflows/frontier/build.sh
Lines changed: 6 additions & 1 deletion
diff --git a/‎.github/workflows/frontier/submit.sh
Lines changed: 14 additions & 1 deletion b/‎.github/workflows/frontier/submit.sh
Lines changed: 14 additions & 1 deletion
diff --git a/‎.github/workflows/frontier/test.sh
Lines changed: 5 additions & 2 deletions b/‎.github/workflows/frontier/test.sh
Lines changed: 5 additions & 2 deletions
diff --git a/‎.github/workflows/test.yml
Lines changed: 1 addition & 4 deletions b/‎.github/workflows/test.yml
Lines changed: 1 addition & 4 deletions
diff --git a/‎docs/documentation/case.md
Lines changed: 2 additions & 6 deletions b/‎docs/documentation/case.md
Lines changed: 2 additions & 6 deletions
diff --git a/‎docs/documentation/running.md
Lines changed: 3 additions & 3 deletions b/‎docs/documentation/running.md
Lines changed: 3 additions & 3 deletions
diff --git a/‎examples/2D_lagrange_bubblescreen/case.py
Lines changed: 170 additions & 0 deletions b/‎examples/2D_lagrange_bubblescreen/case.py
Lines changed: 170 additions & 0 deletions
diff --git a/‎examples/2D_lagrange_bubblescreen/input/README.txt
Lines changed: 5 additions & 0 deletions b/‎examples/2D_lagrange_bubblescreen/input/README.txt
Lines changed: 5 additions & 0 deletions
diff --git a/‎examples/3D_lagrange_shbubcollapse/case.py
Lines changed: 2 additions & 3 deletions b/‎examples/3D_lagrange_shbubcollapse/case.py
Lines changed: 2 additions & 3 deletions
@@ -54,5 +54,5 @@ To make sure the code is performing as expected on GPU devices, I have:
 - [ ] Ran the code on MI200+ GPUs and ensure the new features performed as expected (the GPU results match the CPU results)
 - [ ] Enclosed the new feature via `nvtx` ranges so that they can be identified in profiles
 - [ ] Ran a Nsight Systems profile using `./mfc.sh run XXXX --gpu -t simulation --nsys`, and have attached the output file (`.nsys-rep`) and plain text results to this PR
-- [ ] Ran an Omniperf profile using `./mfc.sh run XXXX --gpu -t simulation --omniperf`, and have attached the output file and plain text results to this PR.
+- [ ] Ran a Rocprof Systems profile using `./mfc.sh run XXXX --gpu -t simulation --rsys --hip-trace`, and have attached the output file and plain text results to this PR.
 - [ ] Ran my code using various numbers of different GPUs (1, 2, and 8, for example) in parallel and made sure that the results scale similarly to what happens if you run without the new code/feature
@@ -1,4 +1,9 @@
 #!/bin/bash
 
+build_opts=""
+if [ "$1" == "gpu" ]; then
+    build_opts="--gpu"
+fi
+
 . ./mfc.sh load -c f -m g
-./mfc.sh test --dry-run -j 8 --gpu
+./mfc.sh test --dry-run -j 8 $build_opts
@@ -13,16 +13,29 @@ else
     exit 1
 fi
 
+if [ "$2" == "cpu" ]; then
+    sbatch_device_opts="\
+#SBATCH -n 32                       # Number of cores required"
+elif [ "$2" == "gpu" ]; then
+    sbatch_device_opts="\
+#SBATCH -n 8                       # Number of cores required"
+else
+    usage
+    exit 1
+fi
+
+
 job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
 
 sbatch <<EOT
 #!/bin/bash
 #SBATCH -JMFC-$job_slug            # Job name
 #SBATCH -A CFD154                  # charge account
 #SBATCH -N 1                       # Number of nodes required
-#SBATCH -n 8                       # Number of cores required
+$sbatch_device_opts
 #SBATCH -t 01:59:00                # Duration of the job (Ex: 15 mins)
 #SBATCH -o$job_slug.out            # Combined output and error messages file
+#SBATCH -p extended                # Extended partition for shorter queues
 #SBATCH -q debug                   # Use debug QOS - only one job per user allowed in queue!
 #SBATCH -W                         # Do not exit until the submitted job terminates.
 
 
@@ -3,5 +3,8 @@
 gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' '`
 ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c`
 
-./mfc.sh test --max-attempts 3 -j $ngpus -- -c frontier
-
+if [ "$job_device" == "gpu" ]; then
+    ./mfc.sh test --max-attempts 3 -j $ngpus -- -c frontier
+else
+    ./mfc.sh test --max-attempts 3 -j 32 -- -c frontier
+fi
@@ -97,9 +97,6 @@ jobs:
       matrix:
         device: ['cpu', 'gpu']
         lbl: ['gt', 'frontier']
-        exclude:
-          - device:   cpu
-            lbl: frontier
     runs-on:
       group:  phoenix
       labels: ${{ matrix.lbl }}
@@ -116,7 +113,7 @@ jobs:
 
       - name: Build
         if:   matrix.lbl == 'frontier'
-        run:  bash .github/workflows/frontier/build.sh
+        run:  bash .github/workflows/frontier/build.sh ${{ matrix.device }}
 
       - name: Test
         if:   matrix.lbl == 'frontier'
 
@@ -436,7 +436,7 @@ The effect and use of the source term are assessed by [Schmidmayer et al., 2019]
 - `time_stepper` specifies the order of the Runge-Kutta (RK) time integration scheme that is used for temporal integration in simulation, from the 1st to 5th order by corresponding integer.
 Note that `time_stepper = 3` specifies the total variation diminishing (TVD), third order RK scheme ([Gottlieb and Shu, 1998](references.md)).
 
-- `adap_dt` activates the Strang operator splitting scheme which splits flux and source terms in time marching, and an adaptive time stepping strategy is implemented for the source term. It requires ``bubbles = 'T'``, ``polytropic = 'T'``, ``adv_n = 'T'`` and `time_stepper = 3`.
+- `adap_dt` activates the Strang operator splitting scheme which splits flux and source terms in time marching, and an adaptive time stepping strategy is implemented for the source term. It requires ``bubbles_euler = 'T'``, ``polytropic = 'T'``, ``adv_n = 'T'`` and `time_stepper = 3`. Additionally, it can be used with ``bubbles_lagrange = 'T'`` and `time_stepper = 3`.
 
 - `weno_order` specifies the order of WENO scheme that is used for spatial reconstruction of variables by an integer of 1, 3, 5, and 7, that correspond to the 1st, 3rd, 5th, and 7th order, respectively.
 
@@ -461,7 +461,7 @@ It is recommended to set `weno_eps` to $10^{-6}$ for WENO-JS, and to $10^{-40}$
 `riemann_solver = 1`, `2`, and `3` correspond to HLL, HLLC, and Exact Riemann solver, respectively ([Toro, 2013](references.md)).
 `riemann_solver = 4` is only for MHD simulations. It resolves 5 of the full seven-wave structure of the MHD equations ([Miyoshi and Kusano, 2005](references.md)).
 
-- `low_Mach` specifies the choice of the low Mach number correction scheme for the HLLC Riemann solver. `low_Mach = 0` is default value and does not apply any correction scheme. `low_Mach = 1` and `2` apply the anti-dissipation pressure correction method ([Chen et al., 2022](references.md)) and the improved velocity reconstruction method ([Thornber et al., 2008](references.md)). This feature requires `riemann_solver = 2` and `model_eqns = 2`.
+- `low_Mach` specifies the choice of the low Mach number correction scheme for the HLLC Riemann solver. `low_Mach = 0` is default value and does not apply any correction scheme. `low_Mach = 1` and `2` apply the anti-dissipation pressure correction method ([Chen et al., 2022](references.md)) and the improved velocity reconstruction method ([Thornber et al., 2008](references.md)). This feature requires `model_eqns = 2` or `3`. `low_Mach = 1` works for `riemann_solver = 1` and `2`, but `low_Mach = 2` only works for `riemann_solver = 2`.
 
 - `avg_state` specifies the choice of the method to compute averaged variables at the cell-boundaries from the left and the right states in the Riemann solver by an integer of 1 or 2.
 `avg_state = 1` and `2` correspond to Roe- and arithmetic averages, respectively.
@@ -790,8 +790,6 @@ When ``polytropic = 'F'``, the gas compression is modeled as non-polytropic due
 | `x0`                  | Real    | Reference length                                          |
 | `Thost`               | Real    | Temperature of the surrounding liquid (host)              |
 | `diffcoefvap`         | Real    | Vapor diffusivity in the gas                              |
-| `rkck_adap_dt`        | Logical | Activates the adaptive rkck time stepping algorithm       |
-| `rkck_tolerance`      | Real    | Admissible error truncation tolerance in the rkck stepper  |
 
 - `nBubs_glb` Total number of bubbles. Their initial conditions need to be specified in the ./input/lag_bubbles.dat file. See the example cases for additional information.
 
@@ -805,8 +803,6 @@ When ``polytropic = 'F'``, the gas compression is modeled as non-polytropic due
 
 - `massTransfer_model` Activates the mass transfer model at the bubble's interface based on ([Preston et al., 2007](references.md)).
 
-- `rkck_adap_dt` Activates the adaptive 4th/5th order Runge—Kutta–Cash–Karp (RKCK) time-stepping algorithm (requires `time_stepper ==4`). A maximum error between the 4th and 5th order Runge-Kutta-Cash-Karp solutions for the same time step size is calculated. If the error is smaller than a tolerance (`rkck_tolerance`), then the algorithm employs the 5th order solution, while if not, both eulerian/lagrangian variables are re-calculated with a smaller time step size.
-
 ### 10. Velocity Field Setup
 
 | Parameter              | Type    | Description |
 
@@ -98,13 +98,13 @@ Learn more about NVIDIA Nsight Compute [here](https://docs.nvidia.com/nsight-com
 
 
 #### AMD GPUs
-- Rocprof (ROC): `./mfc.sh run ... -t simulation --roc --hip-trace [rocprof flags]` allows one to visualize MFC's system-wide performance with [Perfetto UI](https://ui.perfetto.dev/).
+- Rocprof Systems (RSYS): `./mfc.sh run ... -t simulation --rsys --hip-trace [rocprof flags]` allows one to visualize MFC's system-wide performance with [Perfetto UI](https://ui.perfetto.dev/).
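-When used, `--roc` will run the simulation and generate files in the case directory for all targets.
+When used, `--rsys` will run the simulation and generate files in the case directory for all targets.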
 `results.json` can then be imported in [Perfetto's UI](https://ui.perfetto.dev/).
 Learn more about AMD Rocprof [here](https://rocm.docs.amd.com/projects/rocprofiler/en/docs-5.5.1/rocprof.html)
 It is best to run case files with few timesteps to keep the report file sizes manageable.
-- Omniperf (OMNI): `./mfc.sh run ... -t simulation --omni [omniperf flags]` allows one to conduct kernel-level profiling with [AMD's Omniperf](https://rocm.docs.amd.com/projects/omniperf/en/latest/index.html).
-When used, `--omni` will output profiling information for all subroutines, including rooflines, cache usage, register usage, and more, after the simulation is run.
+- Rocprof Compute (RCU): `./mfc.sh run ... -t simulation --rcu -n <name> [rocprof-compute flags]` allows one to conduct kernel-level profiling with [ROCm Compute Profiler](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/what-is-rocprof-compute.html).
+When used, `--rcu` will output profiling information for all subroutines, including rooflines, cache usage, register usage, and more, after the simulation is run.
 Adding this argument will moderately slow down the simulation and run the MFC executable several times.
 For this reason, it should only be used with case files with few timesteps.
 
 
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+import math
+import json
+
+# Bubble screen
+# Description: A planar acoustic wave interacts with a bubble cloud
+# in water. The background field is modeled using an Eulerian framework,
+# while the bubbles are tracked using a Lagrangian framework.
+
+# Reference values for nondimensionalization
+x0 = 1.0e-03  # length - m
+rho0 = 1.0e03  # density - kg/m3
+c0 = 1475.0  # speed of sound - m/s
+p0 = rho0 * c0 * c0  # pressure - Pa
+T0 = 298  # temperature - K
+
+# Host properties (water)
+gamma_host = 2.7466  # Specific heat ratio
+pi_inf_host = 792.02e06  # Stiffness - Pa
+mu_host = 1e-3  # Dynamic viscosity - Pa.s
+c_host = 1475.0  # speed of sound - m/s
+rho_host = 1000  # density kg/m3
+T_host = 298  # temperature K
+
+# Lagrangian bubbles' properties
+R_uni = 8314  # Universal gas constant - J/kmol/K
+MW_g = 28.0  # Molar weight of the gas - kg/kmol
+MW_v = 18.0  # Molar weight of the vapor - kg/kmol
+gamma_g = 1.4  # Specific heat ratio of the gas
+gamma_v = 1.333  # Specific heat ratio of the vapor
+pv = 2350  # Vapor pressure of the host - Pa
+cp_g = 1.0e3  # Specific heat of the gas - J/kg/K
+cp_v = 2.1e3  # Specific heat of the vapor - J/kg/K
+k_g = 0.025  # Thermal conductivity of the gas - W/m/K
+k_v = 0.02  # Thermal conductivity of the vapor - W/m/K
+diffVapor = 2.5e-5  # Diffusivity coefficient of the vapor - m2/s
+sigBubble = 0.069  # Surface tension of the bubble - N/m
+mu_g = 1.48e-5
+
+# Acoustic source properties
+patm = 101325.0  # Atmospheric pressure - Pa
+pamp = 1.0e5  # Amplitude of the acoustic source - Pa
+freq = 300e03  # Source frequency - Hz
+wlen = c_host / freq  # Wavelength - m
+
+# Domain and time set up
+
+xb = -12.0e-3  # Domain boundaries - m (x direction)
+xe = 12.0e-3
+yb = -2.5e-3  # Domain boundaries - m (y direction)
+ye = 2.5e-3
+z_virtual = 5.0e-3  # Virtual depth (z direction)
+
+Nx = 240  # number of elements into x direction
+Ny = 50  # number of elements into y direction
+
+dt = 7.5e-9  # constant time-step - sec
+
+# Configuring case dictionary
+print(
+    json.dumps(
+        {
+            # Logistics
+            "run_time_info": "T",
+            # Computational Domain Parameters
+            "x_domain%beg": xb / x0,
+            "x_domain%end": xe / x0,
+            "y_domain%beg": yb / x0,
+            "y_domain%end": ye / x0,
+            "stretch_y": "F",
+            "stretch_x": "F",
+            "m": Nx,
+            "n": Ny,
+            "p": 0,
+            "dt": dt * (c0 / x0),
+            "t_step_start": 0,
+            "t_step_stop": 3000,
+            "t_step_save": 500,
+            # Simulation Algorithm Parameters
+            "model_eqns": 2,
+            "time_stepper": 3,
+            "num_fluids": 2,
+            "num_patches": 1,
+            "viscous": "T",
+            "mpp_lim": "F",
+            "weno_order": 5,
+            "weno_eps": 1.0e-16,
+            "mapped_weno": "T",
+            "riemann_solver": 2,
+            "wave_speeds": 1,
+            "avg_state": 2,
+            "bc_x%beg": -6,
+            "bc_x%end": -6,
+            "bc_y%beg": -1,
+            "bc_y%end": -1,
+            # Acoustic source
+            "acoustic_source": "T",
+            "num_source": 1,
+            "acoustic(1)%support": 2,
+            "acoustic(1)%pulse": 1,
+            "acoustic(1)%npulse": 1,
+            "acoustic(1)%mag": pamp / p0,
+            "acoustic(1)%wavelength": wlen / x0,
+            "acoustic(1)%length": 2 * (ye - yb) / x0,
+            "acoustic(1)%loc(1)": -7.0e-03 / x0,
+            "acoustic(1)%loc(2)": 0.0,
+            "acoustic(1)%dir": 0.0,
+            "acoustic(1)%delay": 0.0,
+            # Formatted Database Files Structure Parameters
+            "format": 1,
+            "precision": 2,
+            "prim_vars_wrt": "T",
+            "parallel_io": "T",
+            # Patch 1: Water (left)
+            "patch_icpp(1)%geometry": 3,
+            "patch_icpp(1)%x_centroid": 0.0,
+            "patch_icpp(1)%y_centroid": 0.0,
+            "patch_icpp(1)%length_x": 2 * (xe - xb) / x0,
+            "patch_icpp(1)%length_y": 2 * (ye - yb) / x0,
+            "patch_icpp(1)%vel(1)": 0.0,
+            "patch_icpp(1)%vel(2)": 0.0,
+            "patch_icpp(1)%pres": patm / p0,
+            "patch_icpp(1)%alpha_rho(1)": rho_host / rho0,
+            "patch_icpp(1)%alpha_rho(2)": 0.0,
+            "patch_icpp(1)%alpha(1)": 1.0,
+            "patch_icpp(1)%alpha(2)": 0.0,
+            # Lagrangian Bubbles
+            "bubbles_lagrange": "T",
+            "bubble_model": 2,  # Keller-Miksis model
+            "lag_params%nBubs_glb": 1194,  # Number of bubbles
+            "lag_params%solver_approach": 2,
+            "lag_params%cluster_type": 2,
+            "lag_params%pressure_corrector": "T",
+            "lag_params%smooth_type": 1,
+            "lag_params%heatTransfer_model": "T",
+            "lag_params%massTransfer_model": "T",
+            "lag_params%epsilonb": 1.0,
+            "lag_params%valmaxvoid": 0.9,
+            "lag_params%write_bubbles": "F",
+            "lag_params%write_bubbles_stats": "F",
+            "lag_params%c0": c0,
+            "lag_params%rho0": rho0,
+            "lag_params%T0": T0,
+            "lag_params%x0": x0,
+            "lag_params%diffcoefvap": diffVapor,
+            "lag_params%Thost": T_host,
+            "lag_params%charwidth": z_virtual / x0,
+            # Fluids Physical Parameters
+            # Host medium
+            "fluid_pp(1)%gamma": 1.0 / (gamma_host - 1.0),
+            "fluid_pp(1)%pi_inf": gamma_host * (pi_inf_host / p0) / (gamma_host - 1.0),
+            "fluid_pp(1)%Re(1)": 1.0 / (mu_host / (rho0 * c0 * x0)),
+            "fluid_pp(1)%mul0": mu_host,
+            "fluid_pp(1)%ss": sigBubble,
+            "fluid_pp(1)%pv": pv,
+            "fluid_pp(1)%gamma_v": gamma_v,
+            "fluid_pp(1)%M_v": MW_v,
+            "fluid_pp(1)%k_v": k_v,
+            "fluid_pp(1)%cp_v": cp_v,
+            # Bubble gas state
+            "fluid_pp(2)%gamma": 1.0 / (gamma_g - 1.0),
+            "fluid_pp(2)%pi_inf": 0.0e00,
+            "fluid_pp(2)%Re(1)": 1.0 / (mu_g / (rho0 * c0 * x0)),
+            "fluid_pp(2)%gamma_v": gamma_g,
+            "fluid_pp(2)%M_v": MW_g,
+            "fluid_pp(2)%k_v": k_g,
+            "fluid_pp(2)%cp_v": cp_g,
+        }
+    )
+)
@@ -0,0 +1,5 @@
+
+The user input file 'input/lag_bubbles.dat' contains the initial conditions of the Lagrangian bubbles.
+Each row represents the initial state of one bubble, with the following columns:
+
+xPosition/x0    yPosition/x0    zPosition/x0    xVel/c0     yVel/c0     zVel/c0     radius/x0       interfaceVelocity/c0
@@ -80,6 +80,7 @@
             "n": Ny,
             "p": Nz,
             "dt": round(dt * c0 / x0, 6),
+            "adap_dt": "T",
             "n_start": 0,
             "t_save": saveTime * (c0 / x0),
             "t_stop": stopTime * (c0 / x0),
@@ -89,7 +90,7 @@
             "num_patches": 1,
             "mpp_lim": "F",
             "viscous": "T",
-            "time_stepper": 4,  # 4th/5th RKCK
+            "time_stepper": 3,
             "weno_order": 5,
             "weno_eps": 1.0e-16,
             "mapped_weno": "T",
@@ -141,8 +142,6 @@
             # Lagrangian Bubbles
             "bubbles_lagrange": "T",
             "bubble_model": 2,  # Keller-Miksis model
-            "rkck_adap_dt": "T",  # Activate adaptive time stepper
-            "rkck_tolerance": 1.0e-05,
             "lag_params%nBubs_glb": 1,
             "lag_params%solver_approach": 2,  # Two-way coupled
             "lag_params%cluster_type": 2,