From 14706c235e5cc85dbf11219b29991064dd60dbc4 Mon Sep 17 00:00:00 2001
From: mohdsaid497566 <145478595+mohdsaid497566@users.noreply.github.com>
Date: Wed, 11 Jun 2025 11:03:39 -0400
Subject: [PATCH 01/15] added flag --rdma-mpi

---
 toolchain/mfc/args.py      | 17 +++++++++--------
 toolchain/mfc/test/case.py |  6 ++++--
 toolchain/mfc/test/test.py | 13 ++++++++++++-
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
index e476e0a209..6f2c885bc3 100644
--- a/toolchain/mfc/args.py
+++ b/toolchain/mfc/args.py
@@ -76,14 +76,15 @@ def add_common_arguments(p, mask = None):
     test.add_argument("-l", "--list",         action="store_true", help="List all available tests.")
     test.add_argument("-f", "--from",         default=test_cases[0].get_uuid(), type=str, help="First test UUID to run.")
     test.add_argument("-t", "--to",           default=test_cases[-1].get_uuid(), type=str, help="Last test UUID to run.")
-    test.add_argument("-o", "--only",         nargs="+", type=str, default=[], metavar="L", help="Only run tests with specified properties.")
-    test.add_argument("-a", "--test-all",     action="store_true", default=False, help="Run the Post Process Tests too.")
-    test.add_argument("-%", "--percent",      type=int, default=100, help="Percentage of tests to run.")
-    test.add_argument("-m", "--max-attempts", type=int, default=1, help="Maximum number of attempts to run a test.")
-    test.add_argument(      "--no-build",     action="store_true",                    default=False,      help="(Testing) Do not rebuild MFC.")
-    test.add_argument(      "--no-examples",  action="store_true",                    default=False,      help="Do not test example cases." )
-    test.add_argument("--case-optimization",  action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
-    test.add_argument(      "--dry-run",      action="store_true",                    default=False,      help="Build and generate case files but do not run tests.")
+    test.add_argument("-o", "--only",         nargs="+", type=str,     default=[], metavar="L", help="Only run tests with specified properties.")
+    test.add_argument("-a", "--test-all",     action="store_true",     default=False,     help="Run the Post Process Tests too.")
+    test.add_argument("-%", "--percent",      type=int,                default=100,       help="Percentage of tests to run.")
+    test.add_argument("-m", "--max-attempts", type=int,                default=1,         help="Maximum number of attempts to run a test.")
+    test.add_argument("-r", "--rdma-mpi",     action="store_true",     default=False,     help="Enable RDMA MPI for tests")
+    test.add_argument(      "--no-build",     action="store_true",     default=False,     help="(Testing) Do not rebuild MFC.")
+    test.add_argument(      "--no-examples",  action="store_true",     default=False,     help="Do not test example cases." )
+    test.add_argument("--case-optimization",  action="store_true",     default=False,     help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
+    test.add_argument(      "--dry-run",      action="store_true",     default=False,     help="Build and generate case files but do not run tests.")
 
     test_meg = test.add_mutually_exclusive_group()
     test_meg.add_argument("--generate",          action="store_true", default=False, help="(Test Generation) Generate golden files.")
diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index 97d4fd121a..61d70772ab 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -133,7 +133,9 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
         filepath          = f'{self.get_dirpath()}/case.py'
         tasks             = ["-n", str(self.ppn)]
         jobs              = ["-j", str(ARG("jobs"))] if ARG("case_optimization") else []
-        case_optimization = ["--case-optimization"] if ARG("case_optimization") else []
+        case_optimization = ["--case-optimization"]  if ARG("case_optimization") else []
+        rdma_mpi_args     = ["--rdma-mpi"]           if ARG("rdma_mpi")          else []
+
 
         if self.params.get("bubbles_lagrange", 'F') == 'T':
             input_bubbles_lagrange(self)
@@ -144,7 +146,7 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
 
         command = [
             mfc_script, "run", filepath, "--no-build", *tasks, *case_optimization,
-            *jobs, "-t", *target_names, *gpus_select, *ARG("--")
+            *jobs, "-t", *target_names, *gpus_select, *rdma_mpi_args, *ARG("--")
         ]
 
         return common.system(command, print_cmd=False, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 65cd2803d2..123af36f1d 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -64,6 +64,10 @@ def __filter(cases_) -> typing.List[TestCase]:
             if any(label in case.trace for label in skip):
                 cases.remove(case)
 
+    for case in cases[:]:
+        if ARG("rdma_mpi") and case.ppn <= 1:
+            cases.remove(case)
+            skipped_cases.append(case)
 
     if ARG("no_examples"):
         cases = [case for case in cases if not "Example" in case.trace]
@@ -180,7 +184,10 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
         cons.print(f"  [bold magenta]{case.get_uuid()}[/bold magenta]     SKIP     {case.trace}")
         return
 
-    cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
+    if ARG("rdma_mpi"):
+        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices, rdma_mpi=True)
+    else:
+        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
     out_filepath = os.path.join(case.get_dirpath(), "out_pre_sim.txt")
 
     common.file_write(out_filepath, cmd.stdout)
@@ -224,6 +231,10 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
     if ARG("test_all"):
         case.delete_output()
         cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices)
+        if ARG("rdma_mpi"):
+            cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices, rdma_mpi=True)
+        else:
+            cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices)
         out_filepath = os.path.join(case.get_dirpath(), "out_post.txt")
         common.file_write(out_filepath, cmd.stdout)
 

From 0dbef83595d4bef6f15fdd1840029fe084c8f75c Mon Sep 17 00:00:00 2001
From: mohdsaid497566 <145478595+mohdsaid497566@users.noreply.github.com>
Date: Wed, 11 Jun 2025 15:23:39 -0400
Subject: [PATCH 02/15] modifications on the flag use

---
 toolchain/mfc/test/case.py  |  4 +---
 toolchain/mfc/test/cases.py |  4 +++-
 toolchain/mfc/test/test.py  | 12 +++++++-----
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index 61d70772ab..12bbc8fb07 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -134,8 +134,6 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
         tasks             = ["-n", str(self.ppn)]
         jobs              = ["-j", str(ARG("jobs"))] if ARG("case_optimization") else []
         case_optimization = ["--case-optimization"]  if ARG("case_optimization") else []
-        rdma_mpi_args     = ["--rdma-mpi"]           if ARG("rdma_mpi")          else []
-
 
         if self.params.get("bubbles_lagrange", 'F') == 'T':
             input_bubbles_lagrange(self)
@@ -146,7 +144,7 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
 
         command = [
             mfc_script, "run", filepath, "--no-build", *tasks, *case_optimization,
-            *jobs, "-t", *target_names, *gpus_select, *rdma_mpi_args, *ARG("--")
+            *jobs, "-t", *target_names, *gpus_select, *ARG("--")
         ]
 
         return common.system(command, print_cmd=False, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
diff --git a/toolchain/mfc/test/cases.py b/toolchain/mfc/test/cases.py
index 77659d7da6..11a9830ff3 100644
--- a/toolchain/mfc/test/cases.py
+++ b/toolchain/mfc/test/cases.py
@@ -3,6 +3,7 @@
 
 from mfc   import common
 from .case import Nt, define_case_d, define_case_f, CaseGeneratorStack, TestCaseBuilder
+from ..state import ARG
 
 def get_bc_mods(bc: int, dimInfo):
     params = {}
@@ -320,9 +321,10 @@ def alter_3d():
     def alter_ppn(dimInfo):
         if len(dimInfo[0]) == 3:
             cases.append(define_case_d(stack, '2 MPI Ranks', {'m': 29, 'n': 29, 'p': 49}, ppn=2))
+            cases.append(define_case_d(stack, '2 MPI Ranks -> RDMA MPI', {'m': 29, 'n': 29, 'p': 49, 'rdma_mpi': 'T'}, ppn=2))
         else:
             cases.append(define_case_d(stack, '2 MPI Ranks', {}, ppn=2))
-
+            cases.append(define_case_d(stack, '2 MPI Ranks -> RDMA MPI', {'rdma_mpi': 'T'}, ppn=2))
 
     def alter_ib(dimInfo, six_eqn_model=False):
         for slip in [True, False]:
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 123af36f1d..734083e2ad 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -56,7 +56,12 @@ def __filter(cases_) -> typing.List[TestCase]:
         if case.ppn > 1 and not ARG("mpi"):
             cases.remove(case)
             skipped_cases.append(case)
-    
+
+    for case in cases[:]:
+        if "RDMA MPI" in case.trace and not ARG("rdma_mpi"):
+            cases.remove(case)
+            skipped_cases.append(case)
+
     for case in cases[:]:
         if ARG("single"):
             skip = ['low_Mach', 'Hypoelasticity', 'teno', 'Chemistry', 'Phase Change model 6'
@@ -184,10 +189,7 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
         cons.print(f"  [bold magenta]{case.get_uuid()}[/bold magenta]     SKIP     {case.trace}")
         return
 
-    if ARG("rdma_mpi"):
-        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices, rdma_mpi=True)
-    else:
-        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
+    cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
     out_filepath = os.path.join(case.get_dirpath(), "out_pre_sim.txt")
 
     common.file_write(out_filepath, cmd.stdout)

From 3c492d421dfab07e4be796ad922f5116410a0abf Mon Sep 17 00:00:00 2001
From: malmahrouqi3 <malmahrouqi3@gatech.edu>
Date: Wed, 11 Jun 2025 16:22:41 -0400
Subject: [PATCH 03/15] flag use changes

---
 toolchain/mfc/test/case.py  | 5 +++--
 toolchain/mfc/test/cases.py | 1 -
 toolchain/mfc/test/test.py  | 7 ++++++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index 12bbc8fb07..f8120f37bc 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -134,7 +134,8 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
         tasks             = ["-n", str(self.ppn)]
         jobs              = ["-j", str(ARG("jobs"))] if ARG("case_optimization") else []
         case_optimization = ["--case-optimization"]  if ARG("case_optimization") else []
-
+        rdma_mpi_args     = ["--rdma-mpi"]           if ARG("rdma_mpi")          else []
+        
         if self.params.get("bubbles_lagrange", 'F') == 'T':
             input_bubbles_lagrange(self)
 
@@ -144,7 +145,7 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
 
         command = [
             mfc_script, "run", filepath, "--no-build", *tasks, *case_optimization,
-            *jobs, "-t", *target_names, *gpus_select, *ARG("--")
+            *jobs, "-t", *target_names, *gpus_select, *rdma_mpi_args, *ARG("--")
         ]
 
         return common.system(command, print_cmd=False, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
diff --git a/toolchain/mfc/test/cases.py b/toolchain/mfc/test/cases.py
index 11a9830ff3..26e082567a 100644
--- a/toolchain/mfc/test/cases.py
+++ b/toolchain/mfc/test/cases.py
@@ -3,7 +3,6 @@
 
 from mfc   import common
 from .case import Nt, define_case_d, define_case_f, CaseGeneratorStack, TestCaseBuilder
-from ..state import ARG
 
 def get_bc_mods(bc: int, dimInfo):
     params = {}
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 734083e2ad..331ae63a25 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -188,8 +188,13 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
     if ARG("dry_run"):
         cons.print(f"  [bold magenta]{case.get_uuid()}[/bold magenta]     SKIP     {case.trace}")
         return
+   
+    if ARG("rdma_mpi"):
+        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
+        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices, rdma_mpi=True)
+    else:
+        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
 
-    cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
     out_filepath = os.path.join(case.get_dirpath(), "out_pre_sim.txt")
 
     common.file_write(out_filepath, cmd.stdout)

From 8e27e4181245f53ea1f14c7dba6b25b8d9063037 Mon Sep 17 00:00:00 2001
From: malmahrouqi3 <malmahrouqi3@gatech.edu>
Date: Wed, 11 Jun 2025 16:27:59 -0400
Subject: [PATCH 04/15] lint

---
 toolchain/mfc/test/case.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index f8120f37bc..2a9171586d 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -135,7 +135,7 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
         jobs              = ["-j", str(ARG("jobs"))] if ARG("case_optimization") else []
         case_optimization = ["--case-optimization"]  if ARG("case_optimization") else []
         rdma_mpi_args     = ["--rdma-mpi"]           if ARG("rdma_mpi")          else []
-        
+
         if self.params.get("bubbles_lagrange", 'F') == 'T':
             input_bubbles_lagrange(self)
 

From 7fe5f32c2e35e672b758cffec0b50e8506822374 Mon Sep 17 00:00:00 2001
From: mohdsaid497566 <145478595+mohdsaid497566@users.noreply.github.com>
Date: Wed, 11 Jun 2025 16:42:11 -0400
Subject: [PATCH 05/15] updated testing docs

---
 docs/documentation/testing.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/documentation/testing.md b/docs/documentation/testing.md
index 15b1ca3618..ea294ffbca 100644
--- a/docs/documentation/testing.md
+++ b/docs/documentation/testing.md
@@ -16,6 +16,7 @@ A test is considered passing when our error tolerances are met in order to maint
 - `--percent` (`%`) to specify a percentage of the test suite to select at random and test
 - `--max-attempts` (`-m`) the maximum number of attempts to make on a test before considering it failed
 - `--no-examples` skips the testing of cases in the examples folder
+- `--rdma-mpi` (`-r`) runs additional tests where RDMA MPI is enabled.
 
 To specify a computer, pass the `-c` flag to `./mfc.sh run` like so:
 ```shell

From f788dfede915b0c3771c7fee95bb5495fea107eb Mon Sep 17 00:00:00 2001
From: mohdsaid497566 <145478595+mohdsaid497566@users.noreply.github.com>
Date: Wed, 11 Jun 2025 16:46:50 -0400
Subject: [PATCH 06/15] RDMA MPI added to Frontier CI

---
 .github/workflows/frontier/test.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index 539166e055..4064c33c22 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -4,7 +4,7 @@ gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n'
 ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c`
 
 if [ "$job_device" == "gpu" ]; then
-    ./mfc.sh test --max-attempts 3 -j $ngpus -- -c frontier
+    ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j $ngpus -- -c frontier
 else
-    ./mfc.sh test --max-attempts 3 -j 32 -- -c frontier
+    ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j 32 -- -c frontier
 fi

From 6aac583f077e027ca31fb1bd8086d3b77341d068 Mon Sep 17 00:00:00 2001
From: Mohammed Said Hamed Humaid Al-Mahrouqi
 <malmahrouqi3@atl1-1-02-002-27-1.pace.gatech.edu>
Date: Sun, 15 Jun 2025 14:27:16 -0400
Subject: [PATCH 07/15] removed -r short option and reworked rdma_mpi flag handling

---
 docs/documentation/testing.md | 2 +-
 toolchain/mfc/args.py         | 2 +-
 toolchain/mfc/test/case.py    | 7 +++----
 toolchain/mfc/test/test.py    | 8 +-------
 4 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/docs/documentation/testing.md b/docs/documentation/testing.md
index ea294ffbca..e139c5b201 100644
--- a/docs/documentation/testing.md
+++ b/docs/documentation/testing.md
@@ -16,7 +16,7 @@ A test is considered passing when our error tolerances are met in order to maint
 - `--percent` (`%`) to specify a percentage of the test suite to select at random and test
 - `--max-attempts` (`-m`) the maximum number of attempts to make on a test before considering it failed
 - `--no-examples` skips the testing of cases in the examples folder
-- `--rdma-mpi` (`-r`) runs additional tests where RDMA MPI is enabled.
+- `--rdma-mpi` runs additional tests where RDMA MPI is enabled.
 
 To specify a computer, pass the `-c` flag to `./mfc.sh run` like so:
 ```shell
diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
index 6f2c885bc3..11f766894e 100644
--- a/toolchain/mfc/args.py
+++ b/toolchain/mfc/args.py
@@ -80,7 +80,7 @@ def add_common_arguments(p, mask = None):
     test.add_argument("-a", "--test-all",     action="store_true",     default=False,     help="Run the Post Process Tests too.")
     test.add_argument("-%", "--percent",      type=int,                default=100,       help="Percentage of tests to run.")
     test.add_argument("-m", "--max-attempts", type=int,                default=1,         help="Maximum number of attempts to run a test.")
-    test.add_argument("-r", "--rdma-mpi",     action="store_true",     default=False,     help="Enable RDMA MPI for tests")
+    test.add_argument(      "--rdma-mpi",     action="store_true",     default=False,     help="Run tests with RDMA MPI enabled")
     test.add_argument(      "--no-build",     action="store_true",     default=False,     help="(Testing) Do not rebuild MFC.")
     test.add_argument(      "--no-examples",  action="store_true",     default=False,     help="Do not test example cases." )
     test.add_argument("--case-optimization",  action="store_true",     default=False,     help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index 2a9171586d..51705e2492 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -124,7 +124,7 @@ def __init__(self, trace: str, mods: dict, ppn: int = None, override_tol: float
         self.override_tol = override_tol
         super().__init__({**BASE_CFG.copy(), **mods})
 
-    def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subprocess.CompletedProcess:
+    def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int], rdma_mpi: bool = False) -> subprocess.CompletedProcess:
         if gpus is not None and len(gpus) != 0:
             gpus_select = ["--gpus"] + [str(_) for _ in gpus]
         else:
@@ -134,7 +134,6 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
         tasks             = ["-n", str(self.ppn)]
         jobs              = ["-j", str(ARG("jobs"))] if ARG("case_optimization") else []
         case_optimization = ["--case-optimization"]  if ARG("case_optimization") else []
-        rdma_mpi_args     = ["--rdma-mpi"]           if ARG("rdma_mpi")          else []
 
         if self.params.get("bubbles_lagrange", 'F') == 'T':
             input_bubbles_lagrange(self)
@@ -145,9 +144,9 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
 
         command = [
             mfc_script, "run", filepath, "--no-build", *tasks, *case_optimization,
-            *jobs, "-t", *target_names, *gpus_select, *rdma_mpi_args, *ARG("--")
+            *jobs, "-t", *target_names, *gpus_select, *ARG("--")
         ]
-
+        
         return common.system(command, print_cmd=False, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 
     def get_trace(self) -> str:
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 331ae63a25..288c5a2b39 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -69,11 +69,6 @@ def __filter(cases_) -> typing.List[TestCase]:
             if any(label in case.trace for label in skip):
                 cases.remove(case)
 
-    for case in cases[:]:
-        if ARG("rdma_mpi") and case.ppn <= 1:
-            cases.remove(case)
-            skipped_cases.append(case)
-
     if ARG("no_examples"):
         cases = [case for case in cases if not "Example" in case.trace]
 
@@ -189,8 +184,7 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
         cons.print(f"  [bold magenta]{case.get_uuid()}[/bold magenta]     SKIP     {case.trace}")
         return
    
-    if ARG("rdma_mpi"):
-        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
+    if "RDMA MPI" in case.trace:
         cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices, rdma_mpi=True)
     else:
         cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)

From 902b1ea34f0afcf906a7fcb29597069e02b3fd63 Mon Sep 17 00:00:00 2001
From: mohdsaid497566 <mohdsaid497566@gmail.com>
Date: Sun, 15 Jun 2025 16:32:16 -0400
Subject: [PATCH 08/15] re-run with lint/format

---
 toolchain/mfc/test/case.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index 51705e2492..12bbc8fb07 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -124,7 +124,7 @@ def __init__(self, trace: str, mods: dict, ppn: int = None, override_tol: float
         self.override_tol = override_tol
         super().__init__({**BASE_CFG.copy(), **mods})
 
-    def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int], rdma_mpi: bool = False) -> subprocess.CompletedProcess:
+    def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subprocess.CompletedProcess:
         if gpus is not None and len(gpus) != 0:
             gpus_select = ["--gpus"] + [str(_) for _ in gpus]
         else:
@@ -146,7 +146,7 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int], rdma_mpi: bo
             mfc_script, "run", filepath, "--no-build", *tasks, *case_optimization,
             *jobs, "-t", *target_names, *gpus_select, *ARG("--")
         ]
-        
+
         return common.system(command, print_cmd=False, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 
     def get_trace(self) -> str:

From c9b159913e6e687183df49423e5a15693aee32f4 Mon Sep 17 00:00:00 2001
From: mohdsaid497566 <145478595+mohdsaid497566@users.noreply.github.com>
Date: Sun, 15 Jun 2025 20:47:21 -0400
Subject: [PATCH 09/15] removed duplicate assignment of CMD

---
 toolchain/mfc/test/test.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 288c5a2b39..b788fec69b 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -183,7 +183,7 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
     if ARG("dry_run"):
         cons.print(f"  [bold magenta]{case.get_uuid()}[/bold magenta]     SKIP     {case.trace}")
         return
-   
+
     if "RDMA MPI" in case.trace:
         cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices, rdma_mpi=True)
     else:
@@ -231,7 +231,6 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
 
     if ARG("test_all"):
         case.delete_output()
-        cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices)
         if ARG("rdma_mpi"):
             cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices, rdma_mpi=True)
         else:

From 783991674c7d93f6d798900ecd3bd4706fb003fe Mon Sep 17 00:00:00 2001
From: malmahrouqi3 <malmahrouqi3@gatech.edu>
Date: Mon, 16 Jun 2025 10:13:45 -0400
Subject: [PATCH 10/15] redundant code

---
 toolchain/mfc/test/test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index b788fec69b..28c4445462 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -231,10 +231,10 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
 
     if ARG("test_all"):
         case.delete_output()
-        if ARG("rdma_mpi"):
-            cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices, rdma_mpi=True)
-        else:
-            cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices)
+        # if ARG("rdma_mpi"):
+        #     cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices, rdma_mpi=True)
+        # else:
+        cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices)
         out_filepath = os.path.join(case.get_dirpath(), "out_post.txt")
         common.file_write(out_filepath, cmd.stdout)
 

From 76b1060cbca729ed427ea873a187a17be6f175d2 Mon Sep 17 00:00:00 2001
From: malmahrouqi3 <malmahrouqi3@gatech.edu>
Date: Sat, 28 Jun 2025 03:57:05 -0400
Subject: [PATCH 11/15] added rdma flag to frontier dry run

---
 .github/workflows/frontier/build.sh | 2 +-
 toolchain/mfc/test/test.py          | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh
index 4aa0ffe64e..1f442a70b3 100644
--- a/.github/workflows/frontier/build.sh
+++ b/.github/workflows/frontier/build.sh
@@ -6,4 +6,4 @@ if [ "$1" == "gpu" ]; then
 fi
 
 . ./mfc.sh load -c f -m g
-./mfc.sh test --dry-run -j 8 $build_opts
+./mfc.sh test --dry-run --rdma-mpi -j 8 $build_opts
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 28c4445462..5d59db1b06 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -231,9 +231,6 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
 
     if ARG("test_all"):
         case.delete_output()
-        # if ARG("rdma_mpi"):
-        #     cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices, rdma_mpi=True)
-        # else:
         cmd = case.run([PRE_PROCESS, SIMULATION, POST_PROCESS], gpus=devices)
         out_filepath = os.path.join(case.get_dirpath(), "out_post.txt")
         common.file_write(out_filepath, cmd.stdout)

From ed07060e704e36b77016bcd6750472652653c586 Mon Sep 17 00:00:00 2001
From: Malmahrouqi3 <almahrou@r002.ib.bridges2.psc.edu>
Date: Sun, 29 Jun 2025 00:55:28 -0400
Subject: [PATCH 12/15] dissociate forcefully rdma_mpi & test_all

---
 .github/workflows/frontier/test.sh | 2 +-
 toolchain/mfc/test/test.py         | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index 4064c33c22..0f86f6df32 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -6,5 +6,5 @@ ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c`
 if [ "$job_device" == "gpu" ]; then
     ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j $ngpus -- -c frontier
 else
-    ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j 32 -- -c frontier
+    ./mfc.sh test -a --max-attempts 3 -j 32 -- -c frontier
 fi
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 5d59db1b06..0a7067de2c 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -5,7 +5,7 @@
 
 from ..printer import cons
 from ..        import common
-from ..state   import ARG
+from ..state   import ARG, ARGS
 from .case     import TestCase
 from .cases    import list_cases
 from ..        import sched
@@ -88,7 +88,7 @@ def test():
     global errors
 
     cases = list_cases()
-
+    
     # Delete UUIDs that are not in the list of cases from tests/
     if ARG("remove_old_tests"):
         dir_uuids = set(os.listdir(common.MFC_TEST_DIR))
@@ -119,6 +119,9 @@ def test():
     # Some cases require a specific build of MFC for features like Chemistry,
     # Analytically defined patches, and --case-optimization. Here, we build all
     # the unique versions of MFC we need to run cases.
+    if ARG("rdma_mpi") == True:
+        ARGS()["test_all"] = False
+
     codes = [PRE_PROCESS, SIMULATION] + ([POST_PROCESS] if ARG('test_all') else [])
     unique_builds = set()
     for case, code in itertools.product(cases, codes):

From de73a43d339b025c19e468eab222bbecfc89649e Mon Sep 17 00:00:00 2001
From: Malmahrouqi3 <mohdsaid497566@gmail.com>
Date: Sun, 29 Jun 2025 00:58:30 -0400
Subject: [PATCH 13/15] dissociate forcefully rdma_mpi from test_all

---
 .github/workflows/frontier/test.sh | 4 ++--
 toolchain/mfc/run/run.py           | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index 0f86f6df32..9962090a3c 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -4,7 +4,7 @@ gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n'
 ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c`
 
 if [ "$job_device" == "gpu" ]; then
-    ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j $ngpus -- -c frontier
+    ./mfc.sh test --rdma-mpi --max-attempts 3 -j $ngpus -- -c frontier
 else
-    ./mfc.sh test -a --max-attempts 3 -j 32 -- -c frontier
+    ./mfc.sh test --max-attempts 3 -j 32 -- -c frontier
 fi
diff --git a/toolchain/mfc/run/run.py b/toolchain/mfc/run/run.py
index fb7d528ff9..00a977fcfe 100644
--- a/toolchain/mfc/run/run.py
+++ b/toolchain/mfc/run/run.py
@@ -13,8 +13,7 @@
 from ..common  import format_list_to_string, file_dump_yaml
 
 from . import queues, input
-
-
+    
 def __validate_job_options() -> None:
     if not ARG("mpi") and any({ARG("nodes") > 1, ARG("tasks_per_node") > 1}):
         raise MFCException("RUN: Cannot run on more than one rank with --no-mpi.")

From a201637352139a68571dfe77b0bfc66a40f04841 Mon Sep 17 00:00:00 2001
From: Malmahrouqi3 <mohdsaid497566@gmail.com>
Date: Sun, 29 Jun 2025 01:29:46 -0400
Subject: [PATCH 14/15] lint and quick fix

---
 toolchain/mfc/run/run.py   | 3 ++-
 toolchain/mfc/test/test.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/toolchain/mfc/run/run.py b/toolchain/mfc/run/run.py
index 00a977fcfe..fb7d528ff9 100644
--- a/toolchain/mfc/run/run.py
+++ b/toolchain/mfc/run/run.py
@@ -13,7 +13,8 @@
 from ..common  import format_list_to_string, file_dump_yaml
 
 from . import queues, input
-    
+
+
 def __validate_job_options() -> None:
     if not ARG("mpi") and any({ARG("nodes") > 1, ARG("tasks_per_node") > 1}):
         raise MFCException("RUN: Cannot run on more than one rank with --no-mpi.")
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 0a7067de2c..aa0baf21cd 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -119,7 +119,7 @@ def test():
     # Some cases require a specific build of MFC for features like Chemistry,
     # Analytically defined patches, and --case-optimization. Here, we build all
     # the unique versions of MFC we need to run cases.
-    if ARG("rdma_mpi") == True:
+    if ARG("rdma_mpi") and ARG("dry_run"):
         ARGS()["test_all"] = False
 
     codes = [PRE_PROCESS, SIMULATION] + ([POST_PROCESS] if ARG('test_all') else [])

From fd71293b6ddb6f3e9bb22715db07f281c2eed569 Mon Sep 17 00:00:00 2001
From: mohdsaid497566 <mohdsaid497566@gmail.com>
Date: Thu, 10 Jul 2025 21:15:22 -0400
Subject: [PATCH 15/15] quick fix for rdma_mpi not recognized

---
 toolchain/mfc/test/test.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 4e202e01bd..5e605be84d 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -192,10 +192,7 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
         cons.print(f"  [bold magenta]{case.get_uuid()}[/bold magenta]     SKIP     {case.trace}")
         return
 
-    if "RDMA MPI" in case.trace:
-        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices, rdma_mpi=True)
-    else:
-        cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
+    cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
 
     out_filepath = os.path.join(case.get_dirpath(), "out_pre_sim.txt")