From b55a5439eaf22a8e5eaa2c035bd6725c688d38c9 Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 26 Mar 2025 15:32:11 +0530 Subject: [PATCH 1/2] CI: run POT3D validation with MPI rank 4 as well --- tests/pot3d/validate.sh | 49 +++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/tests/pot3d/validate.sh b/tests/pot3d/validate.sh index c8f10a4..0f463f4 100755 --- a/tests/pot3d/validate.sh +++ b/tests/pot3d/validate.sh @@ -7,36 +7,23 @@ MPIEXEC=${CONDA_PREFIX}/bin/mpiexec cp ${POT3D_HOME}/testsuite/${TEST}/input/* ${POT3D_HOME}/testsuite/${TEST}/run/ cd ${POT3D_HOME}/testsuite/${TEST}/run -echo "Running POT3D with 1 MPI rank..." -${MPIEXEC} -np 1 ${POT3D_HOME}/bin/pot3d 1> pot3d.log 2>pot3d.err -echo "Done!" - -runtime=($(tail -n 5 timing.out | head -n 1)) -echo "Wall clock time: ${runtime[6]} seconds" -echo " " - -#Validate run: -${POT3D_HOME}/scripts/pot3d_validation.sh pot3d.out ${POT3D_HOME}/testsuite/${TEST}/validation/pot3d.out -if [ $? -ne 0 ]; then - echo "Validation failed for 1 MPI rank. Exiting..." - exit 1 -fi +for np in 1 2 4; do + rm -rf pot3d.log pot3d.out timing.out + echo "Running POT3D with $np MPI rank..." + ${MPIEXEC} -np ${np} ${POT3D_HOME}/bin/pot3d 1> pot3d.log 2>pot3d.err + echo "Done!" + + runtime=($(tail -n 5 timing.out | head -n 1)) + echo "Wall clock time: ${runtime[6]} seconds" + echo " " + + #Validate run: + ${POT3D_HOME}/scripts/pot3d_validation.sh pot3d.out ${POT3D_HOME}/testsuite/${TEST}/validation/pot3d.out + if [ $? -ne 0 ]; then + echo "Validation failed for $np MPI rank. Exiting..." + exit 1 + fi + echo "" +done -rm pot3d.log pot3d.out timing.out -echo " " -echo "Running POT3D with 2 MPI ranks..." -${MPIEXEC} -np 2 ${POT3D_HOME}/bin/pot3d 1> pot3d.log 2>pot3d.err echo "Done!" 
- -runtime=($(tail -n 5 timing.out | head -n 1)) -echo "Wall clock time: ${runtime[6]} seconds" -echo " " - -#Validate run: -${POT3D_HOME}/scripts/pot3d_validation.sh pot3d.out ${POT3D_HOME}/testsuite/${TEST}/validation/pot3d.out -if [ $? -ne 0 ]; then - echo "Validation failed for 2 MPI rank. Exiting..." - exit 1 -fi -echo " " - From 19be2b431d3ba133d7eb0415d6759f7f582f3ebf Mon Sep 17 00:00:00 2001 From: Gaurav Dhingra Date: Wed, 26 Mar 2025 15:49:24 +0530 Subject: [PATCH 2/2] use `--oversubscribe` with Open MPI when running with MPI rank 4 --- tests/pot3d/validate.sh | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/pot3d/validate.sh b/tests/pot3d/validate.sh index 0f463f4..7dfe013 100755 --- a/tests/pot3d/validate.sh +++ b/tests/pot3d/validate.sh @@ -7,10 +7,34 @@ MPIEXEC=${CONDA_PREFIX}/bin/mpiexec cp ${POT3D_HOME}/testsuite/${TEST}/input/* ${POT3D_HOME}/testsuite/${TEST}/run/ cd ${POT3D_HOME}/testsuite/${TEST}/run +# detect MPI implementation +MPI_VERSION=$($MPIEXEC --version 2>&1) +if echo "$MPI_VERSION" | grep -q "Open MPI"; then + MPI_TYPE="openmpi" +elif echo "$MPI_VERSION" | grep -q "MPICH"; then + MPI_TYPE="mpich" +else + # we don't yet support other MPI implementations + # like Intel MPI etc. + echo -e "${RED}Unknown MPI implementation!${NC}" + exit 1 +fi + for np in 1 2 4; do rm -rf pot3d.log pot3d.out timing.out + + # set MPIEXEC_ARGS based on MPI type and number of ranks + # `--oversubscribe` isn't needed with Open MPI when running + # with 1 or 2 ranks + if [[ "$MPI_TYPE" == "openmpi" && $np -gt 2 ]]; then + MPIEXEC_ARGS="--oversubscribe" + else + MPIEXEC_ARGS="" + fi + echo "Running POT3D with $np MPI rank..." - ${MPIEXEC} -np ${np} ${POT3D_HOME}/bin/pot3d 1> pot3d.log 2>pot3d.err + + ${MPIEXEC} -np ${np} ${MPIEXEC_ARGS} ${POT3D_HOME}/bin/pot3d 1> pot3d.log 2>pot3d.err echo "Done!" runtime=($(tail -n 5 timing.out | head -n 1))