From 8e9eb464838123b1cad812e2b286f70d414c8f73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9renger=20Berthoul?= Date: Mon, 24 Mar 2025 17:32:48 +0100 Subject: [PATCH] Shell and SLURM scheduler: give an absolute error log file. Shell: test if MPI works with sub-processes. Added exception.py --- pytest_parallel/exception.py | 18 ++++++++++++++ pytest_parallel/plugin.py | 29 +++++++++++------------ pytest_parallel/shell_static_scheduler.py | 11 +++++++++ pytest_parallel/slurm_scheduler.py | 2 ++ 4 files changed, 45 insertions(+), 15 deletions(-) create mode 100644 pytest_parallel/exception.py diff --git a/pytest_parallel/exception.py b/pytest_parallel/exception.py new file mode 100644 index 0000000..7f9ad1f --- /dev/null +++ b/pytest_parallel/exception.py @@ -0,0 +1,18 @@ + +def _to_bold_red(s): + red = '\x1b[31m' + bold = '\x1b[1m' + reset = '\x1b[0m' + return red + bold + s + reset + +class PytestParallelInternalError(Exception): + def __init__(self, msg): + Exception.__init__(self, _to_bold_red('pytest_parallel internal error')+'\n' + msg) + +class PytestParallelUsageError(Exception): + def __init__(self, msg): + Exception.__init__(self, _to_bold_red('You are calling pytest_parallel incorrectly')+'\n' + msg) + +class PytestParallelEnvError(Exception): + def __init__(self, msg): + Exception.__init__(self, _to_bold_red('pytest_parallel environment error:')+'\n' + msg) diff --git a/pytest_parallel/plugin.py b/pytest_parallel/plugin.py index 2dca271..08ac9cf 100644 --- a/pytest_parallel/plugin.py +++ b/pytest_parallel/plugin.py @@ -11,8 +11,7 @@ import pytest from _pytest.terminal import TerminalReporter -class PytestParallelError(ValueError): - pass +from .exception import PytestParallelUsageError, PytestParallelInternalError # -------------------------------------------------------------------------- def pytest_addoption(parser): @@ -58,13 +57,13 @@ def pytest_addoption(parser): ' (because importing mpi4py.MPI makes the current process look like and MPI process,' \ ' and SLURM does not like that)' if os.getenv('I_MPI_MPIRUN') is not None: - err_msg = 'Internal pytest_parallel error: the environment variable I_MPI_MPIRUN is set' \ + err_msg = 'The environment variable I_MPI_MPIRUN is set' \ f' (it has value "{os.getenv("I_MPI_MPIRUN")}"),\n' \ ' while pytest was invoked with "--scheduler=slurm".\n' \ ' This indicates that pytest was run through MPI, and SLURM generally does not like that.\n' \ ' With "--scheduler=slurm", just run `pytest` directly, not through `mpirun/mpiexec/srun`,\n' \ ' because it will launch MPI itself (you may want to use --n-workers=).' - raise PytestParallelError(err_msg) + raise PytestParallelInternalError(err_msg) r = subprocess.run(['env','--null'], stdout=subprocess.PIPE) # `--null`: end each output line with NUL, required by `sbatch --export-file` @@ -109,16 +108,16 @@ def pytest_configure(config): assert not is_worker, f'Internal pytest_parallel error `--_worker` not available with`--scheduler={scheduler}`' if scheduler in ['slurm', 'shell'] and not is_worker: if n_workers is None: - raise PytestParallelError(f'You need to specify `--n-workers` when `--scheduler={scheduler}`') + raise PytestParallelUsageError(f'You need to specify `--n-workers` when `--scheduler={scheduler}`') if scheduler != 'slurm': if slurm_options is not None: - raise PytestParallelError('Option `--slurm-options` only available when `--scheduler=slurm`') + raise PytestParallelUsageError('Option `--slurm-options` only available when `--scheduler=slurm`') if slurm_srun_options is not None: - raise PytestParallelError('Option `--slurms-run-options` only available when `--scheduler=slurm`') + raise PytestParallelUsageError('Option `--slurms-run-options` only available when `--scheduler=slurm`') if slurm_init_cmds is not None: - raise PytestParallelError('Option `--slurm-init-cmds` only available when `--scheduler=slurm`') + raise PytestParallelUsageError('Option `--slurm-init-cmds` only available when `--scheduler=slurm`') if slurm_file is not None: - raise PytestParallelError('Option `--slurm-file` only available when `--scheduler=slurm`') + raise PytestParallelUsageError('Option `--slurm-file` only available when `--scheduler=slurm`') if scheduler in ['shell', 'slurm'] and not is_worker: from mpi4py import MPI @@ -126,24 +125,24 @@ def pytest_configure(config): err_msg = 'Do not launch `pytest_parallel` on more that one process when `--scheduler=shell` or `--scheduler=slurm`.\n' \ '`pytest_parallel` will spawn MPI processes itself.\n' \ f'You may want to use --n-workers={MPI.COMM_WORLD.size}.' - raise PytestParallelError(err_msg) + raise PytestParallelUsageError(err_msg) if scheduler == 'slurm' and not is_worker: if slurm_options is None and slurm_file is None: - raise PytestParallelError('You need to specify either `--slurm-options` or `--slurm-file` when `--scheduler=slurm`') + raise PytestParallelUsageError('You need to specify either `--slurm-options` or `--slurm-file` when `--scheduler=slurm`') if slurm_options: if slurm_file: - raise PytestParallelError('You need to specify either `--slurm-options` or `--slurm-file`, but not both') + raise PytestParallelUsageError('You need to specify either `--slurm-options` or `--slurm-file`, but not both') if slurm_file: if slurm_options: - raise PytestParallelError('You need to specify either `--slurm-options` or `--slurm-file`, but not both') + raise PytestParallelUsageError('You need to specify either `--slurm-options` or `--slurm-file`, but not both') if slurm_init_cmds: - raise PytestParallelError('You cannot specify `--slurm-init-cmds` together with `--slurm-file`') + raise PytestParallelUsageError('You cannot specify `--slurm-init-cmds` together with `--slurm-file`') if '-n=' in slurm_options or '--ntasks=' in slurm_options: - raise PytestParallelError('Do not specify `-n/--ntasks` in `--slurm-options` (it is deduced from the `--n-worker` value).') + raise PytestParallelUsageError('Do not specify `-n/--ntasks` in `--slurm-options` (it is deduced from the `--n-worker` value).') from .slurm_scheduler import SlurmScheduler diff --git a/pytest_parallel/shell_static_scheduler.py b/pytest_parallel/shell_static_scheduler.py index bc8b4a3..7069a3a 100644 --- a/pytest_parallel/shell_static_scheduler.py +++ b/pytest_parallel/shell_static_scheduler.py @@ -3,6 +3,7 @@ import subprocess import socket import pickle +from pathlib import Path import pytest from mpi4py import MPI @@ -12,6 +13,7 @@ from .utils.items import add_n_procs, run_item_test, mark_original_index, mark_skip from .utils.file import remove_exotic_chars, create_folders from .static_scheduler_utils import group_items_by_parallel_steps +from .exception import PytestParallelEnvError def mpi_command(current_proc, n_proc): mpi_vendor = MPI.get_vendor()[0] @@ -39,6 +41,7 @@ def submit_items(items_to_run, SCHEDULER_IP_ADDRESS, port, session_folder, main_ for item in items: test_idx = item.original_index test_out_file = f'.pytest_parallel/{session_folder}/{remove_exotic_chars(item.nodeid)}' + test_out_file = str(Path(test_out_file).absolute()) # easier to find the file if absolute cmd = '(' cmd += mpi_command(current_proc, item.n_proc) cmd += f' python3 -u -m pytest -s --_worker {socket_flags} {main_invoke_params} --_test_idx={test_idx} {item.config.rootpath}/{item.nodeid}' @@ -106,6 +109,14 @@ def __init__(self, main_invoke_params, ntasks, detach): self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # TODO close at the end + # Check that MPI can be called in a subprocess (not the case with OpenMPI 4.0.5, see #17) + p = subprocess.run('mpirun -np 1 echo mpi_can_be_called_from_subprocess', shell=True) + if p.returncode != 0: + raise PytestParallelEnvError( + "Your MPI implementation does not handle MPI being called from a sub-process\n" + "Either update your MPI version or use another scheduler. See https://github.com/onera/pytest_parallel/issues/17" + ) + @pytest.hookimpl(tryfirst=True) def pytest_pyfunc_call(self, pyfuncitem): # This is where the test is normally run. diff --git a/pytest_parallel/slurm_scheduler.py b/pytest_parallel/slurm_scheduler.py index 73964e5..15b94a3 100644 --- a/pytest_parallel/slurm_scheduler.py +++ b/pytest_parallel/slurm_scheduler.py @@ -1,6 +1,7 @@ import subprocess import socket import pickle +from pathlib import Path import pytest @@ -44,6 +45,7 @@ def submit_items(items_to_run, socket, session_folder, main_invoke_params, ntask for item in items: test_idx = item.original_index test_out_file = f'.pytest_parallel/{session_folder}/{remove_exotic_chars(item.nodeid)}' + test_out_file = str(Path(test_out_file).absolute()) # easier to find the file if absolute cmd = '(' cmd += f'srun {srun_options}' cmd += ' --exclusive'