From 91cb8e7f04db11896ed2e26127446c5b39c353c6 Mon Sep 17 00:00:00 2001 From: Giacomo Marciani Date: Thu, 20 Mar 2025 15:21:35 -0400 Subject: [PATCH 1/2] [Test] Reduce noise from metadata reporting, moving the reporting-region log from info to debug and logging metadata table creation failures at error level instead of info. --- tests/integration-tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration-tests/conftest.py b/tests/integration-tests/conftest.py index 1fee1f7e46..42a25fb19f 100644 --- a/tests/integration-tests/conftest.py +++ b/tests/integration-tests/conftest.py @@ -330,11 +330,11 @@ def pytest_collection_finish(session): region = [unmarshal_az_override(az) for az in region] # Use the first element of the list of regions, since there must be at least one reporting_region = get_reporting_region(region[0]) - logging.info(f"Metadata reporting region {reporting_region}") + logging.debug(f"Metadata reporting region {reporting_region}") # Setup the metadata table in case it doesn't exist MetadataTableManager(reporting_region, METADATA_TABLE).create_metadata_table() except Exception as exc: - logging.info(f"There was a '{type(exc)}' error with '{exc}' when creating the table!") + logging.error(f"There was a '{type(exc)}' error with '{exc}' when creating the table!") def _log_collected_tests(session): From 01e9fa7b2bdb729e0acd4b95aa75b81a52b1d51f Mon Sep 17 00:00:00 2001 From: Giacomo Marciani Date: Thu, 20 Mar 2025 12:34:24 -0400 Subject: [PATCH 2/2] [Test] In test_scontrol_reboot, increase the timeout to wait for compute nodes to reboot from 400s to 500s in AL23, Rocky and RHEL as we observed an increase in bootstrap time. 
--- tests/integration-tests/tests/schedulers/test_slurm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration-tests/tests/schedulers/test_slurm.py b/tests/integration-tests/tests/schedulers/test_slurm.py index 41c2ed9ba0..cc9fdf9a8a 100644 --- a/tests/integration-tests/tests/schedulers/test_slurm.py +++ b/tests/integration-tests/tests/schedulers/test_slurm.py @@ -782,6 +782,7 @@ def test_scontrol_reboot( clusters_factory, test_datadir, scheduler_commands_factory, + os, ): cluster_config = pcluster_config_reader() cluster = clusters_factory(cluster_config) @@ -798,11 +799,14 @@ def test_scontrol_reboot( slots=2, constraint="dynamic", ) + # TOFIX We observe in 3.13.0 an increase in the bootstrap time for AL2023, Rocky and RHEL. + # We must address it and restore the default wait time to 400s. + stop_max_delay_secs = 500 if (os == "alinux2023" or os.startswith("rocky") or os.startswith("rhel")) else 400 wait_for_compute_nodes_states( slurm_commands, ["queue1-dy-cr1-1", "queue1-dy-cr1-2"], "idle", - stop_max_delay_secs=400, + stop_max_delay_secs=stop_max_delay_secs, ) # Test that idle static and dynamic nodes can be rebooted