Skip to content

Commit b0f1094

Browse files
[Test] Run StarCCM test multiple times and use the average of the results to reduce the noise of individual runs
1 parent 6b9abc7 commit b0f1094

File tree

1 file changed

+21
-26
lines changed

1 file changed

+21
-26
lines changed

tests/integration-tests/tests/performance_tests/test_starccm.py

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -99,35 +99,30 @@ def test_starccm(
9999

100100
# Copy additional files in advance to avoid conflicts when running 8- and 16-node tests in parallel
101101
remote_command_executor._copy_additional_files([str(test_datadir / "starccm.slurm.sh")])
102-
# Run 8 and 16 node tests in parallel
103-
result_8 = remote_command_executor.run_remote_command(
104-
f'sbatch --ntasks={number_of_nodes[0] * TASK_VCPUS} starccm.slurm.sh "{podkey}" "{licpath}"'
105-
)
106-
logging.info(f"Submitting StarCCM+ job with {number_of_nodes[0]} nodes")
107-
result_16 = remote_command_executor.run_remote_command(
108-
f'sbatch --ntasks={number_of_nodes[1] * TASK_VCPUS} starccm.slurm.sh "{podkey}" "{licpath}"'
109-
)
110-
logging.info(f"Submitting StarCCM+ job with {number_of_nodes[1]} nodes")
111-
observed_value_8 = calculate_observed_value(
112-
result_8, remote_command_executor, scheduler_commands, test_datadir, number_of_nodes[0]
113-
)
114-
observed_value_16 = calculate_observed_value(
115-
result_16, remote_command_executor, scheduler_commands, test_datadir, number_of_nodes[1]
116-
)
117102

118-
# Run 32 node test
119-
result_32 = remote_command_executor.run_remote_command(
120-
f'sbatch --ntasks={number_of_nodes[2] * TASK_VCPUS} starccm.slurm.sh "{podkey}" "{licpath}"'
121-
)
122-
logging.info(f"Submitting StarCCM+ job with {number_of_nodes[2]} nodes")
123-
observed_value_32 = calculate_observed_value(
124-
result_32, remote_command_executor, scheduler_commands, test_datadir, number_of_nodes[2]
125-
)
103+
max_node_num = max(number_of_nodes)
104+
final_result = []
105+
for num_of_nodes in number_of_nodes:
106+
parallelism = int(max_node_num / num_of_nodes)
107+
result = []
108+
logging.info(f"Submitting StarCCM+ job with {num_of_nodes} nodes")
109+
run_command = f'sbatch --ntasks={num_of_nodes * TASK_VCPUS} starccm.slurm.sh "{podkey}" "{licpath}"'
110+
multiple_runs = []
111+
# Run at least twice, and up to whatever parallelism allows, to maximize usage of available nodes
112+
number_of_runs = max(parallelism, 2)
113+
for _ in range(number_of_runs):
114+
multiple_runs.append(remote_command_executor.run_remote_command(run_command))
115+
for run in multiple_runs:
116+
result.append(
117+
calculate_observed_value(run, remote_command_executor, scheduler_commands, test_datadir, num_of_nodes)
118+
)
119+
final_result.append((num_of_nodes, sum(result) / len(result)))  # Use the average to reduce the noise of individual runs.
120+
logging.info(f"Finished StarCCM+ job with {num_of_nodes} nodes")
121+
122+
push_result_to_dynamodb("StarCCM", final_result, instance, os)
126123

127124
# Check results and log performance degradation
128-
result = list(zip(number_of_nodes, [observed_value_8, observed_value_16, observed_value_32]))
129-
push_result_to_dynamodb("StarCCM", result, instance, os)
130-
for node, observed_value in result:
125+
for node, observed_value in final_result:
131126
baseline_value = BASELINE_CLUSTER_SIZE_ELAPSED_SECONDS[os][node]
132127
_log_output_performance_difference(node, performance_degradation, observed_value, baseline_value)
133128

0 commit comments

Comments
 (0)