Commit 3ea5186

Group test_starccm and openfoam, and improve logging message output
- Move duplicated code to the common file
- Align the two tests' config files so that the same config can be used by both tests
- Add dependencies.install.sh to the openfoam test dir as well to avoid failures
- Create a new shared_performance_test_cluster fixture to group the two tests on the same stack
- No longer log errors when openfoam or starccm is not installed
- Log a "matching baseline" message when percentage_difference is 0
1 parent d756150 commit 3ea5186

7 files changed, +117 -64 lines changed


tests/integration-tests/tests/performance_tests/common.py

Lines changed: 27 additions & 0 deletions
@@ -27,6 +27,7 @@
 PYTEST_PARAMETERIZE_VALUES = [(NUM_COMPUTE_NODES, 1)]
 TEST_RUNNER_SCRIPT = "/shared/assets/workloads/scale-test/run-scale-test.sh"
 ROUND_UP_FACTOR = 100_000_000
+PERF_TEST_DIFFERENCE_TOLERANCE = 3
 
 METRICS = [
     dict(name="jobRunTime", unit="ms"),
@@ -222,3 +223,29 @@ def write_results_to_output_dir(
         paths["baseline"]["statistics.json"],
         paths[candidate_configuration]["statistics.json"],
     )
+
+
+def perf_test_difference(observed_value, baseline_value):
+    percentage_difference = 100 * (observed_value - baseline_value) / baseline_value
+    return percentage_difference
+
+
+def _log_output_performance_difference(node, performance_degradation, observed_value, baseline_value):
+    percentage_difference = perf_test_difference(observed_value, baseline_value)
+    if percentage_difference < 0:
+        outcome = "improvement"
+    elif percentage_difference == 0:
+        outcome = "matching baseline"
+    elif percentage_difference <= PERF_TEST_DIFFERENCE_TOLERANCE:
+        outcome = "degradation (within tolerance)"
+    else:
+        outcome = "degradation (above tolerance)"
+        performance_degradation[node] = {
+            "baseline": baseline_value,
+            "observed": observed_value,
+            "percentage_difference": percentage_difference,
+        }
+    logging.info(
+        f"Nodes: {node}, Baseline: {baseline_value} seconds, Observed: {observed_value} seconds, "
+        f"Percentage difference: {percentage_difference}%, Outcome: {outcome}"
+    )
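
For orientation, a minimal usage sketch of the two helpers above. The import path follows the diff; the 8-node baseline/observed timings are hypothetical and chosen only to land within the 3% tolerance.

# Usage sketch with hypothetical numbers, exercising the helpers added to common.py above.
import logging

from tests.performance_tests.common import _log_output_performance_difference, perf_test_difference

logging.basicConfig(level=logging.INFO)

performance_degradation = {}
baseline_value, observed_value = 66.494, 67.2  # elapsed seconds for an 8-node run (made up)

# 100 * (67.2 - 66.494) / 66.494 is about 1.06, i.e. within the 3% tolerance.
print(perf_test_difference(observed_value, baseline_value))

# Logs the "Nodes: ..., Outcome: ..." line and records any above-tolerance
# degradation for node count 8 in performance_degradation.
_log_output_performance_difference(8, performance_degradation, observed_value, baseline_value)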
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License").
+# You may not use this file except in compliance with the License.
+# A copy of the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "LICENSE.txt" file accompanying this file.
+# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import logging
+
+import boto3
+import pytest
+
+OSS_REQUIRING_EXTRA_DEPS = ["alinux2023", "rhel8", "rocky8"]
+NUMBER_OF_NODES = [8, 16, 32]
+
+
+@pytest.fixture(scope="session")
+def shared_performance_test_cluster(
+    vpc_stack, pcluster_config_reader, clusters_factory, test_datadir, s3_bucket_factory
+):
+
+    def _shared_performance_test_cluster(instance, os, region, scheduler):
+        bucket_name = s3_bucket_factory()
+        s3 = boto3.client("s3")
+        s3.upload_file(str(test_datadir / "dependencies.install.sh"), bucket_name, "scripts/dependencies.install.sh")
+
+        cluster_config = pcluster_config_reader(
+            bucket_name=bucket_name,
+            install_extra_deps=os in OSS_REQUIRING_EXTRA_DEPS,
+            number_of_nodes=max(NUMBER_OF_NODES),
+        )
+        cluster = clusters_factory(cluster_config)
+        logging.info("Cluster Created")
+        return cluster
+
+    return _shared_performance_test_cluster
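
To make the grouping concrete, a sketch of how a test module consumes this fixture (it mirrors the test_openfoam and test_starccm changes below; the test name and body are illustrative only). Because the fixture is session-scoped and the two tests now render the same cluster config, both can run against the same stack.

# Hypothetical consumer of the session-scoped fixture above; only the fixture name and
# call signature come from the diff, the rest is illustrative.
from remote_command_executor import RemoteCommandExecutor


def test_example_workload(instance, os, region, scheduler, shared_performance_test_cluster):
    # Request (or reuse) the shared performance-test cluster for this session.
    cluster = shared_performance_test_cluster(instance, os, region, scheduler)
    remote_command_executor = RemoteCommandExecutor(cluster)
    # ... submit the workload via remote_command_executor and compare timings against baselines ...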

tests/integration-tests/tests/performance_tests/test_openfoam.py

Lines changed: 7 additions & 27 deletions
@@ -4,6 +4,8 @@
 import pytest
 from remote_command_executor import RemoteCommandExecutionError, RemoteCommandExecutor
 
+from tests.performance_tests.common import _log_output_performance_difference
+
 # timeout in seconds
 OPENFOAM_INSTALLATION_TIMEOUT = 300
 OPENFOAM_JOB_TIMEOUT = 5400  # Takes long time because during the first time, it's not only execute the job but also
@@ -17,20 +19,15 @@
     "rhel8": {8: 742, 16: 376, 32: 185},  # v3.6.0 just a placeholder, RHEL8 not supported
     "rocky8": {8: 742, 16: 376, 32: 185},  # v3.8.0 just a placeholder, Rocky8 not supported
 }
-PERF_TEST_DIFFERENCE_TOLERANCE = 3
-
-
-def perf_test_difference(observed_value, baseline_value):
-    percentage_difference = 100 * (observed_value - baseline_value) / baseline_value
-    return percentage_difference
 
 
 def openfoam_installed(headnode):
     cmd = '[ -d "/shared/SubspaceBenchmarks" ]'
     try:
-        headnode.run_remote_command(cmd)
+        headnode.run_remote_command(cmd, log_error=False)
         return True
     except RemoteCommandExecutionError:
+        logging.info("OpenFOAM is not installed on the head node.")
         return False
 
 
@@ -61,13 +58,11 @@ def test_openfoam(
     region,
     scheduler,
     pcluster_config_reader,
-    clusters_factory,
+    shared_performance_test_cluster,
     number_of_nodes,
     test_datadir,
 ):
-    cluster_config = pcluster_config_reader(number_of_nodes=max(number_of_nodes))
-    cluster = clusters_factory(cluster_config)
-    logging.info("Cluster Created")
+    cluster = shared_performance_test_cluster(instance, os, region, scheduler)
     remote_command_executor = RemoteCommandExecutor(cluster)
     if not openfoam_installed(remote_command_executor):
         logging.info("Installing OpenFOAM")
@@ -92,22 +87,7 @@
     # Check results and log performance degradation
     for node, observed_value in zip(number_of_nodes, [observed_value_8, observed_value_16, observed_value_32]):
         baseline_value = BASELINE_CLUSTER_SIZE_ELAPSED_SECONDS[os][node]
-        percentage_difference = perf_test_difference(observed_value, baseline_value)
-        if percentage_difference < 0:
-            outcome = "improvement"
-        elif percentage_difference <= PERF_TEST_DIFFERENCE_TOLERANCE:
-            outcome = "degradation (within tolerance)"
-        else:
-            outcome = "degradation (above tolerance)"
-            performance_degradation[node] = {
-                "baseline": baseline_value,
-                "observed": observed_value,
-                "percentage_difference": percentage_difference,
-            }
-        logging.info(
-            f"Nodes: {node}, Baseline: {baseline_value} seconds, Observed: {observed_value} seconds, "
-            f"Percentage difference: {percentage_difference}%, Outcome: {outcome}"
-        )
+        _log_output_performance_difference(node, performance_degradation, observed_value, baseline_value)
 
     if performance_degradation:
         pytest.fail(f"Performance degradation detected: {performance_degradation}")
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+#!/bin/bash
+# This script installs the necessary software stack for StarCCM+.
+# Note: The same cluster is shared by both test_openfoam and test_starccm.
+# The cluster will be created by whichever test (test_openfoam or test_starccm) is executed first.
+# If test_openfoam is executed first, it will also need to install the required dependencies.
+set -ex
+
+sudo yum install -y libnsl

tests/integration-tests/tests/performance_tests/test_openfoam/test_openfoam/pcluster.config.yaml

Lines changed: 21 additions & 1 deletion
@@ -16,12 +16,23 @@ HeadNode:
       - BucketName: performance-tests-resources-for-parallelcluster
         KeyName: openfoam/*
         EnableWriteAccess: false
+      - BucketName: performance-tests-resources-for-parallelcluster
+        KeyName: starccm/*
+        EnableWriteAccess: false
+{% if install_extra_deps %}
+      - BucketName: {{ bucket_name }}
+        KeyName: scripts/dependencies.install.sh
+        EnableWriteAccess: false
+  CustomActions:
+    OnNodeConfigured:
+      Script: s3://{{ bucket_name }}/scripts/dependencies.install.sh
+{% endif %}
 Scheduling:
   Scheduler: slurm
   SlurmQueues:
     - Name: q1
       ComputeResources:
-        - Name: c5n18xl-efa
+        - Name: c5n-18xl-efa
           InstanceType: {{ instance }}
           MinCount: {{ number_of_nodes }}
           MaxCount: {{ number_of_nodes }}
@@ -37,6 +48,15 @@ Scheduling:
       Iam:
         AdditionalIamPolicies:
          - Policy: arn:{{partition}}:iam::aws:policy/AmazonSSMManagedInstanceCore # Required to report patching status
+{% if install_extra_deps %}
+        S3Access:
+          - BucketName: {{ bucket_name }}
+            KeyName: scripts/dependencies.install.sh
+            EnableWriteAccess: false
+      CustomActions:
+        OnNodeConfigured:
+          Script: s3://{{ bucket_name }}/scripts/dependencies.install.sh
+{% endif %}
 SharedStorage:
   - MountDir: /shared
     Name: shared-fsx

tests/integration-tests/tests/performance_tests/test_starccm.py

Lines changed: 6 additions & 36 deletions
@@ -7,6 +7,7 @@
 from remote_command_executor import RemoteCommandExecutionError, RemoteCommandExecutor
 
 from tests.common.utils import assert_no_file_handler_leak, get_compute_ip_to_num_files
+from tests.performance_tests.common import _log_output_performance_difference
 
 # timeout in seconds
 STARCCM_INSTALLATION_TIMEOUT = 1800
@@ -22,7 +23,6 @@
     "rhel8": {8: 66.494, 16: 36.154, 32: 20.347},  # v3.6.0
     "rocky8": {8: 66.859, 16: 36.184, 32: 21.090},  # v3.8.0
 }
-PERF_TEST_DIFFERENCE_TOLERANCE = 3
 
 OSS_REQUIRING_EXTRA_DEPS = ["alinux2023", "rhel8", "rocky8"]
 
@@ -34,17 +34,13 @@ def get_starccm_secrets(region_name):
     return secrets["podkey"], secrets["licpath"]
 
 
-def perf_test_difference(observed_value, baseline_value):
-    percentage_difference = 100 * (observed_value - baseline_value) / baseline_value
-    return percentage_difference
-
-
 def starccm_installed(headnode):
     cmd = "/shared/STAR-CCM+/18.02.008/STAR-CCM+18.02.008/star/bin/starccm+ --version"
     try:
-        headnode.run_remote_command(cmd)
+        headnode.run_remote_command(cmd, log_error=False)
         return True
     except RemoteCommandExecutionError:
+        logging.info("STAR-CCM+ is not installed on the head node.")
         return False
 
 
@@ -76,24 +72,13 @@ def test_starccm(
     region,
     scheduler,
     pcluster_config_reader,
-    clusters_factory,
+    shared_performance_test_cluster,
     number_of_nodes,
     test_datadir,
     scheduler_commands_factory,
     s3_bucket_factory,
 ):
-    # Create S3 bucket for custom actions scripts
-    bucket_name = s3_bucket_factory()
-    s3 = boto3.client("s3")
-    s3.upload_file(str(test_datadir / "dependencies.install.sh"), bucket_name, "scripts/dependencies.install.sh")
-
-    cluster_config = pcluster_config_reader(
-        bucket_name=bucket_name,
-        install_extra_deps=os in OSS_REQUIRING_EXTRA_DEPS,
-        number_of_nodes=max(number_of_nodes),
-    )
-    cluster = clusters_factory(cluster_config)
-    logging.info("Cluster Created")
+    cluster = shared_performance_test_cluster(instance, os, region, scheduler)
     remote_command_executor = RemoteCommandExecutor(cluster)
     scheduler_commands = scheduler_commands_factory(remote_command_executor)
     init_num_files = get_compute_ip_to_num_files(remote_command_executor, scheduler_commands)
@@ -126,22 +111,7 @@
     # Check results and log performance degradation
     for node, observed_value in zip(number_of_nodes, [observed_value_8, observed_value_16, observed_value_32]):
         baseline_value = BASELINE_CLUSTER_SIZE_ELAPSED_SECONDS[os][node]
-        percentage_difference = perf_test_difference(observed_value, baseline_value)
-        if percentage_difference < 0:
-            outcome = "improvement"
-        elif percentage_difference <= PERF_TEST_DIFFERENCE_TOLERANCE:
-            outcome = "degradation (within tolerance)"
-        else:
-            outcome = "degradation (above tolerance)"
-            performance_degradation[node] = {
-                "baseline": baseline_value,
-                "observed": observed_value,
-                "percentage_difference": percentage_difference,
-            }
-        logging.info(
-            f"Nodes: {node}, Baseline: {baseline_value} seconds, Observed: {observed_value} seconds, "
-            f"Percentage difference: {percentage_difference}%, Outcome: {outcome}"
-        )
+        _log_output_performance_difference(node, performance_degradation, observed_value, baseline_value)
 
     assert_no_file_handler_leak(init_num_files, remote_command_executor, scheduler_commands)
 
tests/integration-tests/tests/performance_tests/test_starccm/test_starccm/pcluster.config.yaml

Lines changed: 7 additions & 0 deletions
@@ -1,6 +1,8 @@
 Region: {{ region }}
 Image:
   Os: {{ os }}
+Imds:
+  ImdsSupport: v2.0
 HeadNode:
   InstanceType: {{ instance }}
   Networking:
@@ -11,6 +13,9 @@ HeadNode:
     AdditionalIamPolicies:
       - Policy: arn:{{partition}}:iam::aws:policy/AmazonSSMManagedInstanceCore #Required to report patching status
     S3Access:
+      - BucketName: performance-tests-resources-for-parallelcluster
+        KeyName: openfoam/*
+        EnableWriteAccess: false
       - BucketName: performance-tests-resources-for-parallelcluster
         KeyName: starccm/*
         EnableWriteAccess: false
@@ -59,5 +64,7 @@ SharedStorage:
     FsxLustreSettings:
       StorageCapacity: 2400
       DeploymentType: PERSISTENT_1
+      AutomaticBackupRetentionDays: 30
+      DailyAutomaticBackupStartTime: 00:00
       PerUnitStorageThroughput: 100
       StorageType: SSD
