From a3cc362cafd3b99f8517ba69c174ae8922e19c9a Mon Sep 17 00:00:00 2001 From: hanwenli Date: Wed, 11 Jun 2025 08:46:00 -0700 Subject: [PATCH 1/2] Do not error exit when capacity reservation from the existing cluster config has been deleted --- cli/src/pcluster/config/cluster_config.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cli/src/pcluster/config/cluster_config.py b/cli/src/pcluster/config/cluster_config.py index eb631adf2f..32f54fb2f3 100644 --- a/cli/src/pcluster/config/cluster_config.py +++ b/cli/src/pcluster/config/cluster_config.py @@ -2446,9 +2446,13 @@ def _instance_type_from_capacity_reservation(self): self.capacity_reservation_target.capacity_reservation_id if self.capacity_reservation_target else None ) if capacity_reservation_id: - capacity_reservations = AWSApi.instance().ec2.describe_capacity_reservations([capacity_reservation_id]) - if capacity_reservations: - instance_type = capacity_reservations[0].instance_type() + try: + capacity_reservations = AWSApi.instance().ec2.describe_capacity_reservations([capacity_reservation_id]) + if capacity_reservations: + instance_type = capacity_reservations[0].instance_type() + except AWSClientError: + # In case the CR has expired and we are unable to retrieve the instance type + instance_type = None return instance_type From ae87438bd581293402ee62528f61da577be4db0f Mon Sep 17 00:00:00 2001 From: hanwenli Date: Wed, 11 Jun 2025 12:02:08 -0700 Subject: [PATCH 2/2] Improve integration test to test capacity reservation update without instance type specified --- .../test_on_demand_capacity_reservation.py | 62 +++++++- .../pcluster.config.update.yaml | 143 ++++++++++++++++++ .../pcluster.config.yaml | 1 - 3 files changed, 203 insertions(+), 3 deletions(-) create mode 100644 tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation/test_on_demand_capacity_reservation/pcluster.config.update.yaml diff --git a/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation.py b/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation.py index b732ffd19a..179d26dc66 100644 --- a/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation.py +++ b/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation.py @@ -10,11 +10,13 @@ # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging +import os +import subprocess import boto3 import pytest from assertpy import assert_that -from utils import describe_cluster_instances, retrieve_cfn_resources +from utils import describe_cluster_instances, retrieve_cfn_resources, wait_for_computefleet_changed @pytest.mark.usefixtures("os", "region") @@ -40,7 +42,50 @@ def test_on_demand_capacity_reservation( pg_capacity_reservation_id=odcr_resources["integTestsPgOdcr"], pg_capacity_reservation_arn=resource_group_arn, ) - cluster = clusters_factory(cluster_config) + + # Apply patch to the repo + logging.info("Applying patch to the repository") + repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../..")) + s3_bucket_file = os.path.join(repo_root, "cli/src/pcluster/models/s3_bucket.py") + + # Backup the original file + with open(s3_bucket_file, "r") as f: + original_content = f.read() + + try: + # Apply the patch - inject the bug that replaces capacity reservation IDs + with open(s3_bucket_file, "r") as f: + content = f.read() + + # Add the bug injection line after the upload_config method definition + modified_content = content.replace( + " def upload_config(self, config, config_name, format=S3FileFormat.YAML):\n" + ' """Upload config file to S3 bucket."""', + " def upload_config(self, config, config_name, format=S3FileFormat.YAML):\n" + ' """Upload config file to S3 bucket."""\n' + ' if config_name == "cluster-config.yaml":\n' + " config = re.sub(r'cr-[0-9a-f]{17}', 'cr-11111111111111111', config)", + ) + + # Write the modified content back + with open(s3_bucket_file, "w") as f: + f.write(modified_content) + + # Install the CLI + logging.info("Installing CLI from local repository") + subprocess.run(["pip", "install", "./cli"], cwd=repo_root, check=True) + + # Create the cluster + cluster = clusters_factory(cluster_config) + finally: + # Revert the patch by restoring the original file + logging.info("Reverting patch from the repository") + with open(s3_bucket_file, "w") as f: + f.write(original_content) + + # Reinstall the CLI + logging.info("Reinstalling CLI from local repository") + subprocess.run(["pip", "install", "./cli"], cwd=repo_root, check=True) _assert_instance_in_capacity_reservation(cluster, region, "open-odcr-id-cr", odcr_resources["integTestsOpenOdcr"]) _assert_instance_in_capacity_reservation(cluster, region, "open-odcr-arn-cr", odcr_resources["integTestsOpenOdcr"]) @@ -64,6 +109,19 @@ def test_on_demand_capacity_reservation( ) _assert_instance_in_capacity_reservation(cluster, region, "pg-odcr-id-cr", odcr_resources["integTestsPgOdcr"]) _assert_instance_in_capacity_reservation(cluster, region, "pg-odcr-arn-cr", odcr_resources["integTestsPgOdcr"]) + cluster.stop() + wait_for_computefleet_changed(cluster, "STOPPED") + updated_config_file = pcluster_config_reader( + config_file="pcluster.config.update.yaml", + placement_group=placement_group_stack.cfn_resources["PlacementGroup"], + open_capacity_reservation_id=odcr_resources["integTestsOpenOdcr"], + open_capacity_reservation_arn=resource_group_arn, + target_capacity_reservation_id=odcr_resources["integTestsTargetOdcr"], + target_capacity_reservation_arn=resource_group_arn, + pg_capacity_reservation_id=odcr_resources["integTestsPgOdcr"], + pg_capacity_reservation_arn=resource_group_arn, + ) + cluster.update(str(updated_config_file)) def _assert_instance_in_capacity_reservation(cluster, region, compute_resource_name, expected_reservation): diff --git a/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation/test_on_demand_capacity_reservation/pcluster.config.update.yaml b/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation/test_on_demand_capacity_reservation/pcluster.config.update.yaml new file mode 100644 index 0000000000..3b5112eb55 --- /dev/null +++ b/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation/test_on_demand_capacity_reservation/pcluster.config.update.yaml @@ -0,0 +1,143 @@ +Image: + Os: {{ os }} +HeadNode: + InstanceType: r5.xlarge + Networking: + SubnetId: {{ public_subnet_id }} + Ssh: + KeyName: {{ key_name }} +Scheduling: + Scheduler: slurm + SlurmQueues: + - Name: open-odcr-q + ComputeResources: + - Name: open-odcr-id-cr + InstanceType: m5.2xlarge + MinCount: 0 + MaxCount: 1 + CapacityReservationTarget: + CapacityReservationId: {{ open_capacity_reservation_id }} + - Name: open-odcr-arn-cr + InstanceType: m5.2xlarge + MinCount: 0 + MaxCount: 1 + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ open_capacity_reservation_arn }} + - Name: open-odcr-arn-fl-cr + Instances: + - InstanceType: m5.2xlarge + MinCount: 0 + MaxCount: 1 + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ open_capacity_reservation_arn }} + - Name: open-odcr-id-pg-cr + InstanceType: m5.2xlarge + MinCount: 0 + MaxCount: 1 + Networking: + PlacementGroup: + Enabled: true + CapacityReservationTarget: + CapacityReservationId: {{ open_capacity_reservation_id }} + - Name: open-odcr-arn-pg-cr + InstanceType: m5.2xlarge + MinCount: 0 + MaxCount: 1 + Networking: + PlacementGroup: + Enabled: true + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ open_capacity_reservation_arn }} + - Name: open-odcr-arn-pg-fl-cr + Instances: + - InstanceType: m5.2xlarge + MinCount: 0 + MaxCount: 1 + Networking: + PlacementGroup: + Enabled: true + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ open_capacity_reservation_arn }} + Networking: + SubnetIds: + - {{ public_subnet_id }} + - Name: target-odcr-q + ComputeResources: + - Name: target-odcr-id-cr + InstanceType: r5.xlarge + MinCount: 0 + MaxCount: 1 + CapacityReservationTarget: + CapacityReservationId: {{ target_capacity_reservation_id }} + - Name: target-odcr-arn-cr + InstanceType: r5.xlarge + MinCount: 0 + MaxCount: 1 + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ target_capacity_reservation_arn }} + - Name: target-odcr-arn-fl-cr + Instances: + - InstanceType: r5.xlarge + MinCount: 0 + MaxCount: 1 + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ target_capacity_reservation_arn }} + - Name: target-odcr-id-pg-cr + InstanceType: r5.xlarge + MinCount: 0 + MaxCount: 1 + Networking: + PlacementGroup: + Enabled: true + CapacityReservationTarget: + CapacityReservationId: {{ target_capacity_reservation_id }} + - Name: target-odcr-arn-pg-cr + InstanceType: r5.xlarge + MinCount: 0 + MaxCount: 1 + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ target_capacity_reservation_arn }} + - Name: target-odcr-arn-pg-fl-cr + Instances: + - InstanceType: r5.xlarge + MinCount: 0 + MaxCount: 1 + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ target_capacity_reservation_arn }} + Networking: + SubnetIds: + - {{ public_subnet_id }} + - Name: pg-odcr-q + ComputeResources: + - Name: pg-odcr-id-cr + InstanceType: m5.xlarge + MinCount: 0 + MaxCount: 1 + Networking: + PlacementGroup: + Name: {{ placement_group }} + CapacityReservationTarget: + CapacityReservationId: {{ pg_capacity_reservation_id }} + - Name: pg-odcr-arn-cr + InstanceType: m5.xlarge + MinCount: 0 + MaxCount: 1 + Networking: + PlacementGroup: + Name: {{ placement_group }} + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ pg_capacity_reservation_arn }} + - Name: pg-odcr-arn-fleet-cr + Instances: + - InstanceType: m5.xlarge + MinCount: 0 + MaxCount: 1 + Networking: + PlacementGroup: + Name: {{ placement_group }} + CapacityReservationTarget: + CapacityReservationResourceGroupArn: {{ pg_capacity_reservation_arn }} + Networking: + SubnetIds: + - {{ public_subnet_id }} + diff --git a/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation/test_on_demand_capacity_reservation/pcluster.config.yaml b/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation/test_on_demand_capacity_reservation/pcluster.config.yaml index 08094c3a41..43838e4883 100644 --- a/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation/test_on_demand_capacity_reservation/pcluster.config.yaml +++ b/tests/integration-tests/tests/capacity_reservations/test_on_demand_capacity_reservation/test_on_demand_capacity_reservation/pcluster.config.yaml @@ -12,7 +12,6 @@ Scheduling: - Name: open-odcr-q ComputeResources: - Name: open-odcr-id-cr - InstanceType: m5.2xlarge MinCount: 1 MaxCount: 1 CapacityReservationTarget: