diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bbcfa0cee..78c13bd5de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ CHANGELOG - Support DCV on Amazon Linux 2023. - Upgrade Python runtime used by Lambda functions to python3.12 (from python3.9). - Remove `berkshelf`. All cookbooks are local and do not need `berkshelf` dependency management. +- Introduce default global build-image cleanup IAM versioning roles to prevent build-image CloudFormation stacks from failing to be automatically deleted after images are either successfully built or fail to build. **BUG FIXES** - Fix an issue where Security Group validation failed when a rule contained both IPv4 ranges (IpRanges) and security group references (UserIdGroupPairs). diff --git a/cli/src/pcluster/api/controllers/image_operations_controller.py b/cli/src/pcluster/api/controllers/image_operations_controller.py index e31ed8eef5..5d855bd2e3 100644 --- a/cli/src/pcluster/api/controllers/image_operations_controller.py +++ b/cli/src/pcluster/api/controllers/image_operations_controller.py @@ -10,6 +10,8 @@ import logging import os as os_lib +import yaml + from pcluster.api.controllers.common import ( assert_supported_operation, configure_aws_region, @@ -52,6 +54,7 @@ from pcluster.aws.common import AWSClientError from pcluster.aws.ec2 import Ec2Client from pcluster.constants import SUPPORTED_ARCHITECTURES, SUPPORTED_OSES, Operation +from pcluster.imagebuilder_utils import ensure_default_build_image_stack_cleanup_role from pcluster.models.imagebuilder import ( BadRequestImageBuilderActionError, ConfigValidationError, @@ -59,7 +62,7 @@ NonExistingImageError, ) from pcluster.models.imagebuilder_resources import ImageBuilderStack, NonExistingStackError -from pcluster.utils import get_installed_version, to_utc_datetime +from pcluster.utils import get_installed_version, get_partition, to_utc_datetime from pcluster.validators.common import FailureLevel LOGGER = logging.getLogger(__name__) @@ -105,6 +108,29 @@ def build_image( validation_failure_level = validation_failure_level or ValidationLevel.ERROR dryrun = dryrun or False + raw_cfg_str = build_image_request_content["imageConfiguration"] + cfg_dict = yaml.safe_load(raw_cfg_str) or {} + # If CleanupLambdaRole exists in the config, skip ensure_default_build_image_stack_cleanup_role + has_custom_cleanup_role = cfg_dict.get("Build", {}).get("Iam", {}).get("CleanupLambdaRole") + # If LambdaFunctionsVpcConfig exists in the config, attach the AWS-managed LambdaVPCAccess policy + has_lambda_functions_vpc_config = cfg_dict.get("DeploymentSettings", {}).get("LambdaFunctionsVpcConfig") + + if not has_custom_cleanup_role: + try: + account_id = AWSApi.instance().sts.get_account_id() + ensure_default_build_image_stack_cleanup_role( + account_id, get_partition(), attach_vpc_access_policy=bool(has_lambda_functions_vpc_config) + ) + except AWSClientError as e: + if e.error_code in ("AccessDenied", "AccessDeniedException", "UnauthorizedOperation"): + raise BadRequestException( + "Current principal lacks permissions to create or update the ParallelCluster build-image " + "cleanup IAM role. " + "Either pass `Build/Iam/CleanupLambdaRole` or grant the missing permissions to continue. " + "For detailed instructions, please refer to our public documentation." + ) + raise + build_image_request_content = BuildImageRequestContent.from_dict(build_image_request_content) try: diff --git a/cli/src/pcluster/aws/iam.py b/cli/src/pcluster/aws/iam.py index 54320823f9..5736202bb6 100644 --- a/cli/src/pcluster/aws/iam.py +++ b/cli/src/pcluster/aws/iam.py @@ -32,3 +32,23 @@ def get_role(self, role_name): def get_instance_profile(self, instance_profile_name): """Get instance profile information.""" return self._client.get_instance_profile(InstanceProfileName=instance_profile_name) + + @AWSExceptionHandler.handle_client_exception + def create_role(self, **kwargs): + """Create IAM role.""" + return self._client.create_role(**kwargs) + + @AWSExceptionHandler.handle_client_exception + def attach_role_policy(self, role_name, policy_arn): + """Attach a managed policy to the given role.""" + return self._client.attach_role_policy(RoleName=role_name, PolicyArn=policy_arn) + + @AWSExceptionHandler.handle_client_exception + def put_role_policy(self, role_name, policy_name, policy_document): + """Create or replace the specified inline policy on a role.""" + return self._client.put_role_policy(RoleName=role_name, PolicyName=policy_name, PolicyDocument=policy_document) + + @AWSExceptionHandler.handle_client_exception + def tag_role(self, role_name, tags): + """Add or overwrite one or more tags for the specified role.""" + return self._client.tag_role(RoleName=role_name, Tags=tags) diff --git a/cli/src/pcluster/constants.py b/cli/src/pcluster/constants.py index 8a93f15527..30ae56d5cf 100644 --- a/cli/src/pcluster/constants.py +++ b/cli/src/pcluster/constants.py @@ -335,3 +335,8 @@ class Operation(Enum): PCLUSTER_BUCKET_PROTECTED_FOLDER = "parallelcluster" PCLUSTER_BUCKET_PROTECTED_PREFIX = f"{PCLUSTER_BUCKET_PROTECTED_FOLDER}/" PCLUSTER_BUCKET_REQUIRED_BOOTSTRAP_FEATURES = ["basic", "export-logs"] + +PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_PREFIX = "PClusterBuildImageCleanupRole" +# Tag key & expected revision (increment when policy widens) +PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_REVISION = 1 +PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_BOOTSTRAP_TAG_KEY = "parallelcluster:build-image-cleanup-role-bootstrapped" diff --git a/cli/src/pcluster/imagebuilder_utils.py b/cli/src/pcluster/imagebuilder_utils.py index 2ece6b61fc..ccdd8bb5b2 100644 --- a/cli/src/pcluster/imagebuilder_utils.py +++ b/cli/src/pcluster/imagebuilder_utils.py @@ -8,12 +8,21 @@ # or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES # OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and # limitations under the License. +import json +import logging import os import yaml from pcluster.aws.aws_api import AWSApi -from pcluster.utils import get_url_scheme, yaml_load +from pcluster.aws.common import AWSClientError +from pcluster.constants import ( + IAM_ROLE_PATH, + PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_BOOTSTRAP_TAG_KEY, + PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_PREFIX, + PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_REVISION, +) +from pcluster.utils import generate_string_hash, get_url_scheme, yaml_load ROOT_VOLUME_TYPE = "gp3" PCLUSTER_RESERVED_VOLUME_SIZE = 37 @@ -65,3 +74,175 @@ def _generate_action(action_name, commands): """Generate action in imagebuilder components.""" action = {"name": action_name, "action": "ExecuteBash", "inputs": {"commands": [commands]}} return action + + +def get_cleanup_role_name(account_id: str) -> str: + """Return the role name including a revision number.""" + hashed_account_id = generate_string_hash(account_id) + return ( + f"{PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_PREFIX}-{hashed_account_id}" + f"-revision-{PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_REVISION}" + ) + + +def _expected_inline_policy(account_id: str, partition: str): + """Return the inline policy document (JSON-serialised string).""" + return json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Action": ["iam:DetachRolePolicy", "iam:DeleteRole", "iam:DeleteRolePolicy"], + "Resource": f"arn:{partition}:iam::{account_id}:role/parallelcluster/*", + "Effect": "Allow", + }, + { + "Action": ["iam:DeleteInstanceProfile", "iam:RemoveRoleFromInstanceProfile"], + "Resource": f"arn:{partition}:iam::{account_id}:instance-profile/parallelcluster/*", + "Effect": "Allow", + }, + { + "Action": "imagebuilder:DeleteInfrastructureConfiguration", + "Resource": f"arn:{partition}:imagebuilder:*:{account_id}:infrastructure-configuration/" + f"parallelclusterimage-*", + "Effect": "Allow", + }, + { + "Action": ["imagebuilder:DeleteComponent"], + "Resource": [f"arn:{partition}:imagebuilder:*:{account_id}:component/parallelclusterimage-*/*"], + "Effect": "Allow", + }, + { + "Action": "imagebuilder:DeleteImageRecipe", + "Resource": f"arn:{partition}:imagebuilder:*:{account_id}:image-recipe/parallelclusterimage-*/*", + "Effect": "Allow", + }, + { + "Action": "imagebuilder:DeleteDistributionConfiguration", + "Resource": f"arn:{partition}:imagebuilder:*:{account_id}:distribution-configuration/" + f"parallelclusterimage-*", + "Effect": "Allow", + }, + { + "Action": ["imagebuilder:DeleteImage", "imagebuilder:GetImage", "imagebuilder:CancelImageCreation"], + "Resource": f"arn:{partition}:imagebuilder:*:{account_id}:image/parallelclusterimage-*/*", + "Effect": "Allow", + }, + { + "Action": "cloudformation:DeleteStack", + "Resource": f"arn:{partition}:cloudformation:*:{account_id}:stack/*/*", + "Condition": { + "ForAnyValue:StringLike": {"cloudformation:ResourceTag/parallelcluster:image_id": "*"} + }, + "Effect": "Allow", + }, + # The below two permissions are required for the DeleteStackFunction Lambda to tag the + # created AMI with 'parallelcluster:build_status' and 'parallelcluster:parent_image' tags + {"Action": "ec2:CreateTags", "Resource": f"arn:{partition}:ec2:*::image/*", "Effect": "Allow"}, + {"Action": "tag:TagResources", "Resource": f"arn:{partition}:ec2:*::image/*", "Effect": "Allow"}, + { + "Action": ["lambda:DeleteFunction", "lambda:RemovePermission"], + "Resource": f"arn:{partition}:lambda:*:{account_id}:function:ParallelClusterImage-*", + "Effect": "Allow", + }, + { + "Action": "logs:DeleteLogGroup", + "Resource": f"arn:{partition}:logs:*:{account_id}:log-group:/aws/lambda/ParallelClusterImage-*:*", + "Effect": "Allow", + }, + { + "Action": [ + "SNS:GetTopicAttributes", + "SNS:DeleteTopic", + "SNS:GetSubscriptionAttributes", + "SNS:Unsubscribe", + ], + "Resource": f"arn:{partition}:sns:*:{account_id}:ParallelClusterImage-*", + "Effect": "Allow", + }, + ], + } + ) + + +def ensure_default_build_image_stack_cleanup_role( + account_id: str, partition="aws", attach_vpc_access_policy: bool = False +) -> str: + """ + Ensure the global (account-wide) cleanup role exists and is at the expected revision. + + The function follows a safe order: + 1. If the role does not exist, create it without the bootstrapped tag. + 2. If LambdaFunctionsVpcConfig exists in the config, attach the AWS-managed LambdaVPCAccess policy. + 3. Attach the AWS-managed Lambda basic policy. + 4. Update/write the inline policy (least-privilege cleanup policy). + 5. Only after the inline policy succeeds, set the bootstrapped tag. + + This way, if step 2, 3 or 4 fails (e.g., lack of iam:PutRolePolicy permission), + future invocations will keep retrying. + """ + iam = AWSApi.instance().iam + role_name = get_cleanup_role_name(account_id) + role_arn = f"arn:{partition}:iam::{account_id}:role{IAM_ROLE_PATH}{role_name}" + + # Assume-role trust policy + assume_doc = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "lambda.amazonaws.com"}, + "Action": "sts:AssumeRole", + "Condition": { + "ArnLike": { + "aws:SourceArn": f"arn:{partition}:lambda:*:{account_id}:function:ParallelClusterImage-*" + } + }, + } + ], + } + # Check whether the role already exists + try: + resp = iam.get_role(role_name=role_name) + tags = {t["Key"]: t["Value"] for t in resp["Role"].get("Tags", [])} + already_bootstrapped = tags.get(PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_BOOTSTRAP_TAG_KEY, "").lower() == "true" + except AWSClientError as e: + if e.error_code == "NoSuchEntity": + logging.info("Creating default build-image stack cleanup role %s because it does not exists.", role_name) + iam.create_role( + RoleName=role_name, + Path=IAM_ROLE_PATH, + AssumeRolePolicyDocument=json.dumps(assume_doc), + Description="AWS ParallelCluster build-image cleanup Lambda execution role", + ) + already_bootstrapped = False + else: + raise + + # Attach AWSLambdaVPCAccessExecutionRole + if attach_vpc_access_policy: + iam.attach_role_policy( + role_name, + f"arn:{partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole", + ) + + if already_bootstrapped: + return role_arn + + # Attach AWSLambdaBasicExecutionRole + cleanup_role_basic_managed_policy = f"arn:{partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + iam.attach_role_policy(role_name, cleanup_role_basic_managed_policy) + + # Put inline policy + iam.put_role_policy( + role_name=role_name, + policy_name="ParallelClusterCleanupInline", + policy_document=_expected_inline_policy(account_id, partition), + ) + + # Set bootstrapped tag after policy write succeeds + iam.tag_role( + role_name=role_name, + tags=[{"Key": PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_BOOTSTRAP_TAG_KEY, "Value": "true"}], + ) + return role_arn diff --git a/cli/src/pcluster/templates/imagebuilder_stack.py b/cli/src/pcluster/templates/imagebuilder_stack.py index 12b7b6c1ba..49bfce6aef 100644 --- a/cli/src/pcluster/templates/imagebuilder_stack.py +++ b/cli/src/pcluster/templates/imagebuilder_stack.py @@ -18,7 +18,6 @@ import copy import json import os -from typing import List import yaml from aws_cdk import aws_iam as iam @@ -36,7 +35,6 @@ from pcluster.constants import ( IAM_ROLE_PATH, IMAGEBUILDER_RESOURCE_NAME_PREFIX, - LAMBDA_VPC_ACCESS_MANAGED_POLICY, PCLUSTER_IMAGE_BUILD_LOG_TAG, PCLUSTER_IMAGE_CONFIG_TAG, PCLUSTER_IMAGE_ID_TAG, @@ -251,7 +249,6 @@ def _add_resources(self): # Get ami tags information ami_tags = self._get_image_tags() - lambda_cleanup_policy_statements = [] resource_dependency_list = [] # InstanceRole and InstanceProfile @@ -260,60 +257,37 @@ def _add_resources(self): resource_dependency_list.append( self._add_instance_profile( instance_role=self.custom_instance_role, - cleanup_policy_statements=lambda_cleanup_policy_statements, ) ) elif self.custom_instance_profile: instance_profile_name = self.custom_instance_profile.split("/")[-1] else: - resource_dependency_list.append( - self._add_default_instance_role(lambda_cleanup_policy_statements, build_tags_list) - ) - resource_dependency_list.append( - self._add_instance_profile(cleanup_policy_statements=lambda_cleanup_policy_statements) - ) + resource_dependency_list.append(self._add_default_instance_role(build_tags_list)) + resource_dependency_list.append(self._add_instance_profile()) - self._add_imagebuilder_resources( - build_tags_map, ami_tags, instance_profile_name, lambda_cleanup_policy_statements, resource_dependency_list - ) + self._add_imagebuilder_resources(build_tags_map, ami_tags, instance_profile_name, resource_dependency_list) - lambda_cleanup, permission, lambda_cleanup_execution_role, lambda_log = self._add_lambda_cleanup( - lambda_cleanup_policy_statements, build_tags_list - ) + lambda_cleanup, permission, lambda_log = self._add_lambda_cleanup(build_tags_list) resource_dependency_list.extend([lambda_cleanup, permission, lambda_log]) resource_dependency_list.extend(self._add_sns_topic_and_subscription(lambda_cleanup, build_tags_list)) - if lambda_cleanup_execution_role: - for resource in resource_dependency_list: - resource.add_depends_on(lambda_cleanup_execution_role) - - def _add_imagebuilder_resources( - self, build_tags, ami_tags, instance_profile_name, lambda_cleanup_policy_statements, resource_dependency_list - ): + def _add_imagebuilder_resources(self, build_tags, ami_tags, instance_profile_name, resource_dependency_list): resource_dependency_list.append( - self._add_imagebuilder_infrastructure_configuration( - build_tags, instance_profile_name, lambda_cleanup_policy_statements - ) + self._add_imagebuilder_infrastructure_configuration(build_tags, instance_profile_name) ) - components, components_resources = self._add_imagebuilder_components( - build_tags, lambda_cleanup_policy_statements - ) + components, components_resources = self._add_imagebuilder_components(build_tags) resource_dependency_list.extend(components_resources) - resource_dependency_list.append( - self._add_imagebuilder_image_recipe(build_tags, components, lambda_cleanup_policy_statements) - ) + resource_dependency_list.append(self._add_imagebuilder_image_recipe(build_tags, components)) - resource_dependency_list.append( - self._add_imagebuilder_distribution_configuration(ami_tags, build_tags, lambda_cleanup_policy_statements) - ) + resource_dependency_list.append(self._add_imagebuilder_distribution_configuration(ami_tags, build_tags)) - resource_dependency_list.append(self._add_imagebuilder_image(build_tags, lambda_cleanup_policy_statements)) + resource_dependency_list.append(self._add_imagebuilder_image(build_tags)) - def _add_imagebuilder_image(self, build_tags, lambda_cleanup_policy_statements): + def _add_imagebuilder_image(self, build_tags): # ImageBuilderImage image_resource = imagebuilder.CfnImage( self, @@ -324,22 +298,9 @@ def _add_imagebuilder_image(self, build_tags, lambda_cleanup_policy_statements): distribution_configuration_arn=Fn.ref("DistributionConfiguration"), enhanced_image_metadata_enabled=False, ) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - lambda_cleanup_policy_statements, - ["imagebuilder:DeleteImage", "imagebuilder:GetImage", "imagebuilder:CancelImageCreation"], - [ - self.format_arn( - service="imagebuilder", - resource="image", - resource_name="{0}/*".format(self._build_image_recipe_name(to_lower=True)), - ) - ], - ) - return image_resource - def _add_imagebuilder_distribution_configuration(self, ami_tags, build_tags, lambda_cleanup_policy_statements): + def _add_imagebuilder_distribution_configuration(self, ami_tags, build_tags): # ImageBuilderDistributionConfiguration ami_distribution_configuration = { "Name": (self.config.image.name if self.config.image and self.config.image.name else self.image_id) @@ -368,24 +329,9 @@ def _add_imagebuilder_distribution_configuration(self, ami_tags, build_tags, lam tags=build_tags, distributions=distributions, ) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - lambda_cleanup_policy_statements, - ["imagebuilder:DeleteDistributionConfiguration"], - [ - self.format_arn( - service="imagebuilder", - resource="distribution-configuration", - resource_name="{0}".format( - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX, to_lower=True) - ), - ) - ], - ) - return distribution_configuration_resource - def _add_imagebuilder_image_recipe(self, build_tags, components, lambda_cleanup_policy_statements): + def _add_imagebuilder_image_recipe(self, build_tags, components): # ImageBuilderImageRecipe image_recipe_resource = imagebuilder.CfnImageRecipe( self, @@ -402,22 +348,9 @@ def _add_imagebuilder_image_recipe(self, build_tags, components, lambda_cleanup_ ) ], ) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - lambda_cleanup_policy_statements, - ["imagebuilder:DeleteImageRecipe"], - [ - self.format_arn( - service="imagebuilder", - resource="image-recipe", - resource_name="{0}/*".format(self._build_image_recipe_name(to_lower=True)), - ) - ], - ) - return image_recipe_resource - def _add_imagebuilder_components(self, build_tags, lambda_cleanup_policy_statements): + def _add_imagebuilder_components(self, build_tags): imagebuilder_resources_dir = os.path.join(imagebuilder_utils.get_resources_directory(), "imagebuilder") # ImageBuilderComponents @@ -438,22 +371,6 @@ def _add_imagebuilder_components(self, build_tags, lambda_cleanup_policy_stateme imagebuilder.CfnImageRecipe.ComponentConfigurationProperty(component_arn=Fn.ref("UpdateOSComponent")) ) components_resources.append(update_os_component_resource) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - lambda_cleanup_policy_statements, - ["imagebuilder:DeleteComponent"], - [ - self.format_arn( - service="imagebuilder", - resource="component", - resource_name="{0}/*".format( - self._build_resource_name( - IMAGEBUILDER_RESOURCE_NAME_PREFIX + "-UpdateOS", to_lower=True - ) - ), - ) - ], - ) disable_pcluster_component = ( self.config.dev_settings.disable_pcluster_component @@ -477,20 +394,6 @@ def _add_imagebuilder_components(self, build_tags, lambda_cleanup_policy_stateme ) ) components_resources.append(parallelcluster_component_resource) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - lambda_cleanup_policy_statements, - ["imagebuilder:DeleteComponent"], - [ - self.format_arn( - service="imagebuilder", - resource="component", - resource_name="{0}/*".format( - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX, to_lower=True) - ), - ) - ], - ) tag_component_resource = imagebuilder.CfnComponent( self, @@ -508,23 +411,9 @@ def _add_imagebuilder_components(self, build_tags, lambda_cleanup_policy_stateme ) ) components_resources.append(tag_component_resource) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - lambda_cleanup_policy_statements, - ["imagebuilder:DeleteComponent"], - [ - self.format_arn( - service="imagebuilder", - resource="component", - resource_name="{0}/*".format( - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX + "-Tag", to_lower=True) - ), - ) - ], - ) if self.config.build.components: - self._add_custom_components(components, lambda_cleanup_policy_statements, components_resources) + self._add_custom_components(components, components_resources) disable_validate_and_test_component = ( self.config.dev_settings.disable_validate_and_test @@ -549,26 +438,10 @@ def _add_imagebuilder_components(self, build_tags, lambda_cleanup_policy_stateme ) ) components_resources.append(test_component_resource) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - lambda_cleanup_policy_statements, - ["imagebuilder:DeleteComponent"], - [ - self.format_arn( - service="imagebuilder", - resource="component", - resource_name="{0}/*".format( - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX + "-Test", to_lower=True) - ), - ) - ], - ) return components, components_resources - def _add_imagebuilder_infrastructure_configuration( - self, build_tags, instance_profile_name, lambda_cleanup_policy_statements - ): + def _add_imagebuilder_infrastructure_configuration(self, build_tags, instance_profile_name): # ImageBuilderInfrastructureConfiguration infrastructure_configuration_resource = imagebuilder.CfnInfrastructureConfiguration( self, @@ -590,175 +463,15 @@ def _add_imagebuilder_infrastructure_configuration( http_tokens=get_http_tokens_setting(self.config.build.imds.imds_support) ), ) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - lambda_cleanup_policy_statements, - ["imagebuilder:DeleteInfrastructureConfiguration"], - [ - self.format_arn( - service="imagebuilder", - resource="infrastructure-configuration", - resource_name="{0}".format( - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX, to_lower=True) - ), - ) - ], - ) - return infrastructure_configuration_resource - def _add_lambda_cleanup(self, policy_statements, build_tags): - lambda_cleanup_execution_role = None + def _add_lambda_cleanup(self, build_tags): + # Determine execution_role ARN if self.custom_cleanup_lambda_role: execution_role = self.custom_cleanup_lambda_role else: - # LambdaCleanupPolicies - self._add_resource_delete_policy( - policy_statements, - ["cloudformation:DeleteStack"], - [ - self.format_arn( - service="cloudformation", - resource="stack", - resource_name="{0}/{1}".format(self.image_id, self._stack_unique_id()), - ) - ], - ) - - self._add_resource_delete_policy( - policy_statements, - ["ec2:CreateTags"], - [ - self.format_arn( - service="ec2", - account="", - resource="image", - region=region, - resource_name="*", - ) - for region in self._get_distribution_regions() - ], - ) - - self._add_resource_delete_policy( - policy_statements, - ["tag:TagResources"], - ["*"], - ) - - self._add_resource_delete_policy( - policy_statements, - ["iam:DetachRolePolicy", "iam:DeleteRole", "iam:DeleteRolePolicy"], - [ - self.format_arn( - service="iam", - resource="role", - region="", - resource_name="{0}/{1}".format( - IAM_ROLE_PATH.strip("/"), - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX + "Cleanup"), - ), - ) - ], - ) - - self._add_resource_delete_policy( - policy_statements, - ["lambda:DeleteFunction", "lambda:RemovePermission"], - [ - self.format_arn( - service="lambda", - resource="function", - sep=":", - resource_name=self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX), - ) - ], - ) - - self._add_resource_delete_policy( - policy_statements, - ["logs:DeleteLogGroup"], - [ - self.format_arn( - service="logs", - resource="log-group", - sep=":", - resource_name="/aws/lambda/{0}:*".format( - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX) - ), - ) - ], - ) - - self._add_resource_delete_policy( - policy_statements, - ["iam:RemoveRoleFromInstanceProfile"], - [ - self.format_arn( - service="iam", - resource="instance-profile", - region="", - resource_name="{0}/{1}".format( - IAM_ROLE_PATH.strip("/"), - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX), - ), - ) - ], - ) - - self._add_resource_delete_policy( - policy_statements, - ["iam:DetachRolePolicy", "iam:DeleteRolePolicy"], - [ - self.format_arn( - service="iam", - resource="role", - region="", - resource_name="{0}/{1}".format( - IAM_ROLE_PATH.strip("/"), - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX), - ), - ) - ], - ) - - self._add_resource_delete_policy( - policy_statements, - ["SNS:GetTopicAttributes", "SNS:DeleteTopic", "SNS:Unsubscribe"], - [ - self.format_arn( - service="sns", - resource="{0}".format(self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX)), - ) - ], - ) - - policy_document = iam.PolicyDocument(statements=policy_statements) - managed_lambda_policy = [ - Fn.sub("arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"), - ] - - if self.config.lambda_functions_vpc_config: - managed_lambda_policy.append(Fn.sub(LAMBDA_VPC_ACCESS_MANAGED_POLICY)) - - # LambdaCleanupExecutionRole - lambda_cleanup_execution_role = iam.CfnRole( - self, - "DeleteStackFunctionExecutionRole", - managed_policy_arns=managed_lambda_policy, - assume_role_policy_document=get_assume_role_policy_document("lambda.amazonaws.com"), - path=IAM_ROLE_PATH, - policies=[ - iam.CfnRole.PolicyProperty( - policy_document=policy_document, - policy_name="LambdaCleanupPolicy", - ), - ], - tags=build_tags, - role_name=self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX + "Cleanup"), - ) - - execution_role = lambda_cleanup_execution_role.attr_arn + role_name = imagebuilder_utils.get_cleanup_role_name(AWSApi.instance().sts.get_account_id()) + execution_role = Fn.sub(f"arn:${{AWS::Partition}}:iam::${{AWS::AccountId}}:role{IAM_ROLE_PATH}{role_name}") # LambdaCleanupEnv lambda_env = awslambda.CfnFunction.EnvironmentProperty(variables={"IMAGE_STACK_ARN": self.stack_id}) @@ -806,7 +519,8 @@ def _add_lambda_cleanup(self, policy_statements, build_tags): ) lambda_cleanup.add_depends_on(lambda_log) - return lambda_cleanup, permission, lambda_cleanup_execution_role, lambda_log + # No stack-local execution role created; return None placeholder + return lambda_cleanup, permission, lambda_log def _add_sns_topic_and_subscription(self, lambda_cleanup, build_tags): # SNSTopic @@ -827,7 +541,7 @@ def _add_sns_topic_and_subscription(self, lambda_cleanup, build_tags): return sns_subscription_resource, sns_topic_resource - def _add_default_instance_role(self, cleanup_policy_statements, build_tags): + def _add_default_instance_role(self, build_tags): """Set default instance role in imagebuilder cfn template.""" managed_policy_arns = [ Fn.sub("arn:${AWS::Partition}:iam::aws:policy/AmazonSSMManagedInstanceCore"), @@ -894,26 +608,9 @@ def _add_default_instance_role(self, cleanup_policy_statements, build_tags): tags=build_tags, role_name=self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX), ) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - cleanup_policy_statements, - ["iam:DeleteRole"], - [ - self.format_arn( - service="iam", - region="", - resource="role", - resource_name="{0}/{1}".format( - IAM_ROLE_PATH.strip("/"), - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX), - ), - ) - ], - ) - return instance_role_resource - def _add_instance_profile(self, cleanup_policy_statements, instance_role=None): + def _add_instance_profile(self, instance_role=None): """Set default instance profile in imagebuilder cfn template.""" instance_profile_resource = iam.CfnInstanceProfile( self, @@ -922,27 +619,9 @@ def _add_instance_profile(self, cleanup_policy_statements, instance_role=None): roles=[instance_role.split("/")[-1] if instance_role else Fn.ref("InstanceRole")], instance_profile_name=self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX), ) - - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - cleanup_policy_statements, - ["iam:DeleteInstanceProfile"], - [ - self.format_arn( - service="iam", - region="", - resource="instance-profile", - resource_name="{0}/{1}".format( - IAM_ROLE_PATH.strip("/"), - self._build_resource_name(IMAGEBUILDER_RESOURCE_NAME_PREFIX), - ), - ) - ], - ) - return instance_profile_resource - def _add_custom_components(self, components, policy_statements, components_resources): + def _add_custom_components(self, components, components_resources): """Set custom component in imagebuilder cfn template.""" initial_components_len = len(components) arn_components_len = 0 @@ -972,24 +651,6 @@ def _add_custom_components(self, components, policy_statements, components_resou imagebuilder.CfnImageRecipe.ComponentConfigurationProperty(component_arn=Fn.ref(component_id)) ) components_resources.append(custom_component_resource) - if not self.custom_cleanup_lambda_role: - self._add_resource_delete_policy( - policy_statements, - ["imagebuilder:DeleteComponent"], - [ - self.format_arn( - service="imagebuilder", - resource="component", - resource_name="{0}/*".format( - self._build_resource_name( - IMAGEBUILDER_RESOURCE_NAME_PREFIX - + "-Script-{0}".format(str(custom_components_len)), - to_lower=True, - ) - ), - ) - ], - ) def _set_ebs_volume(self): """Set ebs root volume in imagebuilder cfn template.""" @@ -1022,16 +683,6 @@ def _set_ebs_volume(self): return ebs - @staticmethod - def _add_resource_delete_policy(policy_statements, actions: List[str], resources: List[str]): - policy_statements.append( - iam.PolicyStatement( - actions=actions, - effect=iam.Effect.ALLOW, - resources=resources, - ) - ) - def _load_yaml(source_dir, file_name): """Get string data from yaml file.""" diff --git a/cli/src/pcluster/utils.py b/cli/src/pcluster/utils.py index a40dfb4ea3..57049e10ba 100644 --- a/cli/src/pcluster/utils.py +++ b/cli/src/pcluster/utils.py @@ -23,6 +23,7 @@ import urllib import zipfile from concurrent.futures import ThreadPoolExecutor +from hashlib import sha256 from importlib.metadata import version from importlib.resources import files # nosemgrep: python.lang.compatibility.python37.python37-compatibility-importlib2 from io import BytesIO @@ -597,3 +598,8 @@ def get_service_principal(service_name: str, partition: str, region: str = None, def format_arn(partition: str, service: str, region: str, account: str, resource: str) -> str: """Format an ARN string.""" return f"arn:{partition}:{service}:{region}:{account}:{resource}" + + +def generate_string_hash(input_string: str, hash_length: int = 12) -> str: + """Return first input length hex chars of sha256(str).""" + return sha256(input_string.encode()).hexdigest()[:hash_length] diff --git a/cli/tests/pcluster/api/controllers/test_image_operations_controller.py b/cli/tests/pcluster/api/controllers/test_image_operations_controller.py index bf123ace00..47bdfbee06 100644 --- a/cli/tests/pcluster/api/controllers/test_image_operations_controller.py +++ b/cli/tests/pcluster/api/controllers/test_image_operations_controller.py @@ -531,6 +531,11 @@ def _send_test_request(self, client, dryrun=None, suppress_validators=None, roll def test_build_image_success( self, client, mocker, suppress_validators, suppressed_validation_errors, rollback_on_failure ): + mocker.patch("pcluster.aws.sts.StsClient.get_account_id", return_value="fake-id") + mocker.patch( + "pcluster.api.controllers.image_operations_controller.ensure_default_build_image_stack_cleanup_role", + return_value="arn:aws:iam::123456789012:role/cleanup", + ) mocked_call = mocker.patch( "pcluster.models.imagebuilder.ImageBuilder.create", return_value=suppressed_validation_errors, @@ -602,6 +607,11 @@ def test_build_image_success( ], ) def test_dryrun(self, client, mocker, validation_errors, error_code, expected_response): + mocker.patch("pcluster.aws.sts.StsClient.get_account_id", return_value="fake-id") + mocker.patch( + "pcluster.api.controllers.image_operations_controller.ensure_default_build_image_stack_cleanup_role", + return_value="arn:aws:iam::123456789012:role/cleanup", + ) if validation_errors: mocker.patch( "pcluster.models.imagebuilder.ImageBuilder.validate_create_request", side_effect=validation_errors @@ -634,6 +644,11 @@ def test_dryrun(self, client, mocker, validation_errors, error_code, expected_re ], ) def test_that_errors_are_converted(self, client, mocker, error, error_code): + mocker.patch("pcluster.aws.sts.StsClient.get_account_id", return_value="fake-id") + mocker.patch( + "pcluster.api.controllers.image_operations_controller.ensure_default_build_image_stack_cleanup_role", + return_value="arn:aws:iam::123456789012:role/cleanup", + ) mocker.patch("pcluster.models.imagebuilder.ImageBuilder.create", side_effect=error) expected_error = {"message": "test error"} @@ -652,6 +667,11 @@ def test_that_errors_are_converted(self, client, mocker, error, error_code): assert_that(response.get_json()).is_equal_to(expected_error) def test_parse_config_error(self, client, mocker): + mocker.patch("pcluster.aws.sts.StsClient.get_account_id", return_value="fake-id") + mocker.patch( + "pcluster.api.controllers.image_operations_controller.ensure_default_build_image_stack_cleanup_role", + return_value="arn:aws:iam::123456789012:role/cleanup", + ) mocker.patch("pcluster.aws.ec2.Ec2Client.image_exists", return_value=False) mocker.patch("pcluster.aws.cfn.CfnClient.stack_exists", return_value=False) mocker.patch("marshmallow.Schema.load", side_effect=ValidationError(message={"Error": "error"})) diff --git a/cli/tests/pcluster/cli/test_build_image.py b/cli/tests/pcluster/cli/test_build_image.py index 531d086566..2824a29325 100644 --- a/cli/tests/pcluster/cli/test_build_image.py +++ b/cli/tests/pcluster/cli/test_build_image.py @@ -6,13 +6,25 @@ # OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and # limitations under the License. import itertools +import json +from collections import deque +from unittest.mock import PropertyMock import pytest from assertpy import assert_that +from pcluster.api.controllers.image_operations_controller import build_image from pcluster.api.models import BuildImageResponseContent +from pcluster.aws.common import AWSClientError from pcluster.cli.entrypoint import run from pcluster.cli.exceptions import APIOperationException +from pcluster.imagebuilder_utils import ( + PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_BOOTSTRAP_TAG_KEY, + PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_REVISION, + _expected_inline_policy, + ensure_default_build_image_stack_cleanup_role, + get_cleanup_role_name, +) class TestBuildImageCommand: @@ -117,6 +129,178 @@ def test_nodejs_wrong_version_error(self, mocker, test_datadir): self.run_build_image_command(test_datadir) assert_that(exc_info.value.data.get("message")).matches("requires Node.js version >=") + def test_get_cleanup_role_name(self): + fake_account = "123456789012" + role = get_cleanup_role_name(fake_account) + assert_that(role).starts_with("PClusterBuildImageCleanupRole-") + assert_that(role).ends_with(f"-revision-{PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_REVISION}") + + @pytest.mark.parametrize( + "cleanup_role_in_cfg, vpc_cfg_present, expect_call, expect_vpc_flag", + [ + (None, False, True, False), + (None, True, True, True), + ("arn:aws:iam::123456789012:role/AlreadyProvided", False, False, False), + ("arn:aws:iam::123456789012:role/AlreadyProvided", True, False, False), + ], + ) + def test_enable_cleanup_role_call_and_vpc_flag( + self, + mocker, + aws_api_mock, + cleanup_role_in_cfg, + vpc_cfg_present, + expect_call, + expect_vpc_flag, + ): + """Validate following things. + + 1. when (and only when) no custom CleanupLambdaRole is given call ensure_default_build_image_stack_cleanup_role. + 2. attach_vpc_access_policy flag reflects presence of DeploymentSettings/LambdaFunctionsVpcConfig. + """ + ensure_mock = mocker.patch( + "pcluster.api.controllers.image_operations_controller.ensure_default_build_image_stack_cleanup_role", + return_value="arn:aws:iam::123456789012:role/cleanup", + ) + + cfg_lines = [ + "Build:", + " InstanceType: fake-instance", + " ParentImage: ami-0123456789abcdef0", + ] + if cleanup_role_in_cfg: + cfg_lines += [ + " Iam:", + f" CleanupLambdaRole: {cleanup_role_in_cfg}", + ] + + if vpc_cfg_present: + cfg_lines += [ + "DeploymentSettings:", + " LambdaFunctionsVpcConfig:", + " SubnetIds: subnet-12345678", + " SecurityGroupIds: sg-xxxxxx", + ] + + cfg = "\n".join(cfg_lines) + "\n" + + aws_api_mock.sts.get_account_id.return_value = "fake-account" + + mocker.patch("pcluster.models.imagebuilder.ImageBuilder.create", return_value=[], autospec=True) + mocker.patch( + "pcluster.models.imagebuilder.ImageBuilder.stack", + new_callable=PropertyMock, + return_value=mocker.MagicMock( + pcluster_image_id="fake-image-id", + status="CREATE_IN_PROGRESS", + id="arn:stack/fake", + version="fake-pcluster-version", + ), + ) + + build_image( + build_image_request_content={"imageConfiguration": cfg, "imageId": "fake-image-id"}, + region="us-east-1", + ) + + if expect_call: + ensure_mock.assert_called_once() + _, kwargs = ensure_mock.call_args + assert kwargs["attach_vpc_access_policy"] is expect_vpc_flag + else: + ensure_mock.assert_not_called() + + @pytest.mark.parametrize( + "vpc_cfg_present", + [False, True], + ) + def test_ensure_default_build_image_stack_cleanup_role_bootstrap_flow(self, aws_api_mock, vpc_cfg_present): + """ + If the cleanup IAM role exist and have an old revision. + + The IAM API call execution order must be: + 1. If vpc_cfg_present, attach the AWS-managed LambdaVPCAccess policy + 2. Attach the AWS-managed Lambda basic policy. + 3. Update/write the inline policy. + 4. Only after the inline policy succeeds, set or bump the revision tag. + """ + call_seq = deque() + + def record(name): + return lambda *a, **k: call_seq.append(name) + + resp_outdated = {"Role": {"RoleName": "dummy", "Tags": []}} + aws_api_mock.iam.get_role.return_value = resp_outdated + aws_api_mock.iam.attach_role_policy.side_effect = record("attach") + aws_api_mock.iam.put_role_policy.side_effect = record("put") + aws_api_mock.iam.tag_role.side_effect = record("tag") + + ensure_default_build_image_stack_cleanup_role("fake-account", "aws", vpc_cfg_present) + assert list(call_seq) == ["attach", "attach", "put", "tag"] if vpc_cfg_present else ["attach", "put", "tag"] + + def test_ensure_default_build_image_stack_cleanup_role_skip_when_already_bootstrapped( + self, + mocker, + aws_api_mock, + ): + current_resp = { + "Role": { + "RoleName": "dummy", + "Tags": [ + { + "Key": PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_BOOTSTRAP_TAG_KEY, + "Value": "true", + } + ], + } + } + + aws_api_mock.iam.get_role.return_value = current_resp + + ensure_default_build_image_stack_cleanup_role("fake-account-id", "fake-partition") + aws_api_mock.iam.get_role.assert_called() + aws_api_mock.iam.put_role_policy.assert_not_called() + aws_api_mock.iam.tag_role.assert_not_called() + + def test_ensure_default_build_image_stack_cleanup_role_permission_denied(self, aws_api_mock): + """put_role_policy AccessDenied → It should throw an error and the cleanup IAM role should not be tagged.""" + resp_outdated = {"Role": {"RoleName": "dummy", "Tags": []}} + aws_api_mock.iam.get_role.return_value = resp_outdated + + aws_api_mock.iam.put_role_policy.side_effect = AWSClientError( + function_name="put_role_policy", + message="Access denied", + error_code="AccessDenied", + ) + + with pytest.raises(AWSClientError) as exc: + ensure_default_build_image_stack_cleanup_role("fake-account", "aws") + assert_that(exc.value.error_code).is_equal_to("AccessDenied") + # tag_role should not be called + aws_api_mock.iam.tag_role.assert_not_called() + + @pytest.mark.parametrize( + "account_id, partition", + [ + ("123456789012", "aws"), + ("000000000000", "aws-us-gov"), + ], + ) + def test_expected_inline_policy_dynamic_fields(self, account_id, partition): + raw = _expected_inline_policy(account_id, partition) + policy = json.loads(raw) + assert policy["Version"] == "2012-10-17" + assert len(policy["Statement"]) == 13 + for statement in policy["Statement"]: + resources = statement["Resource"] + resources = resources if isinstance(resources, list) else [resources] + for res in resources: + if res == "*": + continue + assert f"arn:{partition}" in res + if not res == f"arn:{partition}:ec2:*::image/*": + assert f":{account_id}:" in res + def _build_args(self, args): args = [[k, v] if v is not None else [k] for k, v in args.items()] return list(itertools.chain(*args)) diff --git a/cli/tests/pcluster/templates/test_imagebuilder_stack.py b/cli/tests/pcluster/templates/test_imagebuilder_stack.py index 79cd975e8d..721d03d23c 100644 --- a/cli/tests/pcluster/templates/test_imagebuilder_stack.py +++ b/cli/tests/pcluster/templates/test_imagebuilder_stack.py @@ -742,7 +742,6 @@ def _test_resources(generated_resources, expected_resources): }, { "Type": "AWS::IAM::Role", - "DependsOn": ["DeleteStackFunctionExecutionRole"], "Properties": { "RoleName": { "Fn::Join": [ @@ -808,7 +807,6 @@ def _test_resources(generated_resources, expected_resources): }, { "Type": "AWS::IAM::InstanceProfile", - "DependsOn": ["DeleteStackFunctionExecutionRole"], "Properties": { "Roles": [{"Ref": "InstanceRole"}], "Path": "/parallelcluster/", @@ -850,7 +848,6 @@ def _test_resources(generated_resources, expected_resources): }, { "Type": "AWS::IAM::Role", - "DependsOn": ["DeleteStackFunctionExecutionRole"], "Properties": { "RoleName": { "Fn::Join": [ @@ -917,7 +914,6 @@ def _test_resources(generated_resources, expected_resources): }, { "Type": "AWS::IAM::InstanceProfile", - "DependsOn": ["DeleteStackFunctionExecutionRole"], "Properties": { "Roles": [{"Ref": "InstanceRole"}], "Path": "/parallelcluster/", @@ -960,7 +956,6 @@ def _test_resources(generated_resources, expected_resources): None, { "Type": "AWS::IAM::InstanceProfile", - "DependsOn": ["DeleteStackFunctionExecutionRole"], "Properties": { "Roles": ["test-InstanceRole"], "Path": "/parallelcluster/", @@ -1037,968 +1032,6 @@ def test_imagebuilder_instance_role( ).is_equal_to(expected_instance_profile_in_infrastructure_configuration) -@pytest.mark.parametrize( - "resource, response, expected_execution_role, expected_execution_role_in_lambda_function", - [ - ( - { - "imagebuilder": { - "build": { - "parent_image": "ami-0185634c5a8a37250", - "instance_type": "c5.xlarge", - "components": [ - {"type": "script", "value": "s3://test/post_install.sh"}, - {"type": "script", "value": "s3://test/post_install2.sh"}, - ], - "update_os_packages": {"enabled": True}, - }, - } - }, - { - "Architecture": "x86_64", - "BlockDeviceMappings": [ - { - "DeviceName": "/dev/xvda", - "Ebs": { - "VolumeSize": 50, - }, - } - ], - }, - { - "Type": "AWS::IAM::Role", - "Properties": { - "RoleName": { - "Fn::Join": [ - "", - [ - "ParallelClusterImageCleanup-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - "AssumeRolePolicyDocument": { - "Statement": [ - { - "Action": "sts:AssumeRole", - "Effect": "Allow", - "Principal": {"Service": "lambda.amazonaws.com"}, - } - ], - "Version": "2012-10-17", - }, - "ManagedPolicyArns": [ - {"Fn::Sub": "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"}, - ], - "Path": "/parallelcluster/", - "Policies": [ - { - "PolicyDocument": { - "Statement": [ - { - "Action": "iam:DeleteRole", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":role/parallelcluster/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "iam:DeleteInstanceProfile", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":instance-profile/parallelcluster/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteInfrastructureConfiguration", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":infrastructure-configuration/parallelclusterimage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-updateos-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-tag-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-script-0-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-script-1-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteImageRecipe", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":image-recipe/parallelclusterimage-my-image/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteDistributionConfiguration", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":distribution-configuration/parallelclusterimage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": [ - "imagebuilder:DeleteImage", - "imagebuilder:GetImage", - "imagebuilder:CancelImageCreation", - ], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":image/parallelclusterimage-my-image/*", - ], - ] - }, - }, - { - "Action": "cloudformation:DeleteStack", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":cloudformation:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":stack/My-Image/", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "ec2:CreateTags", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":ec2:", - {"Ref": "AWS::Region"}, - "::image/*", - ], - ] - }, - }, - { - "Action": "tag:TagResources", - "Effect": "Allow", - "Resource": "*", - }, - { - "Action": ["iam:DetachRolePolicy", "iam:DeleteRole", "iam:DeleteRolePolicy"], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":role/parallelcluster/ParallelClusterImageCleanup-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": ["lambda:DeleteFunction", "lambda:RemovePermission"], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":lambda:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":function:ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "logs:DeleteLogGroup", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":logs:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":log-group:/aws/lambda/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ":*", - ], - ] - }, - }, - { - "Action": "iam:RemoveRoleFromInstanceProfile", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":instance-profile/parallelcluster/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": ["iam:DetachRolePolicy", "iam:DeleteRolePolicy"], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":role/parallelcluster/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": [ - "SNS:GetTopicAttributes", - "SNS:DeleteTopic", - "SNS:Unsubscribe", - ], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":sns:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - ], - "Version": "2012-10-17", - }, - "PolicyName": "LambdaCleanupPolicy", - } - ], - "Tags": [ - { - "Key": "parallelcluster:image_id", - "Value": "My-Image", - }, - { - "Key": "parallelcluster:image_name", - "Value": "My-Image", - }, - ], - }, - }, - {"Fn::GetAtt": ["DeleteStackFunctionExecutionRole", "Arn"]}, - ), - ( - { - "imagebuilder": { - "build": { - "parent_image": "ami-0185634c5a8a37250", - "instance_type": "c5.xlarge", - "components": [ - {"type": "script", "value": "s3://test/post_install.sh"}, - {"type": "script", "value": "s3://test/post_install2.sh"}, - ], - "update_os_packages": {"enabled": True}, - }, - "dev_settings": { - "disable_validate_and_test": False, - }, - } - }, - { - "Architecture": "x86_64", - "BlockDeviceMappings": [ - { - "DeviceName": "/dev/xvda", - "Ebs": { - "VolumeSize": 50, - }, - } - ], - }, - { - "Type": "AWS::IAM::Role", - "Properties": { - "RoleName": { - "Fn::Join": [ - "", - [ - "ParallelClusterImageCleanup-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - "AssumeRolePolicyDocument": { - "Statement": [ - { - "Action": "sts:AssumeRole", - "Effect": "Allow", - "Principal": {"Service": "lambda.amazonaws.com"}, - } - ], - "Version": "2012-10-17", - }, - "ManagedPolicyArns": [ - {"Fn::Sub": "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"}, - ], - "Path": "/parallelcluster/", - "Policies": [ - { - "PolicyDocument": { - "Statement": [ - { - "Action": "iam:DeleteRole", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":role/parallelcluster/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "iam:DeleteInstanceProfile", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":instance-profile/parallelcluster/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteInfrastructureConfiguration", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":infrastructure-configuration/parallelclusterimage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-updateos-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-tag-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-script-0-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-script-1-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteComponent", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":component/parallelclusterimage-test-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - "/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteImageRecipe", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":image-recipe/parallelclusterimage-my-image/*", - ], - ] - }, - }, - { - "Action": "imagebuilder:DeleteDistributionConfiguration", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":distribution-configuration/parallelclusterimage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": [ - "imagebuilder:DeleteImage", - "imagebuilder:GetImage", - "imagebuilder:CancelImageCreation", - ], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":imagebuilder:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":image/parallelclusterimage-my-image/*", - ], - ] - }, - }, - { - "Action": "cloudformation:DeleteStack", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":cloudformation:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":stack/My-Image/", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "ec2:CreateTags", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":ec2:", - {"Ref": "AWS::Region"}, - "::image/*", - ], - ] - }, - }, - { - "Action": "tag:TagResources", - "Effect": "Allow", - "Resource": "*", - }, - { - "Action": ["iam:DetachRolePolicy", "iam:DeleteRole", "iam:DeleteRolePolicy"], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":role/parallelcluster/ParallelClusterImageCleanup-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": ["lambda:DeleteFunction", "lambda:RemovePermission"], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":lambda:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":function:ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": "logs:DeleteLogGroup", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":logs:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":log-group:/aws/lambda/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ":*", - ], - ] - }, - }, - { - "Action": "iam:RemoveRoleFromInstanceProfile", - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":instance-profile/parallelcluster/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": ["iam:DetachRolePolicy", "iam:DeleteRolePolicy"], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":iam::", - {"Ref": "AWS::AccountId"}, - ":role/parallelcluster/ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - { - "Action": [ - "SNS:GetTopicAttributes", - "SNS:DeleteTopic", - "SNS:Unsubscribe", - ], - "Effect": "Allow", - "Resource": { - "Fn::Join": [ - "", - [ - "arn:", - {"Ref": "AWS::Partition"}, - ":sns:", - {"Ref": "AWS::Region"}, - ":", - {"Ref": "AWS::AccountId"}, - ":ParallelClusterImage-", - {"Fn::Select": [2, {"Fn::Split": ["/", {"Ref": "AWS::StackId"}]}]}, - ], - ] - }, - }, - ], - "Version": "2012-10-17", - }, - "PolicyName": "LambdaCleanupPolicy", - } - ], - "Tags": [ - { - "Key": "parallelcluster:image_id", - "Value": "My-Image", - }, - { - "Key": "parallelcluster:image_name", - "Value": "My-Image", - }, - ], - }, - }, - {"Fn::GetAtt": ["DeleteStackFunctionExecutionRole", "Arn"]}, - ), - ( - { - "imagebuilder": { - "build": { - "parent_image": "ami-0185634c5a8a37250", - "instance_type": "c5.xlarge", - "iam": { - "cleanup_lambda_role": "arn:aws:iam::346106133209:role/custom_lambda_cleanup_role", - }, - }, - } - }, - { - "Architecture": "x86_64", - "BlockDeviceMappings": [ - { - "DeviceName": "/dev/xvda", - "Ebs": { - "VolumeSize": 50, - }, - } - ], - }, - None, - "arn:aws:iam::346106133209:role/custom_lambda_cleanup_role", - ), - ], -) -def test_imagebuilder_lambda_execution_role( - mocker, - resource, - response, - expected_execution_role, - expected_execution_role_in_lambda_function, -): - mock_aws_api(mocker) - mocker.patch("pcluster.imagebuilder_utils.get_ami_id", return_value="ami-0185634c5a8a37250") - mocker.patch( - "pcluster.aws.ec2.Ec2Client.describe_image", - return_value=ImageInfo(response), - ) - # mock bucket initialization parameters - mock_bucket(mocker) - - imagebuild = imagebuilder_factory(resource).get("imagebuilder") - generated_template = CDKTemplateBuilder().build_imagebuilder_template( - imagebuild, "My-Image", dummy_imagebuilder_bucket() - ) - assert_that(generated_template.get("Resources").get("DeleteStackFunctionExecutionRole")).is_equal_to( - expected_execution_role - ) - assert_that( - generated_template.get("Resources").get("DeleteStackFunction").get("Properties").get("Role") - ).is_equal_to(expected_execution_role_in_lambda_function) - - @pytest.mark.parametrize( "resource, response, expected_components", [ diff --git a/cli/tests/pcluster/utils.py b/cli/tests/pcluster/utils.py index 7ee5d1ab47..8e31c445ce 100644 --- a/cli/tests/pcluster/utils.py +++ b/cli/tests/pcluster/utils.py @@ -113,14 +113,20 @@ def assert_lambdas_have_expected_vpc_config_and_managed_policy(generated_templat resources = generated_template.get("Resources") for lambda_function in _get_lambda_functions(resources): - role = resources.get(_get_role_name(lambda_function)) + role_name = _get_role_name(lambda_function) + if isinstance(role_name, str) and role_name.__contains__("arn"): + role = None + else: + role = resources.get(_get_role_name(lambda_function)) if expected_vpc_config: assert_that(_get_vpc_config(lambda_function)).is_equal_to(expected_vpc_config) - assert_that(_get_managed_policy_arns(role)).contains(LAMBDA_VPC_ACCESS_MANAGED_POLICY) + if role: + assert_that(_get_managed_policy_arns(role)).contains(LAMBDA_VPC_ACCESS_MANAGED_POLICY) else: assert_that(_get_vpc_config(lambda_function)).is_none() - assert_that(_get_managed_policy_arns(role)).does_not_contain(LAMBDA_VPC_ACCESS_MANAGED_POLICY) + if role: + assert_that(_get_managed_policy_arns(role)).does_not_contain(LAMBDA_VPC_ACCESS_MANAGED_POLICY) def _get_vpc_config(lambda_function): @@ -128,7 +134,12 @@ def _get_vpc_config(lambda_function): def _get_role_name(lambda_function): - return lambda_function.get("Properties").get("Role").get("Fn::GetAtt")[0] + role_prop = lambda_function.get("Properties").get("Role") + + if "Fn::GetAtt" in role_prop: + return role_prop.get("Fn::GetAtt")[0] + + return role_prop.get("Fn::Sub") def _get_lambda_functions(resources): diff --git a/tests/integration-tests/tests/common/assertions.py b/tests/integration-tests/tests/common/assertions.py index 9aa23d9d67..7775b21a1d 100644 --- a/tests/integration-tests/tests/common/assertions.py +++ b/tests/integration-tests/tests/common/assertions.py @@ -13,6 +13,7 @@ from typing import List, Union import boto3 +import pytest from assertpy import assert_that, soft_assertions from constants import NodeType from remote_command_executor import RemoteCommandExecutor @@ -390,3 +391,34 @@ def assert_instance_config_version_on_ddb( f"Verified that all {n_nodes} nodes ({node_type}) stored the expected config version on DDB: " f"{expected_cluster_config_version}" ) + + +def _assert_build_image_stack_deleted(stack_name, region, timeout_seconds=600, poll_interval=30): + """ + Poll the CloudFormation stack status to ensure it is deleted successfully. + + - Stack does not exist or StackStatus==DELETE_COMPLETE ⇒ return immediately + - StackStatus==DELETE_FAILED ⇒ assert failure immediately + - Timeout if not deleted ⇒ assert failure + """ + cfn = boto3.client("cloudformation", region_name=region) + deadline = time.time() + timeout_seconds + last_status = None + + while time.time() < deadline: + try: + last_status = cfn.describe_stacks(StackName=stack_name)["Stacks"][0]["StackStatus"] + except cfn.exceptions.ClientError as e: + if "does not exist" in str(e): + return + raise + + if last_status == "DELETE_COMPLETE": + return + + if last_status == "DELETE_FAILED": + pytest.fail(f"Image stack {stack_name} entered DELETE_FAILED") + + time.sleep(poll_interval) + + pytest.fail(f"Timed-out waiting for stack {stack_name} deletion (last status: {last_status})") diff --git a/tests/integration-tests/tests/createami/test_createami.py b/tests/integration-tests/tests/createami/test_createami.py index 0b985514f5..f1d2dfa5a3 100644 --- a/tests/integration-tests/tests/createami/test_createami.py +++ b/tests/integration-tests/tests/createami/test_createami.py @@ -30,6 +30,7 @@ from utils import generate_stack_name, get_arn_partition, get_gpu_count from tests.common.assertions import ( + _assert_build_image_stack_deleted, assert_head_node_is_running, assert_instance_has_desired_imds_v2_setting, assert_instance_has_desired_tags, @@ -114,6 +115,7 @@ def test_build_image( In the cluster config there is DisableValidateAndTest:False to enable kitchen tests in the validate phase. The created AMI is also used for a cluster. Also check that the build instance has the desired ImdsSupport setting (v2.0, so IMDSv2 is required). + Also check that the build-image stack can be fully self deleted. """ image_id = generate_stack_name("integ-tests-build-image", request.config.getoption("stackname_suffix")) @@ -184,6 +186,7 @@ def test_build_image( _test_cluster_creation( image.ec2_image_id, pcluster_config_reader, region, clusters_factory, scheduler_commands_factory ) + _assert_build_image_stack_deleted(image.image_id, region, 600, 30) def _test_cluster_creation(image_id, pcluster_config_reader, region, clusters_factory, scheduler_commands_factory):