Skip to content

Commit f8cacff

Browse files
authored
[LoginNodes] Improve login node security posture (#6377)
1 parent d4d31c9 commit f8cacff

File tree

17 files changed

+326
-128
lines changed

17 files changed

+326
-128
lines changed

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@ CHANGELOG
99
- Add support for custom actions on login nodes.
1010
- Allow DCV connection on login nodes.
1111
- Add support for ap-southeast-3 region.
12+
- Add security groups to login node network load balancer.
13+
- Add `AllowedIps` configuration for login nodes.
1214

1315
**BUG FIXES**
1416
- Fix validator `EfaPlacementGroupValidator` so that it does not suggest to configure a Placement Group when Capacity Blocks are used.
1517
- Fix sporadic cluster creation failures with managed FSx for Lustre.
18+
- Fix issue with login nodes being marked unhealthy when restricting SSH access.
1619
- Fix `retrieve_supported_regions` so that it can get the correct S3 url.
17-
1820
3.10.1
1921
------
2022

cli/src/pcluster/config/cluster_config.py

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -833,13 +833,11 @@ def __init__(self, allowed_ips: str = None, **kwargs):
833833
class LoginNodesSsh(_BaseSsh):
834834
"""Represent the SSH configuration for LoginNodes."""
835835

836-
def __init__(self, allowed_ips=CIDR_ALL_IPS, **kwargs):
836+
def __init__(self, allowed_ips: str = None, **kwargs):
837837
super().__init__(**kwargs)
838-
# This is an internal parameter not yet exposed in the configuration
839-
# By default is initalized to allow SSH from everywhere.
840-
# Will be aligned to the setting defined in the HeadNode to allow restricting SSH access
841-
# from a specific CIDR or prefix-list.
842-
self.allowed_ips = allowed_ips
838+
# If AllowedIPs is not specified for the login node pool, then allowed_ips will default to the same settings
839+
# in the HeadNode to restrict SSH access from a specific CIDR or prefix-list.
840+
self.allowed_ips = Resource.init_param(allowed_ips)
843841

844842

845843
class Dcv(Resource):
@@ -1408,14 +1406,6 @@ def __init__(
14081406
super().__init__(**kwargs)
14091407
self.pools = pools
14101408

1411-
@property
1412-
def has_dcv_enabled(self):
1413-
"""Returns True if there is a pool in the cluster with DCV enabled."""
1414-
for pool in self.pools:
1415-
if pool.has_dcv_enabled:
1416-
return True
1417-
return False
1418-
14191409

14201410
class HeadNode(Resource):
14211411
"""Represent the Head Node resource."""
@@ -2903,12 +2893,10 @@ def __init__(
29032893
pool.ssh.key_name = self.head_node.ssh.key_name
29042894
elif not pool.ssh:
29052895
pool.ssh = LoginNodesSsh(key_name=self.head_node.ssh.key_name)
2906-
# Forces the source allowed IP for the SSH connection to the one defined in the HeadNode
2907-
# This parameter is not yet exposed to customers through the config.
2908-
# We may want to change this once we block access to the HeadNode to regular users.
2909-
# This will only be set at creation time, if the cluster is updated and the HeadNode configuration
2910-
# changes, the cange will not be reflected into the LoginNode SecurityGroup
2911-
pool.ssh.allowed_ips = self.head_node.ssh.allowed_ips
2896+
# If the login node pool allowed IPs is not specified, forces the source allowed IP for the
2897+
# SSH connection to the one defined in the HeadNode
2898+
if not pool.ssh.allowed_ips:
2899+
pool.ssh.allowed_ips = self.head_node.ssh.allowed_ips
29122900

29132901
self.__image_dict = None
29142902
# Cache capacity reservations information together to reduce number of boto3 calls.

cli/src/pcluster/schemas/cluster_schema.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1384,6 +1384,7 @@ class LoginNodesSshSchema(BaseSshSchema):
13841384
"""Represent the Ssh schema of LoginNodes."""
13851385

13861386
key_name = fields.Str(metadata={"update_policy": UpdatePolicy.LOGIN_NODES_STOP})
1387+
allowed_ips = fields.Str(validate=is_cidr_or_prefix_list, metadata={"update_policy": UpdatePolicy.LOGIN_NODES_STOP})
13871388

13881389
@post_load
13891390
def make_resource(self, data, **kwargs):

cli/src/pcluster/templates/cdk_builder_utils.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ def get_login_nodes_security_groups_full(
274274
managed_login_security_group: ec2.CfnSecurityGroup,
275275
pool: LoginNodesPool,
276276
):
277-
"""Return full security groups to be used for the login node, default plus additional ones."""
277+
"""Return security groups to be used for the login nodes and network load balancer, default plus additional ones."""
278278
login_nodes_security_groups = []
279279

280280
# Default security groups, created by us or provided by the user
@@ -290,6 +290,16 @@ def get_login_nodes_security_groups_full(
290290
return login_nodes_security_groups
291291

292292

293+
def get_source_ingress_rule(setting):
294+
"""Return security group ingress property depending on whether the input setting is a prefix list or CIDR ip."""
295+
if setting.startswith("pl"):
296+
return ec2.CfnSecurityGroup.IngressProperty(
297+
ip_protocol="tcp", from_port=22, to_port=22, source_prefix_list_id=setting
298+
)
299+
else:
300+
return ec2.CfnSecurityGroup.IngressProperty(ip_protocol="tcp", from_port=22, to_port=22, cidr_ip=setting)
301+
302+
293303
def add_cluster_iam_resource_prefix(stack_name, config, name: str, iam_type: str):
294304
"""Return a path and Name prefix from the Resource prefix config option."""
295305
full_resource_path = None

cli/src/pcluster/templates/cluster_stack.py

Lines changed: 64 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
get_log_group_deletion_policy,
9999
get_shared_storage_ids_by_type,
100100
get_slurm_specific_dna_json_for_head_node,
101+
get_source_ingress_rule,
101102
get_user_data_content,
102103
to_comma_separated_string,
103104
)
@@ -193,9 +194,10 @@ def _get_login_security_groups(self):
193194
if isinstance(self.config, SlurmClusterConfig) and self.config.login_nodes
194195
else []
195196
)
196-
197+
# Add the managed login node security groups
197198
if self._login_security_group:
198-
login_security_groups.append(self._login_security_group.ref)
199+
for _, managed_security_group in self._login_security_group.items():
200+
login_security_groups.append(managed_security_group.ref)
199201

200202
return login_security_groups
201203

@@ -467,7 +469,7 @@ def _add_login_nodes_resources(self):
467469
shared_storage_infos=self.shared_storage_infos,
468470
shared_storage_mount_dirs=self.shared_storage_mount_dirs,
469471
shared_storage_attributes=self.shared_storage_attributes,
470-
login_security_group=self._login_security_group,
472+
login_security_groups=self._login_security_group,
471473
head_eni=self._head_eni,
472474
cluster_hosted_zone=self.scheduler_resources.cluster_hosted_zone if self.scheduler_resources else None,
473475
cluster_bucket=self.bucket,
@@ -574,11 +576,8 @@ def _add_head_eni(self):
574576

575577
def _add_security_groups(self):
576578
head_node_security_groups, managed_head_security_group = self._head_security_groups()
577-
(
578-
login_security_groups,
579-
managed_login_security_group,
580-
custom_login_security_groups,
581-
) = self._login_security_groups()
579+
login_security_groups, managed_login_security_groups = self._login_security_groups()
580+
582581
(
583582
compute_security_groups,
584583
managed_compute_security_group,
@@ -590,13 +589,12 @@ def _add_security_groups(self):
590589
custom_compute_security_groups,
591590
head_node_security_groups,
592591
login_security_groups,
593-
custom_login_security_groups,
594592
managed_compute_security_group,
595593
managed_head_security_group,
596-
managed_login_security_group,
594+
managed_login_security_groups,
597595
)
598596

599-
return managed_head_security_group, managed_compute_security_group, managed_login_security_group
597+
return managed_head_security_group, managed_compute_security_group, managed_login_security_groups
600598

601599
def _head_security_groups(self):
602600
managed_head_security_group = None
@@ -609,7 +607,7 @@ def _head_security_groups(self):
609607
return head_node_security_groups, managed_head_security_group
610608

611609
def _login_security_groups(self):
612-
managed_login_security_group = None
610+
managed_login_security_groups = dict()
613611
custom_login_security_groups = set()
614612
managed_login_security_group_required = False
615613
if self._condition_is_slurm() and self.config.login_nodes:
@@ -622,9 +620,11 @@ def _login_security_groups(self):
622620
managed_login_security_group_required = True
623621
login_security_groups = list(custom_login_security_groups)
624622
if managed_login_security_group_required:
625-
managed_login_security_group = self._add_login_nodes_security_group()
626-
login_security_groups.append(managed_login_security_group.ref)
627-
return login_security_groups, managed_login_security_group, custom_login_security_groups
623+
managed_login_security_groups = self._add_login_nodes_security_group()
624+
login_security_groups.extend(
625+
[security_group.ref for security_group in managed_login_security_groups.values()]
626+
)
627+
return login_security_groups, managed_login_security_groups
628628

629629
def _compute_security_groups(self):
630630
managed_compute_security_group = None
@@ -649,20 +649,18 @@ def _add_inbounds_to_managed_security_groups(
649649
custom_compute_security_groups,
650650
head_node_security_groups,
651651
login_security_groups,
652-
custom_login_security_groups,
653652
managed_compute_security_group,
654653
managed_head_security_group,
655-
managed_login_security_group,
654+
managed_login_security_groups,
656655
):
657656
self._add_inbounds_to_managed_head_security_group(
658657
compute_security_groups, login_security_groups, managed_head_security_group
659658
)
660659

661-
self._add_inbounds_to_managed_login_security_group(
660+
self._add_inbounds_to_managed_login_security_groups(
662661
head_node_security_groups,
663662
compute_security_groups,
664-
custom_login_security_groups,
665-
managed_login_security_group,
663+
managed_login_security_groups,
666664
)
667665

668666
self._add_inbounds_to_managed_compute_security_group(
@@ -698,29 +696,28 @@ def _add_inbounds_to_managed_head_security_group(
698696
port=NFS_PORT,
699697
)
700698

701-
def _add_inbounds_to_managed_login_security_group(
699+
def _add_inbounds_to_managed_login_security_groups(
702700
self,
703701
head_node_security_groups,
704702
compute_security_groups,
705-
custom_login_security_groups,
706-
managed_login_security_group,
703+
managed_login_security_groups,
707704
):
708-
if managed_login_security_group:
709-
# Access to login nodes from head node and compute nodes
710-
for index, security_group in enumerate(head_node_security_groups):
711-
self._allow_all_ingress(
712-
f"LoginSecurityGroupHeadNodeIngress{index}", security_group, managed_login_security_group.ref
713-
)
714-
for index, security_group in enumerate(compute_security_groups):
715-
self._allow_all_ingress(
716-
f"LoginSecurityGroupComputeIngress{index}", security_group, managed_login_security_group.ref
717-
)
718-
for index, security_group in enumerate(custom_login_security_groups):
719-
self._allow_all_ingress(
720-
f"LoginSecurityGroupCustomLoginSecurityGroupIngress{index}",
721-
security_group,
722-
managed_login_security_group.ref,
723-
)
705+
if managed_login_security_groups:
706+
for pool_name, managed_security_group in managed_login_security_groups.items():
707+
# Access to login nodes from head node
708+
for index, security_group in enumerate(head_node_security_groups):
709+
self._allow_all_ingress(
710+
f"{pool_name}LoginSecurityGroupHeadNodeIngress{index}",
711+
security_group,
712+
managed_security_group.ref,
713+
)
714+
# Access to login nodes from compute nodes
715+
for index, security_group in enumerate(compute_security_groups):
716+
self._allow_all_ingress(
717+
f"{pool_name}LoginSecurityGroupComputeIngress{index}",
718+
security_group,
719+
managed_security_group.ref,
720+
)
724721

725722
def _add_inbounds_to_managed_compute_security_group(
726723
self,
@@ -878,44 +875,41 @@ def _add_compute_security_group(self):
878875

879876
return compute_security_group
880877

881-
def _get_source_ingress_rule(self, setting):
882-
if setting.startswith("pl"):
883-
return ec2.CfnSecurityGroup.IngressProperty(
884-
ip_protocol="tcp", from_port=22, to_port=22, source_prefix_list_id=setting
885-
)
886-
else:
887-
return ec2.CfnSecurityGroup.IngressProperty(ip_protocol="tcp", from_port=22, to_port=22, cidr_ip=setting)
888-
889878
def _add_login_nodes_security_group(self):
890-
# TODO review this once we allow more pools to be defined in the LoginNodes section
891-
login_nodes_security_group_ingress = [
892-
# SSH access
893-
self._get_source_ingress_rule(self.config.login_nodes.pools[0].ssh.allowed_ips)
894-
]
895-
896-
if self.config.login_nodes.has_dcv_enabled:
897-
login_nodes_security_group_ingress.append(
898-
# DCV access
899-
ec2.CfnSecurityGroup.IngressProperty(
900-
ip_protocol="tcp",
901-
from_port=self.config.login_nodes.pools[0].dcv.port,
902-
to_port=self.config.login_nodes.pools[0].dcv.port,
903-
cidr_ip=self.config.login_nodes.pools[0].dcv.allowed_ips,
879+
"""Return a dictionary mapping each login node pool name to its respective managed security group."""
880+
pool_to_managed_security_group_dict = dict()
881+
882+
for pool in self.config.login_nodes.pools:
883+
# Check if the pool has user-defined security groups
884+
if not pool.networking.security_groups:
885+
security_group_ingress_rules = [
886+
# Add rule for SSH access
887+
get_source_ingress_rule(pool.ssh.allowed_ips)
888+
]
889+
# Add rule for DCV access if enabled
890+
if pool.has_dcv_enabled:
891+
security_group_ingress_rules.append(
892+
ec2.CfnSecurityGroup.IngressProperty(
893+
ip_protocol="tcp",
894+
from_port=pool.dcv.port,
895+
to_port=pool.dcv.port,
896+
cidr_ip=pool.dcv.allowed_ips,
897+
)
898+
)
899+
pool_to_managed_security_group_dict[pool.name] = ec2.CfnSecurityGroup(
900+
self.stack,
901+
f"{pool.name}LoginNodesSecurityGroup",
902+
group_description="Enable access to the login nodes",
903+
vpc_id=self.config.vpc_id,
904+
security_group_ingress=security_group_ingress_rules,
904905
)
905-
)
906906

907-
return ec2.CfnSecurityGroup(
908-
self.stack,
909-
"LoginNodesSecurityGroup",
910-
group_description="Enable access to the login nodes",
911-
vpc_id=self.config.vpc_id,
912-
security_group_ingress=login_nodes_security_group_ingress,
913-
)
907+
return pool_to_managed_security_group_dict
914908

915909
def _add_head_security_group(self):
916910
head_security_group_ingress = [
917911
# SSH access
918-
self._get_source_ingress_rule(self.config.head_node.ssh.allowed_ips)
912+
get_source_ingress_rule(self.config.head_node.ssh.allowed_ips)
919913
]
920914

921915
if self.config.is_dcv_enabled:

0 commit comments

Comments
 (0)