Skip to content

Commit 941bd2a

Browse files
committed
[Validators] Add validator to prevent the use of Placement Groups with Capacity Blocks.
Signed-off-by: Giacomo Marciani <mgiacomo@amazon.com>
1 parent 5b5cdd6 commit 941bd2a

File tree

5 files changed

+91
-2
lines changed

5 files changed

+91
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ CHANGELOG
88
- Add new configuration section `Scheduling/SlurmSettings/ExternalSlurmdbd` to connect the cluster to an external Slurmdbd.
99
- Add support for Amazon Linux 2023.
1010
- Add support for `price-capacity-optimized` as an `AllocationStrategy`.
11+
- Add validator to prevent the use of Placement Groups with Capacity Blocks.
1112

1213
**BUG FIXES**
1314
- Fix DRA configuration to make `AutoExportPolicy` and `AutoImportPolicy` optional.

cli/src/pcluster/config/cluster_config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@
146146
InstanceTypeValidator,
147147
KeyPairValidator,
148148
PlacementGroupCapacityReservationValidator,
149+
PlacementGroupCapacityTypeValidator,
149150
PlacementGroupNamingValidator,
150151
)
151152
from pcluster.validators.efs_validators import EfsMountOptionsValidator
@@ -3027,6 +3028,11 @@ def _register_validators(self, context: ValidatorContext = None): # noqa: C901
30273028
os=self.image.os,
30283029
architecture=self.head_node.architecture,
30293030
)
3031+
self._register_validator(
3032+
PlacementGroupCapacityTypeValidator,
3033+
capacity_type=queue.capacity_type,
3034+
placement_group_enabled=queue.is_placement_group_enabled_for_compute_resource(compute_resource),
3035+
)
30303036
# The validation below has to be in cluster config class instead of queue class
30313037
# to make sure the subnet APIs are cached by previous validations.
30323038
cr_target = compute_resource.capacity_reservation_target or queue.capacity_reservation_target

cli/src/pcluster/validators/ec2_validators.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,20 @@ def _validate_with_subnets(
585585
)
586586

587587

588+
class PlacementGroupCapacityTypeValidator(Validator):
589+
"""Validate the placement group is compatible with the capacity type."""
590+
591+
def _validate(self, capacity_type: str, placement_group_enabled: bool):
592+
if capacity_type == CapacityType.CAPACITY_BLOCK and placement_group_enabled:
593+
self._add_failure(
594+
"When using a capacity block reservation, a placement group constraint should not be set "
595+
"as insufficient capacity errors may occur due to placement constraints outside of the "
596+
"reservation even if the capacity reservation has remaining capacity. "
597+
"Please remove the placement group for the compute resource.",
598+
FailureLevel.WARNING,
599+
)
600+
601+
588602
class PlacementGroupCapacityReservationValidator(Validator):
589603
"""Validate the placement group is compatible with the capacity reservation target."""
590604

cli/tests/pcluster/config/test_cluster_config.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
SlurmSettings,
3838
Tag,
3939
)
40+
from pcluster.validators.ec2_validators import PlacementGroupCapacityTypeValidator
4041
from tests.pcluster.aws.dummy_aws_api import mock_aws_api
4142

4243
mock_compute_resources = [
@@ -381,7 +382,14 @@ def test_registration_of_validators(self, memory_scheduling_enabled, mocker):
381382
),
382383
)
383384
cluster_config._register_validators()
384-
assert_that(cluster_config._validators).is_not_empty()
385+
actual_validators = cluster_config._validators
386+
assert_that(actual_validators).is_not_empty()
387+
assert_that(actual_validators).contains(
388+
(
389+
PlacementGroupCapacityTypeValidator().__class__,
390+
{"capacity_type": CapacityType.ONDEMAND, "placement_group_enabled": False},
391+
)
392+
)
385393

386394
def test_instances_in_slurm_queue(self):
387395
queue = SlurmQueue(

cli/tests/pcluster/validators/test_ec2_validators.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from pcluster.aws.common import AWSClientError
2323
from pcluster.config.cluster_config import CapacityReservationTarget, PlacementGroup
2424
from pcluster.config.common import CapacityType
25+
from pcluster.validators.common import FailureLevel
2526
from pcluster.validators.ec2_validators import (
2627
AmiOsCompatibleValidator,
2728
CapacityReservationResourceGroupValidator,
@@ -36,10 +37,11 @@
3637
InstanceTypeValidator,
3738
KeyPairValidator,
3839
PlacementGroupCapacityReservationValidator,
40+
PlacementGroupCapacityTypeValidator,
3941
PlacementGroupNamingValidator,
4042
)
4143
from tests.pcluster.aws.dummy_aws_api import mock_aws_api
42-
from tests.pcluster.validators.utils import assert_failure_messages
44+
from tests.pcluster.validators.utils import assert_failure_level, assert_failure_messages
4345

4446

4547
@pytest.mark.parametrize(
@@ -1380,6 +1382,64 @@ def test_placement_group_capacity_reservation_validator(
13801382
assert_failure_messages(actual_failure, expected_message)
13811383

13821384

1385+
@pytest.mark.parametrize(
1386+
"capacity_type, placement_group_enabled, expected_failure_level, expected_message",
1387+
[
1388+
(
1389+
CapacityType.ONDEMAND,
1390+
True,
1391+
None,
1392+
None,
1393+
),
1394+
(
1395+
CapacityType.ONDEMAND,
1396+
False,
1397+
None,
1398+
None,
1399+
),
1400+
(
1401+
CapacityType.SPOT,
1402+
True,
1403+
None,
1404+
None,
1405+
),
1406+
(
1407+
CapacityType.SPOT,
1408+
False,
1409+
None,
1410+
None,
1411+
),
1412+
(
1413+
CapacityType.CAPACITY_BLOCK,
1414+
True,
1415+
FailureLevel.WARNING,
1416+
"When using a capacity block reservation, a placement group constraint should not be set "
1417+
"as insufficient capacity errors may occur due to placement constraints outside of the "
1418+
"reservation even if the capacity reservation has remaining capacity. "
1419+
"Please remove the placement group for the compute resource.",
1420+
),
1421+
(
1422+
CapacityType.CAPACITY_BLOCK,
1423+
False,
1424+
None,
1425+
None,
1426+
),
1427+
],
1428+
)
1429+
def test_placement_group_capacity_type_validator(
1430+
capacity_type,
1431+
placement_group_enabled,
1432+
expected_failure_level,
1433+
expected_message,
1434+
):
1435+
actual_failures = PlacementGroupCapacityTypeValidator().execute(
1436+
capacity_type=capacity_type,
1437+
placement_group_enabled=placement_group_enabled,
1438+
)
1439+
assert_failure_level(actual_failures, expected_failure_level)
1440+
assert_failure_messages(actual_failures, expected_message)
1441+
1442+
13831443
@pytest.mark.parametrize(
13841444
"instance_type, instance_type_data, expected_message, logger_message",
13851445
[

0 commit comments

Comments
 (0)