|
28 | 28 | from retrying import retry
|
29 | 29 | from time_utils import seconds
|
30 | 30 |
|
31 |
| -from utils import find_stack_by_tag, generate_stack_name, is_directory_supported, random_alphanumeric |
| 31 | +from utils import find_stack_by_tag, generate_stack_name, is_directory_supported, random_alphanumeric, get_quarantined_stacks, is_quarantined_stack, quarantine_stacks |
32 | 32 |
|
33 | 33 | from tests.ad_integration.cluster_user import ClusterUser
|
34 | 34 | from tests.common.utils import run_system_analyzer
|
| 35 | +from constants import DO_NOT_DELETE_TAG_KEY, MAX_QUARANTINED_STACKS |
35 | 36 |
|
36 | 37 | NUM_USERS_TO_CREATE = 5
|
37 | 38 | NUM_USERS_TO_TEST = 3
|
38 | 39 |
|
39 |
| -MAX_QUARANTINED_STACKS = 5 |
40 |
| - |
41 | 40 | AD_STACK_PREFIX = 'integ-tests-MultiUserInfraStack'
|
42 | 41 |
|
43 |
| -DO_NOT_DELETE_TAG_KEY = 'DO-NOT-DELETE' |
44 |
| - |
45 | 42 |
|
46 | 43 | def get_infra_stack_outputs(stack_name):
|
47 | 44 | cfn = boto3.client("cloudformation")
|
@@ -228,15 +225,14 @@ def _create_directory_stack(cfn_stacks_factory, request, directory_type, region,
|
228 | 225 | logging.error("Failed to create stack %s", directory_stack_name)
|
229 | 226 | # We want to retain the stack in case of failure in order to debug it.
|
230 | 227 | # We retain a limited number of stacks to contain the costs.
|
231 |
| - n_retained_ad_stacks = get_retained_ad_stacks_count() |
232 |
| - if n_retained_ad_stacks < MAX_QUARANTINED_STACKS: |
233 |
| - logging.warn("Retaining failed stack %s to debug failure (retained: %d, max: %d)", |
234 |
| - directory_stack_name, n_retained_ad_stacks, MAX_QUARANTINED_STACKS) |
235 |
| - |
236 |
| - add_tag_to_stack(directory_stack.name, DO_NOT_DELETE_TAG_KEY, "Retained to debug failure") |
| 228 | + n_quarantined_ad_stacks = len(get_quarantined_stacks(region, prefix=AD_STACK_PREFIX)) |
| 229 | + if n_quarantined_ad_stacks < MAX_QUARANTINED_STACKS: |
| 230 | + logging.warn("Quarantining failed stack %s to debug failure (quarantined: %d, max: %d)", |
| 231 | + directory_stack_name, n_quarantined_ad_stacks, MAX_QUARANTINED_STACKS) |
| 232 | + quarantine_stacks(region, stack_names=[directory_stack_name]) |
237 | 233 | else:
|
238 |
| - logging.warn("Cannot retain failed stack %s for debugging because there are already %d retained (max: %d)", |
239 |
| - directory_stack_name, n_retained_ad_stacks, MAX_QUARANTINED_STACKS) |
| 234 | + logging.warn("Cannot quarantine failed stack %s for debugging because there are already %d quarantined (max: %d)", |
| 235 | + directory_stack_name, n_quarantined_ad_stacks, MAX_QUARANTINED_STACKS) |
240 | 236 | raise e
|
241 | 237 | logging.info("Creation of stack %s complete", directory_stack_name)
|
242 | 238 |
|
@@ -276,7 +272,7 @@ def _directory_factory(
|
276 | 272 | stack_prefix = f"{AD_STACK_PREFIX}{directory_type}"
|
277 | 273 | directory_stack_name = find_stack_by_tag("parallelcluster:integ-tests-ad-stack", region, stack_prefix)
|
278 | 274 |
|
279 |
| - if not directory_stack_name: |
| 275 | + if not directory_stack_name or is_quarantined_stack(region, directory_stack_name): |
280 | 276 | directory_stack = _create_directory_stack(
|
281 | 277 | cfn_stacks_factory,
|
282 | 278 | request,
|
|
0 commit comments