From 1ca4ff8278469816023a9b6bb6798baa0067552e Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Sun, 22 Jun 2025 22:18:19 -0500 Subject: [PATCH 1/2] fix(aws): Implement label sanitization for AWS Batch tags in AwsBatchTaskHandler. Added methods to sanitize individual labels and a map of labels to comply with AWS constraints. Updated tests to verify sanitization functionality. Signed-off-by: Edmund Miller --- .../aws/batch/AwsBatchTaskHandler.groovy | 70 +++++++- .../aws/batch/AwsBatchTaskHandlerTest.groovy | 168 +++++++++++++++++- 2 files changed, 229 insertions(+), 9 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy index 3aff103736..96a42ecc13 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy @@ -574,7 +574,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler sanitizeAwsBatchLabels(Map labels) { + if (!labels) return labels + + final result = new LinkedHashMap() + + for (Map.Entry entry : labels.entrySet()) { + final key = entry.getKey() + final value = entry.getValue() + + if (key != null && value != null) { + final sanitizedKey = sanitizeAwsBatchLabel(key.toString(), 128) + final sanitizedValue = sanitizeAwsBatchLabel(value.toString(), 256) + + // Only add non-empty keys and values + if (sanitizedKey && sanitizedValue) { + result.put(sanitizedKey, sanitizedValue) + } + } + } + + return result + } + + /** + * Sanitize a single label key or value for AWS Batch tags. + * Replaces invalid characters with underscores and truncates if necessary. + * + * @param input The input string to sanitize + * @param maxLength The maximum allowed length + * @return The sanitized string + */ + protected String sanitizeAwsBatchLabel(String input, int maxLength) { + if (!input) return input + + // Replace invalid characters with underscores + // AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @ + String sanitized = input.replaceAll(/[^a-zA-Z0-9\s_.\:\/=+\-@]/, '_') + + // Replace multiple consecutive underscores/spaces with single underscore + sanitized = sanitized.replaceAll(/[_\s]{2,}/, '_') + + // Remove leading/trailing underscores and spaces + sanitized = sanitized.replaceAll(/^[_\s]+|[_\s]+$/, '') + + // Truncate if too long + if (sanitized.length() > maxLength) { + sanitized = sanitized.substring(0, maxLength) + // Remove trailing underscore/space after truncation + sanitized = sanitized.replaceAll(/[_\s]+$/, '') + } + + return sanitized ?: null + } + /** * @return The list of environment variables to be defined in the Batch job execution context */ diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy index da217efce0..edfcbb4518 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy @@ -530,7 +530,7 @@ class AwsBatchTaskHandlerTest extends Specification { vol2: '/here:/there:ro', vol3: '/this:/that:rw', ] - + when: handler.addVolumeMountsToContainer(mounts, container) then: @@ -585,7 +585,7 @@ class AwsBatchTaskHandlerTest extends Specification { !result.containerProperties.logConfiguration !result.containerProperties.mountPoints !result.containerProperties.privileged - + when: result = handler.makeJobDefRequest(task) then: @@ -928,7 +928,7 @@ class AwsBatchTaskHandlerTest extends Specification { then: 1 * handler.isCompleted() >> false 1 * handler.getMachineInfo() >> new CloudMachineInfo('x1.large', 'us-east-1b', PriceModel.spot) - + and: trace.native_id == 'xyz-123' trace.executorName == 'awsbatch' @@ -1099,7 +1099,7 @@ class AwsBatchTaskHandlerTest extends Specification { expect: handler.normaliseJobId(JOB_ID) == EXPECTED - + where: JOB_ID | EXPECTED null | null @@ -1118,7 +1118,7 @@ class AwsBatchTaskHandlerTest extends Specification { task.getName() >> NAME and: result == EXPECTED - + where: ENV | NAME | EXPECTED [:] | 'foo' | 'foo' @@ -1155,8 +1155,164 @@ class AwsBatchTaskHandlerTest extends Specification { 2 | true | false | 2 and: null | true | true | 5 // <-- default to 5 - 0 | true | true | 5 // <-- default to 5 + 0 | true | true | 5 // <-- default to 5 1 | true | true | 1 2 | true | true | 2 } + + @Unroll + def 'should sanitize AWS Batch label' () { + given: + def handler = Spy(AwsBatchTaskHandler) + + expect: + handler.sanitizeAwsBatchLabel(INPUT, MAX_LENGTH) == EXPECTED + + where: + INPUT | MAX_LENGTH | EXPECTED + // Valid labels that don't need sanitization + 'validLabel' | 50 | 'validLabel' + 'valid-label_123' | 50 | 'valid-label_123' + 'valid.label:test/path=value+more' | 50 | 'valid.label:test/path=value+more' + 'label with spaces' | 50 | 'label with spaces' + 'label-with@symbol' | 50 | 'label-with@symbol' + and: + // Labels with invalid characters + 'label#with#hash' | 50 | 'label_with_hash' + 'label$with%special&chars' | 50 | 'label_with_special_chars' + 'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces__' + 'label*with?wildcards' | 50 | 'label_with_wildcards' + 'unicode_λαβελ_test' | 50 | 'unicode____abel_test' + and: + // Multiple consecutive invalid characters + 'label###multiple###hashes' | 50 | 'label_multiple_hashes' + 'label multiple spaces' | 50 | 'label_multiple_spaces' + 'label___multiple___underscores' | 50 | 'label_multiple_underscores' + 'label$%^&*special*&^%$chars' | 50 | 'label_special_chars' + and: + // Leading/trailing invalid characters + '###leading-hashes' | 50 | 'leading-hashes' + 'trailing-hashes###' | 50 | 'trailing-hashes' + ' leading-spaces' | 50 | 'leading-spaces' + 'trailing-spaces ' | 50 | 'trailing-spaces' + '___leading-underscores' | 50 | 'leading-underscores' + 'trailing-underscores___' | 50 | 'trailing-underscores' + and: + // Length truncation + 'very-long-label-that-exceeds-max' | 10 | 'very-long-' + 'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-w' + 'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-w' + and: + // Edge cases + null | 50 | null + '' | 50 | '' + ' ' | 50 | null + '___' | 50 | null + '###' | 50 | null + '_' | 50 | null + ' ' | 50 | null + '#' | 50 | null + and: + // Complex real-world scenarios + 'user@domain.com' | 50 | 'user@domain.com' + 'workflow-run-2024/01/15' | 50 | 'workflow-run-2024/01/15' + 'task.hash.0x1234abcd' | 50 | 'task.hash.0x1234abcd' + 'pipeline#name%with&special*chars' | 50 | 'pipeline_name_with_special_chars' + 'session-id:abc123#$%' | 50 | 'session-id:abc123' + } + + @Unroll + def 'should sanitize AWS Batch labels map' () { + given: + def handler = Spy(AwsBatchTaskHandler) + + expect: + handler.sanitizeAwsBatchLabels(INPUT) == EXPECTED + + where: + INPUT | EXPECTED + // Null/empty input + null | null + [:] | [:] + and: + // Valid labels + [validKey: 'validValue'] | [validKey: 'validValue'] + ['valid-key_123': 'valid-value_456'] | ['valid-key_123': 'valid-value_456'] + ['key.with:path/chars=test+more@symbol': 'value with spaces'] | ['key.with:path/chars=test+more@symbol': 'value with spaces'] + and: + // Invalid characters in keys and values + ['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars'] + ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets_': 'value_squares__braces_'] + ['unicode_λkey': 'unicode_λvalue'] | ['unicode__key': 'unicode__value'] + and: + // Multiple entries with mixed validity + ['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] | + ['validKey': 'validValue', 'invalid_key': 'invalid_value', 'another.valid:key': 'another+valid@value'] + and: + // Entries that should be filtered out (null/empty after sanitization) + ['validKey': 'validValue', '###': '$$$', ' ': '%%%', 'goodKey': 'goodValue'] | + ['validKey': 'validValue', 'goodKey': 'goodValue'] + and: + // Null keys or values + ['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] | + ['goodKey': 'goodValue'] + and: + // Real-world example with Nextflow resource labels + [ + 'uniqueRunId': 'tw-12345-workflow-run', + 'taskHash': 'task.hash.0x1a2b3c4d#special', + 'pipelineUser': 'user@domain.com', + 'pipelineRunName': 'my-pipeline-run(2024)', + 'pipelineSessionId': 'session#id$with%special&chars', + 'pipelineResume': 'false', + 'pipelineName': 'my_pipeline/name:version+tag' + ] | + [ + 'uniqueRunId': 'tw-12345-workflow-run', + 'taskHash': 'task.hash.0x1a2b3c4d_special', + 'pipelineUser': 'user@domain.com', + 'pipelineRunName': 'my-pipeline-run_2024_', + 'pipelineSessionId': 'session_id_with_special_chars', + 'pipelineResume': 'false', + 'pipelineName': 'my_pipeline/name:version+tag' + ] + } + + def 'should apply label sanitization in submit request' () { + given: + def task = Mock(TaskRun) + task.getName() >> 'batch-task' + task.getConfig() >> new TaskConfig( + memory: '8GB', + cpus: 4, + resourceLabels: [ + 'validLabel': 'validValue', + 'invalid#key': 'invalid$value', + 'long-key-that-might-be-truncated-if-very-very-long': 'long-value-that-should-be-truncated-because-it-exceeds-the-maximum-allowed-length-for-aws-batch-tags-which-is-256-characters-and-this-string-is-definitely-longer-than-that-limit-so-it-will-be-cut-off-at-the-appropriate-length-and-cleaned-up' + ] + ) + + def handler = Spy(AwsBatchTaskHandler) + + when: + def req = handler.newSubmitRequest(task) + then: + 1 * handler.getSubmitCommand() >> ['bash', '-c', 'test'] + 1 * handler.maxSpotAttempts() >> 0 + 1 * handler.getAwsOptions() >> new AwsOptions() + 1 * handler.getJobQueue(task) >> 'test-queue' + 1 * handler.getJobDefinition(task) >> 'test-job-def' + 1 * handler.getEnvironmentVars() >> [] + + and: + def tags = req.getTags() + tags.size() == 3 + tags['validLabel'] == 'validValue' + tags['invalid_key'] == 'invalid_value' + // Check that long value was truncated + tags['long-key-that-might-be-truncated-if-very-very-long'].length() <= 256 + tags['long-key-that-might-be-truncated-if-very-very-long'].startsWith('long-value-that-should-be-truncated') + !tags['long-key-that-might-be-truncated-if-very-very-long'].endsWith('_') + req.getPropagateTags() == true + } } From 4d70df3e5469c74756992bd99b7031894d8278a2 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Mon, 23 Jun 2025 10:00:02 -0500 Subject: [PATCH 2/2] test(aws): Clean up failing label sanitization tests Refined expected outputs for labels with brackets, unicode characters, and special characters to ensure compliance with AWS constraints. Adjusted test cases for length truncation and null handling to reflect accurate sanitization behavior. Signed-off-by: Edmund Miller --- .../aws/batch/AwsBatchTaskHandlerTest.groovy | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy index edfcbb4518..09e23799b4 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy @@ -1180,9 +1180,9 @@ class AwsBatchTaskHandlerTest extends Specification { // Labels with invalid characters 'label#with#hash' | 50 | 'label_with_hash' 'label$with%special&chars' | 50 | 'label_with_special_chars' - 'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces__' + 'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces' 'label*with?wildcards' | 50 | 'label_with_wildcards' - 'unicode_λαβελ_test' | 50 | 'unicode____abel_test' + 'unicode_λαβελ_test' | 50 | 'unicode_test' and: // Multiple consecutive invalid characters 'label###multiple###hashes' | 50 | 'label_multiple_hashes' @@ -1200,8 +1200,8 @@ class AwsBatchTaskHandlerTest extends Specification { and: // Length truncation 'very-long-label-that-exceeds-max' | 10 | 'very-long-' - 'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-w' - 'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-w' + 'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-wi' + 'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-with-' and: // Edge cases null | 50 | null @@ -1242,8 +1242,8 @@ class AwsBatchTaskHandlerTest extends Specification { and: // Invalid characters in keys and values ['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars'] - ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets_': 'value_squares__braces_'] - ['unicode_λkey': 'unicode_λvalue'] | ['unicode__key': 'unicode__value'] + ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets': 'value_squares_braces'] + ['unicode_λkey': 'unicode_λvalue'] | ['unicode_key': 'unicode_value'] and: // Multiple entries with mixed validity ['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] | @@ -1255,7 +1255,7 @@ class AwsBatchTaskHandlerTest extends Specification { and: // Null keys or values ['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] | - ['goodKey': 'goodValue'] + [null: 'validValue', 'goodKey': 'goodValue'] and: // Real-world example with Nextflow resource labels [ @@ -1271,7 +1271,7 @@ class AwsBatchTaskHandlerTest extends Specification { 'uniqueRunId': 'tw-12345-workflow-run', 'taskHash': 'task.hash.0x1a2b3c4d_special', 'pipelineUser': 'user@domain.com', - 'pipelineRunName': 'my-pipeline-run_2024_', + 'pipelineRunName': 'my-pipeline-run_2024', 'pipelineSessionId': 'session_id_with_special_chars', 'pipelineResume': 'false', 'pipelineName': 'my_pipeline/name:version+tag'