diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy index 3aff103736..96a42ecc13 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy @@ -574,7 +574,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler sanitizeAwsBatchLabels(Map labels) { + if (!labels) return labels + + final result = new LinkedHashMap() + + for (Map.Entry entry : labels.entrySet()) { + final key = entry.getKey() + final value = entry.getValue() + + if (key != null && value != null) { + final sanitizedKey = sanitizeAwsBatchLabel(key.toString(), 128) + final sanitizedValue = sanitizeAwsBatchLabel(value.toString(), 256) + + // Only add non-empty keys and values + if (sanitizedKey && sanitizedValue) { + result.put(sanitizedKey, sanitizedValue) + } + } + } + + return result + } + + /** + * Sanitize a single label key or value for AWS Batch tags. + * Replaces invalid characters with underscores and truncates if necessary. + * + * @param input The input string to sanitize + * @param maxLength The maximum allowed length + * @return The sanitized string + */ + protected String sanitizeAwsBatchLabel(String input, int maxLength) { + if (!input) return input + + // Replace invalid characters with underscores + // AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @ + String sanitized = input.replaceAll(/[^a-zA-Z0-9\s_.\:\/=+\-@]/, '_') + + // Replace multiple consecutive underscores/spaces with single underscore + sanitized = sanitized.replaceAll(/[_\s]{2,}/, '_') + + // Remove leading/trailing underscores and spaces + sanitized = sanitized.replaceAll(/^[_\s]+|[_\s]+$/, '') + + // Truncate if too long + if (sanitized.length() > maxLength) { + sanitized = sanitized.substring(0, maxLength) + // Remove trailing underscore/space after truncation + sanitized = sanitized.replaceAll(/[_\s]+$/, '') + } + + return sanitized ?: null + } + /** * @return The list of environment variables to be defined in the Batch job execution context */ diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy index da217efce0..09e23799b4 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy @@ -530,7 +530,7 @@ class AwsBatchTaskHandlerTest extends Specification { vol2: '/here:/there:ro', vol3: '/this:/that:rw', ] - + when: handler.addVolumeMountsToContainer(mounts, container) then: @@ -585,7 +585,7 @@ class AwsBatchTaskHandlerTest extends Specification { !result.containerProperties.logConfiguration !result.containerProperties.mountPoints !result.containerProperties.privileged - + when: result = handler.makeJobDefRequest(task) then: @@ -928,7 +928,7 @@ class AwsBatchTaskHandlerTest extends Specification { then: 1 * handler.isCompleted() >> false 1 * handler.getMachineInfo() >> new CloudMachineInfo('x1.large', 'us-east-1b', PriceModel.spot) - + and: trace.native_id == 'xyz-123' trace.executorName == 'awsbatch' @@ -1099,7 +1099,7 @@ class AwsBatchTaskHandlerTest extends Specification { expect: handler.normaliseJobId(JOB_ID) == EXPECTED - + where: JOB_ID | EXPECTED null | null @@ -1118,7 +1118,7 @@ class AwsBatchTaskHandlerTest extends Specification { task.getName() >> NAME and: result == EXPECTED - + where: ENV | NAME | EXPECTED [:] | 'foo' | 'foo' @@ -1155,8 +1155,164 @@ class AwsBatchTaskHandlerTest extends Specification { 2 | true | false | 2 and: null | true | true | 5 // <-- default to 5 - 0 | true | true | 5 // <-- default to 5 + 0 | true | true | 5 // <-- default to 5 1 | true | true | 1 2 | true | true | 2 } + + @Unroll + def 'should sanitize AWS Batch label' () { + given: + def handler = Spy(AwsBatchTaskHandler) + + expect: + handler.sanitizeAwsBatchLabel(INPUT, MAX_LENGTH) == EXPECTED + + where: + INPUT | MAX_LENGTH | EXPECTED + // Valid labels that don't need sanitization + 'validLabel' | 50 | 'validLabel' + 'valid-label_123' | 50 | 'valid-label_123' + 'valid.label:test/path=value+more' | 50 | 'valid.label:test/path=value+more' + 'label with spaces' | 50 | 'label with spaces' + 'label-with@symbol' | 50 | 'label-with@symbol' + and: + // Labels with invalid characters + 'label#with#hash' | 50 | 'label_with_hash' + 'label$with%special&chars' | 50 | 'label_with_special_chars' + 'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces' + 'label*with?wildcards' | 50 | 'label_with_wildcards' + 'unicode_λαβελ_test' | 50 | 'unicode_test' + and: + // Multiple consecutive invalid characters + 'label###multiple###hashes' | 50 | 'label_multiple_hashes' + 'label multiple spaces' | 50 | 'label_multiple_spaces' + 'label___multiple___underscores' | 50 | 'label_multiple_underscores' + 'label$%^&*special*&^%$chars' | 50 | 'label_special_chars' + and: + // Leading/trailing invalid characters + '###leading-hashes' | 50 | 'leading-hashes' + 'trailing-hashes###' | 50 | 'trailing-hashes' + ' leading-spaces' | 50 | 'leading-spaces' + 'trailing-spaces ' | 50 | 'trailing-spaces' + '___leading-underscores' | 50 | 'leading-underscores' + 'trailing-underscores___' | 50 | 'trailing-underscores' + and: + // Length truncation + 'very-long-label-that-exceeds-max' | 10 | 'very-long-' + 'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-wi' + 'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-with-' + and: + // Edge cases + null | 50 | null + '' | 50 | '' + ' ' | 50 | null + '___' | 50 | null + '###' | 50 | null + '_' | 50 | null + ' ' | 50 | null + '#' | 50 | null + and: + // Complex real-world scenarios + 'user@domain.com' | 50 | 'user@domain.com' + 'workflow-run-2024/01/15' | 50 | 'workflow-run-2024/01/15' + 'task.hash.0x1234abcd' | 50 | 'task.hash.0x1234abcd' + 'pipeline#name%with&special*chars' | 50 | 'pipeline_name_with_special_chars' + 'session-id:abc123#$%' | 50 | 'session-id:abc123' + } + + @Unroll + def 'should sanitize AWS Batch labels map' () { + given: + def handler = Spy(AwsBatchTaskHandler) + + expect: + handler.sanitizeAwsBatchLabels(INPUT) == EXPECTED + + where: + INPUT | EXPECTED + // Null/empty input + null | null + [:] | [:] + and: + // Valid labels + [validKey: 'validValue'] | [validKey: 'validValue'] + ['valid-key_123': 'valid-value_456'] | ['valid-key_123': 'valid-value_456'] + ['key.with:path/chars=test+more@symbol': 'value with spaces'] | ['key.with:path/chars=test+more@symbol': 'value with spaces'] + and: + // Invalid characters in keys and values + ['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars'] + ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets': 'value_squares_braces'] + ['unicode_λkey': 'unicode_λvalue'] | ['unicode_key': 'unicode_value'] + and: + // Multiple entries with mixed validity + ['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] | + ['validKey': 'validValue', 'invalid_key': 'invalid_value', 'another.valid:key': 'another+valid@value'] + and: + // Entries that should be filtered out (null/empty after sanitization) + ['validKey': 'validValue', '###': '$$$', ' ': '%%%', 'goodKey': 'goodValue'] | + ['validKey': 'validValue', 'goodKey': 'goodValue'] + and: + // Null keys or values + ['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] | + [null: 'validValue', 'goodKey': 'goodValue'] + and: + // Real-world example with Nextflow resource labels + [ + 'uniqueRunId': 'tw-12345-workflow-run', + 'taskHash': 'task.hash.0x1a2b3c4d#special', + 'pipelineUser': 'user@domain.com', + 'pipelineRunName': 'my-pipeline-run(2024)', + 'pipelineSessionId': 'session#id$with%special&chars', + 'pipelineResume': 'false', + 'pipelineName': 'my_pipeline/name:version+tag' + ] | + [ + 'uniqueRunId': 'tw-12345-workflow-run', + 'taskHash': 'task.hash.0x1a2b3c4d_special', + 'pipelineUser': 'user@domain.com', + 'pipelineRunName': 'my-pipeline-run_2024', + 'pipelineSessionId': 'session_id_with_special_chars', + 'pipelineResume': 'false', + 'pipelineName': 'my_pipeline/name:version+tag' + ] + } + + def 'should apply label sanitization in submit request' () { + given: + def task = Mock(TaskRun) + task.getName() >> 'batch-task' + task.getConfig() >> new TaskConfig( + memory: '8GB', + cpus: 4, + resourceLabels: [ + 'validLabel': 'validValue', + 'invalid#key': 'invalid$value', + 'long-key-that-might-be-truncated-if-very-very-long': 'long-value-that-should-be-truncated-because-it-exceeds-the-maximum-allowed-length-for-aws-batch-tags-which-is-256-characters-and-this-string-is-definitely-longer-than-that-limit-so-it-will-be-cut-off-at-the-appropriate-length-and-cleaned-up' + ] + ) + + def handler = Spy(AwsBatchTaskHandler) + + when: + def req = handler.newSubmitRequest(task) + then: + 1 * handler.getSubmitCommand() >> ['bash', '-c', 'test'] + 1 * handler.maxSpotAttempts() >> 0 + 1 * handler.getAwsOptions() >> new AwsOptions() + 1 * handler.getJobQueue(task) >> 'test-queue' + 1 * handler.getJobDefinition(task) >> 'test-job-def' + 1 * handler.getEnvironmentVars() >> [] + + and: + def tags = req.getTags() + tags.size() == 3 + tags['validLabel'] == 'validValue' + tags['invalid_key'] == 'invalid_value' + // Check that long value was truncated + tags['long-key-that-might-be-truncated-if-very-very-long'].length() <= 256 + tags['long-key-that-might-be-truncated-if-very-very-long'].startsWith('long-value-that-should-be-truncated') + !tags['long-key-that-might-be-truncated-if-very-very-long'].endsWith('_') + req.getPropagateTags() == true + } }