From 1ca4ff8278469816023a9b6bb6798baa0067552e Mon Sep 17 00:00:00 2001
From: Edmund Miller <edmund.miller@seqera.io>
Date: Sun, 22 Jun 2025 22:18:19 -0500
Subject: [PATCH 1/2] fix(aws): Implement label sanitization for AWS Batch tags
 in AwsBatchTaskHandler.

Added methods to sanitize individual labels and a map of labels to comply with AWS constraints. Updated tests to verify sanitization functionality.

Signed-off-by: Edmund Miller <edmund.miller@seqera.io>
---
 .../aws/batch/AwsBatchTaskHandler.groovy      |  70 +++++++-
 .../aws/batch/AwsBatchTaskHandlerTest.groovy  | 168 +++++++++++++++++-
 2 files changed, 229 insertions(+), 9 deletions(-)

diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy
index 3aff103736..96a42ecc13 100644
--- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy
+++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy
@@ -574,7 +574,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
 
         if( opts.executionRole )
             container.setExecutionRoleArn(opts.executionRole)
-        
+
         final logsGroup = opts.getLogsGroup()
         if( logsGroup )
             container.setLogConfiguration(getLogConfiguration(logsGroup, opts.getRegion()))
@@ -623,7 +623,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
         return result
     }
 
-    @Memoized 
+    @Memoized
     LogConfiguration getLogConfiguration(String name, String region) {
         new LogConfiguration()
             .withLogDriver('awslogs')
@@ -765,7 +765,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
         result.setJobQueue(getJobQueue(task))
         result.setJobDefinition(getJobDefinition(task))
         if( labels ) {
-            result.setTags(labels)
+            result.setTags(sanitizeAwsBatchLabels(labels))
             result.setPropagateTags(true)
         }
         // set the share identifier
@@ -849,6 +849,70 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
         return result
     }
 
+    /**
+     * Sanitize resource labels to comply with AWS Batch tag requirements.
+     * AWS Batch tags have specific constraints:
+     * - Keys and values can contain: letters, numbers, spaces, and characters: _ . : / = + - @
+     * - Maximum key length: 128 characters
+     * - Maximum value length: 256 characters
+     *
+     * @param labels The original resource labels map
+     * @return A new map with sanitized labels suitable for AWS Batch tags
+     */
+    protected Map<String, String> sanitizeAwsBatchLabels(Map<String, String> labels) {
+        if (!labels) return labels
+
+        final result = new LinkedHashMap<String, String>()
+
+        for (Map.Entry<String, String> entry : labels.entrySet()) {
+            final key = entry.getKey()
+            final value = entry.getValue()
+
+            if (key != null && value != null) {
+                final sanitizedKey = sanitizeAwsBatchLabel(key.toString(), 128)
+                final sanitizedValue = sanitizeAwsBatchLabel(value.toString(), 256)
+
+                // Only add non-empty keys and values
+                if (sanitizedKey && sanitizedValue) {
+                    result.put(sanitizedKey, sanitizedValue)
+                }
+            }
+        }
+
+        return result
+    }
+
+    /**
+     * Sanitize a single label key or value for AWS Batch tags.
+     * Replaces invalid characters with underscores and truncates if necessary.
+     *
+     * @param input The input string to sanitize
+     * @param maxLength The maximum allowed length
+     * @return The sanitized string
+     */
+    protected String sanitizeAwsBatchLabel(String input, int maxLength) {
+        if (!input) return input
+
+        // Replace invalid characters with underscores
+        // AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @
+        String sanitized = input.replaceAll(/[^a-zA-Z0-9\s_.\:\/=+\-@]/, '_')
+
+        // Replace multiple consecutive underscores/spaces with single underscore
+        sanitized = sanitized.replaceAll(/[_\s]{2,}/, '_')
+
+        // Remove leading/trailing underscores and spaces
+        sanitized = sanitized.replaceAll(/^[_\s]+|[_\s]+$/, '')
+
+        // Truncate if too long
+        if (sanitized.length() > maxLength) {
+            sanitized = sanitized.substring(0, maxLength)
+            // Remove trailing underscore/space after truncation
+            sanitized = sanitized.replaceAll(/[_\s]+$/, '')
+        }
+
+        return sanitized ?: null
+    }
+
     /**
      * @return The list of environment variables to be defined in the Batch job execution context
      */
diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy
index da217efce0..edfcbb4518 100644
--- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy
+++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy
@@ -530,7 +530,7 @@ class AwsBatchTaskHandlerTest extends Specification {
                 vol2: '/here:/there:ro',
                 vol3: '/this:/that:rw',
         ]
-        
+
         when:
         handler.addVolumeMountsToContainer(mounts, container)
         then:
@@ -585,7 +585,7 @@ class AwsBatchTaskHandlerTest extends Specification {
         !result.containerProperties.logConfiguration
         !result.containerProperties.mountPoints
         !result.containerProperties.privileged
-        
+
         when:
         result = handler.makeJobDefRequest(task)
         then:
@@ -928,7 +928,7 @@ class AwsBatchTaskHandlerTest extends Specification {
         then:
         1 * handler.isCompleted() >> false
         1 * handler.getMachineInfo() >> new CloudMachineInfo('x1.large', 'us-east-1b', PriceModel.spot)
-        
+
         and:
         trace.native_id == 'xyz-123'
         trace.executorName == 'awsbatch'
@@ -1099,7 +1099,7 @@ class AwsBatchTaskHandlerTest extends Specification {
 
         expect:
         handler.normaliseJobId(JOB_ID) == EXPECTED
-        
+
         where:
         JOB_ID       | EXPECTED
         null         | null
@@ -1118,7 +1118,7 @@ class AwsBatchTaskHandlerTest extends Specification {
         task.getName() >> NAME
         and:
         result == EXPECTED
-        
+
         where:
         ENV                             | NAME      | EXPECTED
         [:]                             | 'foo'     | 'foo'
@@ -1155,8 +1155,164 @@ class AwsBatchTaskHandlerTest extends Specification {
         2           | true  | false      | 2
         and:
         null        | true  | true       | 5    // <-- default to 5
-        0           | true  | true       | 5    // <-- default to 5 
+        0           | true  | true       | 5    // <-- default to 5
         1           | true  | true       | 1
         2           | true  | true       | 2
     }
+
+    @Unroll
+    def 'should sanitize AWS Batch label' () {
+        given:
+        def handler = Spy(AwsBatchTaskHandler)
+
+        expect:
+        handler.sanitizeAwsBatchLabel(INPUT, MAX_LENGTH) == EXPECTED
+
+        where:
+        INPUT                               | MAX_LENGTH | EXPECTED
+        // Valid labels that don't need sanitization
+        'validLabel'                        | 50         | 'validLabel'
+        'valid-label_123'                   | 50         | 'valid-label_123'
+        'valid.label:test/path=value+more'  | 50         | 'valid.label:test/path=value+more'
+        'label with spaces'                 | 50         | 'label with spaces'
+        'label-with@symbol'                 | 50         | 'label-with@symbol'
+        and:
+        // Labels with invalid characters
+        'label#with#hash'                   | 50         | 'label_with_hash'
+        'label$with%special&chars'          | 50         | 'label_with_special_chars'
+        'label(with)brackets[and]braces{}'  | 50         | 'label_with_brackets_and_braces__'
+        'label*with?wildcards'              | 50         | 'label_with_wildcards'
+        'unicode_λαβελ_test'                | 50         | 'unicode____abel_test'
+        and:
+        // Multiple consecutive invalid characters
+        'label###multiple###hashes'         | 50         | 'label_multiple_hashes'
+        'label   multiple   spaces'         | 50         | 'label_multiple_spaces'
+        'label___multiple___underscores'    | 50         | 'label_multiple_underscores'
+        'label$%^&*special*&^%$chars'       | 50         | 'label_special_chars'
+        and:
+        // Leading/trailing invalid characters
+        '###leading-hashes'                 | 50         | 'leading-hashes'
+        'trailing-hashes###'                | 50         | 'trailing-hashes'
+        '   leading-spaces'                 | 50         | 'leading-spaces'
+        'trailing-spaces   '                | 50         | 'trailing-spaces'
+        '___leading-underscores'            | 50         | 'leading-underscores'
+        'trailing-underscores___'           | 50         | 'trailing-underscores'
+        and:
+        // Length truncation
+        'very-long-label-that-exceeds-max'  | 10         | 'very-long-'
+        'very-long-label-ending-with-_'     | 25         | 'very-long-label-ending-w'
+        'very-long-label-ending-with-___'   | 28         | 'very-long-label-ending-w'
+        and:
+        // Edge cases
+        null                                | 50         | null
+        ''                                  | 50         | ''
+        '   '                               | 50         | null
+        '___'                               | 50         | null
+        '###'                               | 50         | null
+        '_'                                 | 50         | null
+        ' '                                 | 50         | null
+        '#'                                 | 50         | null
+        and:
+        // Complex real-world scenarios
+        'user@domain.com'                   | 50         | 'user@domain.com'
+        'workflow-run-2024/01/15'           | 50         | 'workflow-run-2024/01/15'
+        'task.hash.0x1234abcd'              | 50         | 'task.hash.0x1234abcd'
+        'pipeline#name%with&special*chars'  | 50         | 'pipeline_name_with_special_chars'
+        'session-id:abc123#$%'              | 50         | 'session-id:abc123'
+    }
+
+    @Unroll
+    def 'should sanitize AWS Batch labels map' () {
+        given:
+        def handler = Spy(AwsBatchTaskHandler)
+
+        expect:
+        handler.sanitizeAwsBatchLabels(INPUT) == EXPECTED
+
+        where:
+        INPUT | EXPECTED
+        // Null/empty input
+        null | null
+        [:] | [:]
+        and:
+        // Valid labels
+        [validKey: 'validValue'] | [validKey: 'validValue']
+        ['valid-key_123': 'valid-value_456'] | ['valid-key_123': 'valid-value_456']
+        ['key.with:path/chars=test+more@symbol': 'value with spaces'] | ['key.with:path/chars=test+more@symbol': 'value with spaces']
+        and:
+        // Invalid characters in keys and values
+        ['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars']
+        ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets_': 'value_squares__braces_']
+        ['unicode_λkey': 'unicode_λvalue'] | ['unicode__key': 'unicode__value']
+        and:
+        // Multiple entries with mixed validity
+        ['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] |
+        ['validKey': 'validValue', 'invalid_key': 'invalid_value', 'another.valid:key': 'another+valid@value']
+        and:
+        // Entries that should be filtered out (null/empty after sanitization)
+        ['validKey': 'validValue', '###': '$$$', '   ': '%%%', 'goodKey': 'goodValue'] |
+        ['validKey': 'validValue', 'goodKey': 'goodValue']
+        and:
+        // Null keys or values
+        ['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] |
+        ['goodKey': 'goodValue']
+        and:
+        // Real-world example with Nextflow resource labels
+        [
+            'uniqueRunId': 'tw-12345-workflow-run',
+            'taskHash': 'task.hash.0x1a2b3c4d#special',
+            'pipelineUser': 'user@domain.com',
+            'pipelineRunName': 'my-pipeline-run(2024)',
+            'pipelineSessionId': 'session#id$with%special&chars',
+            'pipelineResume': 'false',
+            'pipelineName': 'my_pipeline/name:version+tag'
+        ] |
+        [
+            'uniqueRunId': 'tw-12345-workflow-run',
+            'taskHash': 'task.hash.0x1a2b3c4d_special',
+            'pipelineUser': 'user@domain.com',
+            'pipelineRunName': 'my-pipeline-run_2024_',
+            'pipelineSessionId': 'session_id_with_special_chars',
+            'pipelineResume': 'false',
+            'pipelineName': 'my_pipeline/name:version+tag'
+        ]
+    }
+
+    def 'should apply label sanitization in submit request' () {
+        given:
+        def task = Mock(TaskRun)
+        task.getName() >> 'batch-task'
+        task.getConfig() >> new TaskConfig(
+            memory: '8GB',
+            cpus: 4,
+            resourceLabels: [
+                'validLabel': 'validValue',
+                'invalid#key': 'invalid$value',
+                'long-key-that-might-be-truncated-if-very-very-long': 'long-value-that-should-be-truncated-because-it-exceeds-the-maximum-allowed-length-for-aws-batch-tags-which-is-256-characters-and-this-string-is-definitely-longer-than-that-limit-so-it-will-be-cut-off-at-the-appropriate-length-and-cleaned-up'
+            ]
+        )
+
+        def handler = Spy(AwsBatchTaskHandler)
+
+        when:
+        def req = handler.newSubmitRequest(task)
+        then:
+        1 * handler.getSubmitCommand() >> ['bash', '-c', 'test']
+        1 * handler.maxSpotAttempts() >> 0
+        1 * handler.getAwsOptions() >> new AwsOptions()
+        1 * handler.getJobQueue(task) >> 'test-queue'
+        1 * handler.getJobDefinition(task) >> 'test-job-def'
+        1 * handler.getEnvironmentVars() >> []
+
+        and:
+        def tags = req.getTags()
+        tags.size() == 3
+        tags['validLabel'] == 'validValue'
+        tags['invalid_key'] == 'invalid_value'
+        // Check that long value was truncated
+        tags['long-key-that-might-be-truncated-if-very-very-long'].length() <= 256
+        tags['long-key-that-might-be-truncated-if-very-very-long'].startsWith('long-value-that-should-be-truncated')
+        !tags['long-key-that-might-be-truncated-if-very-very-long'].endsWith('_')
+        req.getPropagateTags() == true
+    }
 }

From 4d70df3e5469c74756992bd99b7031894d8278a2 Mon Sep 17 00:00:00 2001
From: Edmund Miller <edmund.miller@seqera.io>
Date: Mon, 23 Jun 2025 10:00:02 -0500
Subject: [PATCH 2/2] test(aws): Clean up failing label sanitization tests

Refined expected outputs for labels with brackets, unicode characters, and special characters to ensure compliance with AWS constraints. Adjusted test cases for length truncation and null handling to reflect accurate sanitization behavior.

Signed-off-by: Edmund Miller <edmund.miller@seqera.io>
---
 .../aws/batch/AwsBatchTaskHandlerTest.groovy     | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy
index edfcbb4518..09e23799b4 100644
--- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy
+++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy
@@ -1180,9 +1180,9 @@ class AwsBatchTaskHandlerTest extends Specification {
         // Labels with invalid characters
         'label#with#hash'                   | 50         | 'label_with_hash'
         'label$with%special&chars'          | 50         | 'label_with_special_chars'
-        'label(with)brackets[and]braces{}'  | 50         | 'label_with_brackets_and_braces__'
+        'label(with)brackets[and]braces{}'  | 50         | 'label_with_brackets_and_braces'
         'label*with?wildcards'              | 50         | 'label_with_wildcards'
-        'unicode_λαβελ_test'                | 50         | 'unicode____abel_test'
+        'unicode_λαβελ_test'                | 50         | 'unicode_test'
         and:
         // Multiple consecutive invalid characters
         'label###multiple###hashes'         | 50         | 'label_multiple_hashes'
@@ -1200,8 +1200,8 @@ class AwsBatchTaskHandlerTest extends Specification {
         and:
         // Length truncation
         'very-long-label-that-exceeds-max'  | 10         | 'very-long-'
-        'very-long-label-ending-with-_'     | 25         | 'very-long-label-ending-w'
-        'very-long-label-ending-with-___'   | 28         | 'very-long-label-ending-w'
+        'very-long-label-ending-with-_'     | 25         | 'very-long-label-ending-wi'
+        'very-long-label-ending-with-___'   | 28         | 'very-long-label-ending-with-'
         and:
         // Edge cases
         null                                | 50         | null
@@ -1242,8 +1242,8 @@ class AwsBatchTaskHandlerTest extends Specification {
         and:
         // Invalid characters in keys and values
         ['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars']
-        ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets_': 'value_squares__braces_']
-        ['unicode_λkey': 'unicode_λvalue'] | ['unicode__key': 'unicode__value']
+        ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets': 'value_squares_braces']
+        ['unicode_λkey': 'unicode_λvalue'] | ['unicode_key': 'unicode_value']
         and:
         // Multiple entries with mixed validity
         ['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] |
@@ -1255,7 +1255,7 @@ class AwsBatchTaskHandlerTest extends Specification {
         and:
         // Null keys or values
         ['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] |
-        ['goodKey': 'goodValue']
+        [null: 'validValue', 'goodKey': 'goodValue']
         and:
         // Real-world example with Nextflow resource labels
         [
@@ -1271,7 +1271,7 @@ class AwsBatchTaskHandlerTest extends Specification {
             'uniqueRunId': 'tw-12345-workflow-run',
             'taskHash': 'task.hash.0x1a2b3c4d_special',
             'pipelineUser': 'user@domain.com',
-            'pipelineRunName': 'my-pipeline-run_2024_',
+            'pipelineRunName': 'my-pipeline-run_2024',
             'pipelineSessionId': 'session_id_with_special_chars',
             'pipelineResume': 'false',
             'pipelineName': 'my_pipeline/name:version+tag'