nf-core · tanaes · Feb 29, 2024 · Aug 14, 2025 · Aug 15, 2025 · Aug 15, 2025
diff --git a/CITATIONS.md b/CITATIONS.md
@@ -12,6 +12,8 @@
 
 - [Aspera CLI](https://github.com/IBM/aspera-cli)
 
+- [AWS CLI](https://aws.amazon.com/cli/)
+
 - [Python](http://www.python.org)
 
 - [Requests](https://docs.python-requests.org/)
@@ -20,6 +22,10 @@
 
 ## Pipeline resources
 
+- [AWS Open Data Program - SRA](https://registry.opendata.aws/ncbi-sra/)
+
+  > The Sequence Read Archive (SRA) is mirrored on AWS S3 as part of the AWS Open Data Program, providing free access to SRA data.
+
 - [ENA](https://pubmed.ncbi.nlm.nih.gov/33175160/)
 
   > Harrison PW, Ahamed A, Aslam R, Alako BTF, Burgin J, Buso N, Courtot M, Fan J, Gupta D, Haseeb M, Holt S, Ibrahim T, Ivanov E, Jayathilaka S, Kadhirvelu VB, Kumar M, Lopez R, Kay S, Leinonen R, Liu X, O'Cathail C, Pakseresht A, Park Y, Pesant S, Rahman N, Rajan J, Sokolov A, Vijayaraja S, Waheed Z, Zyoud A, Burdett T, Cochrane G. The European Nucleotide Archive in 2020. Nucleic Acids Res. 2021 Jan 8;49(D1):D82-D85. doi: 10.1093/nar/gkaa1028. PubMed PMID: 33175160; PubMed Central PMCID: PMC7778925.

diff --git a/docs/usage.md b/docs/usage.md
@@ -72,7 +72,11 @@ See [issue #260](https://github.com/nf-core/fetchngs/issues/260) for more detail
 
 ### Primary options for downloading data
 
-If the appropriate download links are available, the pipeline uses FTP by default to download FastQ files by setting the `--download_method ftp` parameter. If you are having issues and prefer to use sra-tools or Aspera instead, you can set the [`--download_method`](https://nf-co.re/fetchngs/parameters#download_method) parameter to `--download_method sratools` or `--download_method aspera`, respectively.
+If the appropriate download links are available, the pipeline uses FTP by default to download FastQ files by setting the `--download_method ftp` parameter. If you are having issues and prefer to use alternative methods, you can set the [`--download_method`](https://nf-co.re/fetchngs/parameters#download_method) parameter to:
+
+- `--download_method sratools`: Uses NCBI's sra-tools to download SRA files and convert to FastQ
+- `--download_method aspera`: Uses Aspera CLI for faster downloads from ENA
+- `--download_method aws`: Downloads SRA files from the AWS S3 Open Data Program mirror and converts to FastQ
 
 ### Downloading dbGAP data with JWT
 

diff --git a/modules/local/sra_aws_download/environment.yml b/modules/local/sra_aws_download/environment.yml
@@ -0,0 +1,8 @@
+name: sra_aws_download
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  # NOTE(review): main.nf pins the awscli 1.8.3 BioContainer for Docker/Singularity,
+  # so conda runs (2.15.0) and container runs use different AWS CLI major versions.
+  - conda-forge::awscli=2.15.0
diff --git a/modules/local/sra_aws_download/main.nf b/modules/local/sra_aws_download/main.nf
@@ -0,0 +1,66 @@
+//
+// Download one run's .sra object from the SRA mirror on AWS S3 (bucket `sra-pub-run-odp`)
+// with the AWS CLI, using unsigned (anonymous) requests.
+//
+// input : [ val(meta), val(run_accession) ]
+// output: sra      - [ val(meta), path("*.sra") ]
+//         versions - versions.yml recording the AWS CLI version
+//
+process SRA_AWS_DOWNLOAD {
+    tag "$meta.id"
+    label 'process_low'
+    label 'error_retry'
+
+    // NOTE(review): environment.yml pins awscli 2.15.0 whereas both container images below
+    // ship awscli 1.8.3 -- confirm that this version skew between profiles is intended.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/awscli:1.8.3--py35_0' :
+        'quay.io/biocontainers/awscli:1.8.3--py35_0' }"
+
+    input:
+    tuple val(meta), val(run_accession)
+
+    output:
+    tuple val(meta), path("*.sra"), emit: sra
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Output file stem; defaults to the run accession unless ext.prefix overrides it
+    def prefix = task.ext.prefix ?: "${run_accession}"
+    """
+    # Download SRA file from AWS S3 Open Data Program
+    aws s3 cp \\
+        --region us-east-1 \\
+        --no-sign-request \\
+        ${args} \\
+        s3://sra-pub-run-odp/sra/${run_accession}/${run_accession} \\
+        ${prefix}.sra
+
+    # Verify download: the file must exist and be non-empty (-s), otherwise fail the task
+    if [ ! -s "${prefix}.sra" ]; then
+        echo "ERROR: Failed to download ${run_accession} from AWS S3" >&2
+        exit 1
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        aws-cli: \$(aws --version 2>&1 | sed 's/aws-cli\\///; s/ Python.*//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${run_accession}"
+    """
+    touch ${prefix}.sra
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        aws-cli: \$(aws --version 2>&1 | sed 's/aws-cli\\///; s/ Python.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sra_aws_download/nextflow.config b/modules/local/sra_aws_download/nextflow.config
@@ -0,0 +1,8 @@
+process {
+    withName: 'SRA_AWS_DOWNLOAD' { // settings scoped to the SRA_AWS_DOWNLOAD process only
+        publishDir = [
+            path: { "${params.outdir}/sra" }, // destination built from params.outdir; closure is evaluated lazily
+            enabled: false // publishing is switched off for this process
+        ]
+    }
+}
diff --git a/modules/local/sra_aws_download/tests/main.nf.test b/modules/local/sra_aws_download/tests/main.nf.test
@@ -0,0 +1,56 @@
+nextflow_process {
+
+    name "Test Process SRA_AWS_DOWNLOAD"
+    script "../main.nf"
+    process "SRA_AWS_DOWNLOAD"
+    tag "modules"
+    tag "modules_local"
+    tag "sra_aws_download"
+
+    test("Should download SRA file from AWS") { // runs the real script block (no -stub option set)
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ],
+                    'DRR028935'
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // compares every output channel against the stored snapshot
+            )
+        }
+
+    }
+
+    test("Should download SRA file from AWS - stub") { // same input as above, executed in stub mode
+
+        options "-stub" // run the process's stub block instead of its script block
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ],
+                    'DRR028935'
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // compares every output channel against the stored snapshot
+            )
+        }
+
+    }
+
+}
diff --git a/modules/local/sra_aws_download/tests/main.nf.test.snap b/modules/local/sra_aws_download/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+    "Should download SRA file from AWS": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "DRR028935.sra:md5,bc88b59c510081d85448416f05094ed5"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,ce0676c62bd6864661cf98777e7c2896"
+                ],
+                "sra": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "DRR028935.sra:md5,bc88b59c510081d85448416f05094ed5"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,ce0676c62bd6864661cf98777e7c2896"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.6"
+        },
+        "timestamp": "2025-08-14T14:59:02.578113"
+    },
+    "Should download SRA file from AWS - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "DRR028935.sra:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,ce0676c62bd6864661cf98777e7c2896"
+                ],
+                "sra": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "DRR028935.sra:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,ce0676c62bd6864661cf98777e7c2896"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.6"
+        },
+        "timestamp": "2025-08-14T14:59:07.021124"
+    }
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -51,9 +51,9 @@
                     "type": "string",
                     "default": "ftp",
                     "fa_icon": "fas fa-download",
-                    "enum": ["aspera", "ftp", "sratools"],
-                    "description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.",
-                    "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ."
+                    "enum": ["aspera", "ftp", "sratools", "aws"],
+                    "description": "Method to download FastQ files. Available options are 'aspera', 'ftp', 'sratools', or 'aws'. Default is 'ftp'.",
+                    "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP. sratools uses sra-tools to download *.sra files and convert to FastQ. aws uses AWS CLI to download *.sra files from the SRA mirror on AWS S3 Open Data Program and convert to FastQ."
                 },
                 "skip_fastq_download": {
                     "type": "boolean",

diff --git a/subworkflows/local/fastq_download_aws_sratools/main.nf b/subworkflows/local/fastq_download_aws_sratools/main.nf
@@ -0,0 +1,39 @@
+include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/nf-core/custom/sratoolsncbisettings/main'
+include { SRA_AWS_DOWNLOAD            } from '../../../modules/local/sra_aws_download/main'
+include { SRATOOLS_FASTERQDUMP        } from '../../../modules/nf-core/sratools/fasterqdump/main'
+
+//
+// Download .sra files from the AWS S3 SRA mirror and convert them to FASTQ reads
+//
+workflow FASTQ_DOWNLOAD_AWS_SRATOOLS {
+    take:
+    ch_sra_ids   // channel: [ val(meta), val(id) ]
+    ch_dbgap_key // channel: [ path(dbgap_key) ]
+
+    main:
+
+    ch_versions = Channel.empty() // accumulates the versions.yml outputs of the steps below
+
+    //
+    // Detect existing NCBI user settings or create new ones.
+    //
+    CUSTOM_SRATOOLSNCBISETTINGS ( ch_sra_ids.collect() ) // collect() bundles all IDs into a single emission
+    ch_ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings
+    ch_versions = ch_versions.mix(CUSTOM_SRATOOLSNCBISETTINGS.out.versions)
+
+    //
+    // Download SRA files from AWS S3
+    //
+    SRA_AWS_DOWNLOAD ( ch_sra_ids )
+    ch_versions = ch_versions.mix(SRA_AWS_DOWNLOAD.out.versions.first()) // first(): keep only one versions.yml from this step
+
+    //
+    // Convert the SRA format into one or more compressed FASTQ files.
+    //
+    SRATOOLS_FASTERQDUMP ( SRA_AWS_DOWNLOAD.out.sra, ch_ncbi_settings, ch_dbgap_key ) // NOTE(review): ch_dbgap_key is used here only, not by the S3 download step
+    ch_versions = ch_versions.mix(SRATOOLS_FASTERQDUMP.out.versions.first()) // first(): keep only one versions.yml from this step
+
+    emit:
+    reads    = SRATOOLS_FASTERQDUMP.out.reads // channel: [ val(meta), [ reads ] ]
+    versions = ch_versions                    // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/fastq_download_aws_sratools/tests/main.nf.test b/subworkflows/local/fastq_download_aws_sratools/tests/main.nf.test
@@ -0,0 +1,41 @@
+nextflow_workflow {
+
+    name "Test workflow: fastq_download_aws_sratools/main.nf"
+    script "../main.nf"
+    workflow "FASTQ_DOWNLOAD_AWS_SRATOOLS"
+
+    tag "CUSTOM_SRATOOLSNCBISETTINGS"
+    tag "SRA_AWS_DOWNLOAD"
+    tag "SRATOOLS_FASTERQDUMP"
+
+    test("Parameters: default") { // one single-end and one paired-end accession, no dbGaP key
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.of(
+                    [[ id:'test_single_end', single_end:true  ], 'DRR000774'],
+                    [[ id:'test_paired_end', single_end:false ], 'SRR11140744']
+                )
+                input[1] = []
+                """
+            }
+        }
+
+        then {
+            // NOTE(review): the indices below assume reads[0] is the paired-end sample and reads[1] the single-end one -- confirm the output ordering is stable
+            def pelines1 = path(workflow.out.reads[0][1][0]).linesGzip // first file of reads[0]
+            def pelines2 = path(workflow.out.reads[0][1][1]).linesGzip // second file of reads[0]
+            def selines = path(workflow.out.reads[1][1]).linesGzip // the file of reads[1]
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(pelines1[0..5]).match("test_pe_reads_1_lines") }, // first six lines only
+                { assert snapshot(pelines1.size()).match("test_pe_reads_1_size") }, // total line count
+                { assert snapshot(pelines2[0..5]).match("test_pe_reads_2_lines") },
+                { assert snapshot(pelines2.size()).match("test_pe_reads_2_size") },
+                { assert snapshot(selines[0..5]).match("test_se_reads_lines") },
+                { assert snapshot(selines.size()).match("test_se_reads_size") },
+                { assert snapshot(workflow.out.versions).match("versions") }
+            )
+        }
+    }
+}