Skip to content

Commit 20161fa

Browse files
committed
add bowtie2 alignment
1 parent 4ac179b commit 20161fa

File tree

9 files changed

+354
-14
lines changed

9 files changed

+354
-14
lines changed

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/conf/by_docker_image.config

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,16 @@ process {
4444
container = "quay.io/biocontainers/star:2.7.11b--h43eeafb_1"
4545
}
4646

47+
withName: 'BUILD_BOWTIE2_INDEX|ALIGN_BOWTIE2' {
    // Bowtie2 2.5.4 (12/10/2024)
    // One image is shared by the index-build and the alignment processes.
    // NOTE(review): ALIGN_BOWTIE2 also pipes into `samtools view` - confirm this image ships samtools.
    container = "quay.io/biocontainers/bowtie2:2.5.4--he96a11b_5"
}
51+
52+
withName: 'FEATURECOUNTS' {
    // featureCounts 2.0.8 (12/10/2024); featureCounts is pulled from the Subread image
    container = "quay.io/biocontainers/subread:2.0.8--h577a1d6_0"
}
56+
4757
withName: 'BUILD_RSEM_INDEX|COUNT_ALIGNED' {
4858
// RSEM (02/14/2020)
4959
// Known issue: version is printed as 1.31 https://github.com/deweylab/RSEM/issues/153

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/main.nf

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ if ((params.accession) || params.runsheet_path || params.isa_archive_path) {
5858
}
5959

6060
include { RNASEQ } from './workflows/rnaseq.nf'
61-
61+
include { RNASEQ_MICROBES } from './workflows/rnaseq_microbes.nf'
6262
// Validate accession format. Must be OSD-# or GLDS-#.
6363
if (params.accession && !params.accession.matches(/^(OSD|GLDS)-\d+$/)) {
6464
log.error "Invalid accession format. Expected format: OSD-# or GLDS-#"
@@ -102,7 +102,22 @@ ch_reference_gtf = params.reference_gtf ? Channel.fromPath(params.reference_gtf)
102102
// Main workflows
103103
workflow {
104104
if (params.mode == 'microbes') {
105-
//RNASEQ_MICROBES() // Uncomment after implemented
105+
RNASEQ_MICROBES(
106+
ch_dp_tools_plugin,
107+
ch_reference_table,
108+
ch_accession,
109+
ch_isa_archive,
110+
ch_runsheet,
111+
ch_api_url,
112+
ch_force_single_end,
113+
ch_truncate_to,
114+
ch_reference_source,
115+
ch_reference_version,
116+
ch_reference_fasta,
117+
ch_reference_gtf,
118+
ch_reference_store_path,
119+
ch_derived_store_path
120+
)
106121
} else {
107122
RNASEQ(
108123
ch_dp_tools_plugin,
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
process ALIGN_BOWTIE2 {
    // Aligns one sample's reads against a prebuilt Bowtie2 index.
    // Bowtie2's SAM stream is piped through `samtools view` into a BAM file, Bowtie2's
    // stderr is kept as the alignment log, and reads passed to `--un` are written as FASTQ.
    tag "Sample: ${ meta.id }"

    input:
        tuple val(meta), path(reads)  // meta.id names the outputs; meta.paired_end selects -1/-2 vs -U
        path(bowtie2_index_dir)       // directory containing the Bowtie2 index files

    output:
        path("${ meta.id }/${ meta.id }*"), emit: publishables // used to ensure direct files are available for publishing directive
        path("${ meta.id }/${ meta.id }.bowtie2.log"), emit: alignment_logs
        tuple val(meta), path("${ meta.id }/${meta.id}.bam"), emit: bam
        path("${ meta.id }/${ meta.id }.unmapped.fastq"), emit: unmapped_reads
        path("versions.yml"), emit: versions

    script:
    def readArgs = meta.paired_end ? "-1 ${ reads[0] } -2 ${ reads[1] }" : "-U ${ reads }"
    // Index prefix is <index dir>/<index dir name>: BUILD_BOWTIE2_INDEX writes the index files
    // into a directory that carries the same name as the index prefix. `.name` is used instead
    // of `.baseName` so that a dotted directory name is not truncated at its last dot.
    // The prefix must be built from the `bowtie2_index_dir` input; any other identifier inside
    // `${ }` is resolved by Groovy (not the shell) and fails if it is not defined.
    def indexPrefix = "${ bowtie2_index_dir }/${ bowtie2_index_dir.name }"
    // NOTE(review): `--un` collects unpaired reads that fail to align; for paired-end input
    // Bowtie2 reports non-concordant pairs via `--un-conc` instead - confirm that the
    // `.unmapped.fastq` output is still produced for paired-end samples.
    """
    export BOWTIE2_INDEXES=${ bowtie2_index_dir }

    mkdir -p ${ meta.id }
    bowtie2 -x ${ indexPrefix } \
        ${readArgs} \
        --threads ${ task.cpus } \
        --minins 0 \
        --maxins 500 \
        -k 1 \
        --un ${ meta.id }/${ meta.id }.unmapped.fastq \
        2> ${ meta.id }/${ meta.id }.bowtie2.log \
        | samtools view -bS --threads ${ task.cpus } -o ${ meta.id }/${ meta.id }.bam -

    echo '"${task.process}":' > versions.yml
    echo "    bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')" >> versions.yml
    """
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
process BUILD_BOWTIE2_INDEX {
    // Builds a Bowtie 2 index from the reference FASTA; this is ercc-spike-in and organism specific.
    // The index is written into a directory named after the FASTA base name, using that same
    // base name as the index prefix, and is cached through storeDir.
    //
    // The tag appends the reference version only for Ensembl sources and the genome subsample
    // only when params.genome_subsample is set.
    tag "Refs: ${ genome_fasta }, ${ genome_gtf }, Source: ${reference_source}${reference_source.toLowerCase().contains('ensembl') ? ', Version: ' + reference_version : ''}${ params.genome_subsample ? ', GenomeSubsample: ' + params.genome_subsample : '' }"
    // NOTE(review): the store path reads the organism from `meta.organism_sci`, not from the
    // separate `organism_sci` input - confirm both always carry the same value.
    storeDir "${ derived_store_path }/Bowtie2_Indices/${ reference_source }/${reference_source.toLowerCase().contains('ensembl') ? reference_version + '/' : ''}${ meta.organism_sci }"

    input:
        val(derived_store_path)                     // root of the derived-reference store (used by storeDir)
        val(organism_sci)                           // not referenced by the directives or the script
        val(reference_source)                       // used for the tag and the storage location
        val(reference_version)                      // used for the tag and the storage location (Ensembl only)
        tuple path(genome_fasta), path(genome_gtf)  // only the FASTA is indexed; the GTF appears in the tag
        val(meta)                                   // meta.organism_sci is used for the storage location

    output:
        path("${ genome_fasta.baseName }"), emit: index_dir

    script:
    """
    mkdir -p ${ genome_fasta.baseName }

    bowtie2-build --threads ${task.cpus} \
        -f ${ genome_fasta } \
        ${ genome_fasta.baseName }/${ genome_fasta.baseName }
    """
}

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/modules/deseq2_dge.nf renamed to RNAseq/Workflow_Documentation/NF_RCP/workflow_code/modules/dge_deseq2.nf

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
*/
44
// ERCC counts are removed before normalization
55

6-
process DESEQ2_DGE {
7-
tag "Dataset-wide"
6+
process DGE_DESEQ2 {
87

98
input:
109
val(meta)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
process FEATURECOUNTS {
    // Runs featureCounts once over all supplied BAM files, summarising exon features per
    // gene_id, and records the featureCounts version in versions.yml.

    input:
        val(meta)                                 // meta.paired_end toggles paired-end counting
        tuple path(genomeFasta), path(genomeGtf)  // only the GTF is passed to featureCounts (-a)
        val(strandedness)                         // "unstranded" -> -s 0, "sense" -> -s 1, anything else -> -s 2
        path(bam_files)                           // BAM files counted together in one invocation

    output:
        // NOTE(review): featureCounts writes tab-delimited text by default, so the `.csv`
        // extension may not reflect the actual delimiter - confirm with downstream consumers.
        tuple path("FeatureCounts_GLbulkRNAseq.csv"), path("FeatureCounts_GLbulkRNAseq.csv.summary"), emit: publishables
        path("versions.yml"), emit: versions

    script:
    // Since Subread 2.0.2, `-p` only declares that the input is paired-end;
    // `--countReadPairs` is additionally required to count fragments (read pairs)
    // instead of individual reads.
    def pairedOption = meta.paired_end ? "-p --countReadPairs" : ""
    def strandOption = (strandedness == "unstranded") ? 0 : (strandedness == "sense") ? 1 : 2
    def bamList = bam_files.join(' ')
    """
    featureCounts ${pairedOption} \
        -T ${ task.cpus } \
        -a ${ genomeGtf } \
        -s ${strandOption} \
        -t exon \
        -g gene_id \
        -o "FeatureCounts_GLbulkRNAseq.csv" \
        ${bamList}

    echo '"${task.process}":' > versions.yml
    echo "    featurecounts: \$(echo \$(featureCounts -v 2>&1) | sed 's/^.*featureCounts v//; s/ .*\$//')" >> versions.yml
    """
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
process GTF_TO_BED {
    // Converts the reference GTF into a BED file named after the GTF base name.
    // The result is cached through storeDir in a source/version/organism-specific
    // "microbes" folder; the version folder is only added for Ensembl sources.
    storeDir "${ derived_store_path }/Genome_GTF_BED_Files/${reference_source}/${reference_source.toLowerCase().contains('ensembl') ? reference_version + '/' : ''}${organism_sci}/microbes"

    input:
        val(derived_store_path)  // Used for defining storage location
        val(organism_sci)        // Used for defining storage location
        val(reference_source)    // Used for defining storage location
        val(reference_version)   // Used for defining storage location
        path(genome_gtf)

    output:
        path("${ genome_gtf.baseName }.bed"), emit: genome_bed

    script:
    // Same file name as declared in the output block.
    def bed_name = "${ genome_gtf.baseName }.bed"
    """
    gtf_to_bed.py ${ genome_gtf } ${ bed_name }
    """
}

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/workflows/rnaseq.nf

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,10 @@ include { MULTIQC as ALL_MULTIQC } from '../modules/multiqc.nf' addParams(MQCLab
4545
//include { QUALIMAP_BAM_QC } from '../modules/qualimap.nf' not implemented
4646
//include { QUALIMAP_RNASEQ_QC } from '../modules/qualimap.nf' not implemented
4747

48-
include { DESEQ2_DGE } from '../modules/deseq2_dge.nf'
48+
include { DGE_DESEQ2 } from '../modules/dge_deseq2.nf'
4949
include { ADD_GENE_ANNOTATIONS } from '../modules/add_gene_annotations.nf'
50-
include { EXTEND_DGE_TABLE } from '../modules/extend_dge_table.nf'
51-
include { GENERATE_PCA_TABLE } from '../modules/generate_pca_table.nf'
50+
// include { EXTEND_DGE_TABLE } from '../modules/extend_dge_table.nf'
51+
// include { GENERATE_PCA_TABLE } from '../modules/generate_pca_table.nf'
5252

5353

5454
include { VV_RAW_READS;
@@ -282,8 +282,8 @@ workflow RNASEQ {
282282

283283

284284
// Normalize counts, DGE
285-
DESEQ2_DGE( ch_meta, runsheet_path, COUNT_ALIGNED.out.genes_results | toSortedList )
286-
dge_table = DESEQ2_DGE.out.dge_table
285+
DGE_DESEQ2( ch_meta, runsheet_path, COUNT_ALIGNED.out.genes_results | toSortedList )
286+
dge_table = DGE_DESEQ2.out.dge_table
287287
// Add annotations to DGE table
288288
ADD_GENE_ANNOTATIONS( ch_meta, gene_annotations_url, dge_table )
289289
annotated_dge_table = ADD_GENE_ANNOTATIONS.out.annotated_dge_table
@@ -292,7 +292,7 @@ workflow RNASEQ {
292292
//EXTEND_DGE_TABLE( annotated_dge_table )
293293
// Generate PCA table from normalized counts
294294
// Step being removed on update
295-
//GENERATE_PCA_TABLE ( DESEQ2_DGE.out.norm_counts | map { it[1] })
295+
//GENERATE_PCA_TABLE ( DGE_DESEQ2.out.norm_counts | map { it[1] })
296296

297297
// Parse QC metrics
298298
all_multiqc_output = RAW_READS_MULTIQC.out.data
@@ -380,12 +380,12 @@ workflow RNASEQ {
380380
// QUANTIFY_RSEM_GENES.out.publishables,
381381
// COUNT_MULTIQC.out.zipped_report,
382382
// COUNT_MULTIQC.out.unzipped_report,
383-
// DESEQ2_DGE.out.norm_counts,
384-
// DESEQ2_DGE.out.contrasts
385-
// .mix( DESEQ2_DGE.out.sample_table )
383+
// DGE_DESEQ2.out.norm_counts,
384+
// DGE_DESEQ2.out.contrasts
385+
// .mix( DGE_DESEQ2.out.sample_table )
386386
// .mix( annotated_dge_table )
387387
// .mix( GENERATE_PCA_TABLE.out.pca_table ),
388-
// DESEQ2_DGE.out.norm_counts_ercc | ifEmpty( { file("NO_FILES.placeholder") }),
388+
// DGE_DESEQ2.out.norm_counts_ercc | ifEmpty( { file("NO_FILES.placeholder") }),
389389
// DGE_BY_DESEQ2.out.dge_ercc | ifEmpty( { file("NO_FILES.placeholder") }),
390390
// "${ projectDir }/bin/dp_tools__NF_RCP" // dp_tools plugin
391391
// )

0 commit comments

Comments
 (0)