Skip to content

Commit e2b79c3

Browse files
committed
clean up workflow code
1 parent ce6fdc1 commit e2b79c3

File tree

1 file changed

+18
-32
lines changed

1 file changed

+18
-32
lines changed

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/workflows/rnaseq_microbes.nf

Lines changed: 18 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,14 @@ include { DOWNLOAD_REFERENCES } from '../modules/download_references.nf'
88
include { SUBSAMPLE_GENOME } from '../modules/subsample_genome.nf'
99
include { DOWNLOAD_ERCC } from '../modules/download_ercc.nf'
1010
include { CONCAT_ERCC } from '../modules/concat_ercc.nf'
11-
1211
include { GTF_TO_PRED } from '../modules/gtf_to_pred.nf'
1312
include { PRED_TO_BED } from '../modules/pred_to_bed.nf'
14-
1513
include { GTF_TO_BED } from '../modules/gtf_to_bed.nf'
1614
include { STAGE_RAW_READS } from './stage_raw_reads.nf'
1715
include { FASTQC as RAW_FASTQC } from '../modules/fastqc.nf'
1816
include { GET_MAX_READ_LENGTH } from '../modules/get_max_read_length.nf'
1917
include { TRIMGALORE } from '../modules/trimgalore.nf'
2018
include { FASTQC as TRIMMED_FASTQC } from '../modules/fastqc.nf'
21-
22-
2319
include { BUILD_BOWTIE2_INDEX } from '../modules/build_bowtie2_index.nf'
2420
include { ALIGN_BOWTIE2 } from '../modules/align_bowtie2.nf'
2521
include { SAM_TO_BAM } from '../modules/sam_to_bam.nf'
@@ -29,8 +25,6 @@ include { GENEBODY_COVERAGE } from '../modules/rseqc.nf'
2925
include { INNER_DISTANCE } from '../modules/rseqc.nf'
3026
include { READ_DISTRIBUTION } from '../modules/rseqc.nf'
3127
include { ASSESS_STRANDEDNESS } from '../modules/assess_strandedness.nf'
32-
33-
3428
include { MULTIQC as RAW_READS_MULTIQC } from '../modules/multiqc.nf'
3529
include { MULTIQC as TRIMMED_READS_MULTIQC } from '../modules/multiqc.nf'
3630
include { MULTIQC as TRIMMING_MULTIQC } from '../modules/multiqc.nf'
@@ -41,28 +35,20 @@ include { MULTIQC as INNER_DISTANCE_MULTIQC } from '../modules/multiqc.nf'
4135
include { MULTIQC as READ_DISTRIBUTION_MULTIQC } from '../modules/multiqc.nf'
4236
include { MULTIQC as COUNT_MULTIQC } from '../modules/multiqc.nf'
4337
include { MULTIQC as ALL_MULTIQC } from '../modules/multiqc.nf'
44-
4538
// include { QUALIMAP_BAM_QC } from '../modules/qualimap.nf'
4639
// include { QUALIMAP_RNASEQ_QC } from '../modules/qualimap.nf'
4740
include { GET_GTF_FEATURES } from '../modules/get_gtf_features.nf'
4841
include { FEATURECOUNTS } from '../modules/featurecounts.nf'
49-
50-
5142
include { EXTRACT_RRNA } from '../modules/extract_rrna.nf'
5243
include { REMOVE_RRNA_FEATURECOUNTS } from '../modules/remove_rrna_featurecounts.nf'
53-
5444
include { DGE_DESEQ2 } from '../modules/dge_deseq2.nf'
5545
include { ADD_GENE_ANNOTATIONS } from '../modules/add_gene_annotations.nf'
5646
//include { EXTEND_DGE_TABLE } from '../modules/extend_dge_table.nf'
5747
//include { GENERATE_PCA_TABLE } from '../modules/generate_pca_table.nf'
58-
5948
include { SOFTWARE_VERSIONS } from '../modules/software_versions.nf'
60-
6149
include { MD5SUM as RAW_MD5SUM } from '../modules/md5sum.nf'
6250
include { MD5SUM as PROCESSED_MD5SUM } from '../modules/md5sum.nf'
63-
6451
include { validateParameters; paramsSummaryLog; samplesheetToList } from 'plugin/nf-schema'
65-
6652
include { VV_RAW_READS;
6753
VV_TRIMMED_READS;
6854
VV_STAR_ALIGNMENTS;
@@ -111,6 +97,7 @@ workflow RNASEQ_MICROBES {
11197
else {
11298
ch_outdir = ch_outdir.map { it + "/results" }
11399
}
100+
114101
// If runsheet_path is not provided, set it up from ISA input
115102
// If ISA input is not provided, use the accession to get the ISA
116103
if ( runsheet_path == null ) {
@@ -134,10 +121,13 @@ workflow RNASEQ_MICROBES {
134121
// Get samples from runsheet
135122
samples = PARSE_RUNSHEET.out.samples
136123
//samples | view
124+
125+
// Get dataset-wide metadata (taken from the first sample's meta)
137126
samples | first
138127
| map { meta, reads -> meta }
139128
| set { ch_meta }
140129

130+
// Derive individual metadata channels (e.g. organism_sci) from the dataset-wide metadata
141131
ch_meta | map { meta -> meta.organism_sci }
142132
| set { organism_sci }
143133

@@ -187,21 +177,21 @@ workflow RNASEQ_MICROBES {
187177
)
188178
genome_bed = PRED_TO_BED.out.genome_bed
189179

190-
// Metadata and reference files are ready. Stage the raw reads, find the max read length, and build the Bowtie 2 index.
191-
192180
// Stage the raw or truncated reads.
193181
STAGE_RAW_READS( samples )
194182
raw_reads = STAGE_RAW_READS.out.raw_reads
195183
samples_txt = STAGE_RAW_READS.out.samples_txt
196184
//samples_txt | view
197185

186+
// Run FastQC on raw reads
198187
RAW_FASTQC( raw_reads )
199-
188+
// Collect the raw read fastqc zip files
200189
RAW_FASTQC.out.fastqc | map { it -> [ it[1], it[2] ] }
201190
| flatten
202191
| collect // Collect all zip files into a single list
203192
| set { raw_fastqc_zip } // Create a channel with all zip files
204193

194+
// Get the max read length by parsing the raw read fastqc zip files
205195
GET_MAX_READ_LENGTH( raw_fastqc_zip )
206196
max_read_length = GET_MAX_READ_LENGTH.out.length | map { it.toString().toInteger() }
207197
//max_read_length.view { "Max read length: $it" }
@@ -228,6 +218,7 @@ workflow RNASEQ_MICROBES {
228218

229219
// Convert Bowtie2 SAM to BAM (query-name order, matching FASTQ input order)
230220
SAM_TO_BAM( ALIGN_BOWTIE2.out.sam )
221+
231222
// Sort and index BAM files to convert from query-name order to genome coordinate order
232223
SORT_AND_INDEX_BAM( SAM_TO_BAM.out.bam )
233224
sorted_bam = SORT_AND_INDEX_BAM.out.sorted_bam
@@ -261,21 +252,15 @@ workflow RNASEQ_MICROBES {
261252
FEATURECOUNTS( ch_meta, genome_references, gtf_features, strandedness, bams )
262253
counts = FEATURECOUNTS.out.counts
263254

264-
// Run Qualimap BAM QC and rnaseq
265-
// QUALIMAP_BAM_QC( sorted_bam, genome_bed, strandedness )
266-
// QUALIMAP_RNASEQ_QC( sorted_bam, genome_references | map { it[1] }, strandedness )
267-
// qualimap_outputs = QUALIMAP_BAM_QC.out.results
268-
// // | concat(QUALIMAP_RNASEQ_QC.out.results )
269-
// | collect
270-
255+
// Find rRNA sequences in the genome and remove them from the counts table
271256
EXTRACT_RRNA( organism_sci, genome_references | map { it[1] })
272257
REMOVE_RRNA_FEATURECOUNTS ( counts, EXTRACT_RRNA.out.rrna_ids )
273258

274259
// Normalize counts and run differential gene expression (DGE) analysis with DESeq2
275260
DGE_DESEQ2( ch_meta, runsheet_path, counts )
276-
dge_table = DGE_DESEQ2.out.dge_table
261+
277262
// Add annotations to DGE table
278-
ADD_GENE_ANNOTATIONS( ch_meta, PARSE_ANNOTATIONS_TABLE.out.gene_annotations_url, dge_table )
263+
ADD_GENE_ANNOTATIONS( ch_meta, PARSE_ANNOTATIONS_TABLE.out.gene_annotations_url, DGE_DESEQ2.out.dge_table )
279264
annotated_dge_table = ADD_GENE_ANNOTATIONS.out.annotated_dge_table
280265

281266
// MultiQC
@@ -294,7 +279,6 @@ workflow RNASEQ_MICROBES {
294279
nf_version = '"NEXTFLOW":\n nextflow: '.concat("${nextflow.version}\n")
295280
ch_nextflow_version = Channel.value(nf_version)
296281
ch_software_versions = Channel.empty()
297-
298282
// Mix in versions from each process
299283
ch_software_versions = ch_software_versions
300284
| mix(ISA_TO_RUNSHEET.out.versions)
@@ -316,15 +300,13 @@ workflow RNASEQ_MICROBES {
316300
ch_software_versions
317301
| unique
318302
| collectFile(
319-
name: "software_versions.txt",
320303
newLine: true,
321304
cache: false
322305
)
323306
| set { ch_final_software_versions }
324-
307+
// Convert software versions combined yaml to markdown table
325308
SOFTWARE_VERSIONS(ch_final_software_versions)
326309

327-
328310
// Generate md5sums for raw and processed data
329311
RAW_MD5SUM(
330312
STAGE_RAW_READS.out.ch_all_raw_reads
@@ -338,8 +320,12 @@ workflow RNASEQ_MICROBES {
338320
// "processed"
339321
// )
340322

341-
342-
323+
// Run Qualimap BAM QC and RNA-seq QC (currently disabled)
324+
// QUALIMAP_BAM_QC( sorted_bam, genome_bed, strandedness )
325+
// QUALIMAP_RNASEQ_QC( sorted_bam, genome_references | map { it[1] }, strandedness )
326+
// qualimap_outputs = QUALIMAP_BAM_QC.out.results
327+
// // | concat(QUALIMAP_RNASEQ_QC.out.results )
328+
// | collect
343329
emit:
344330
RAW_MD5SUM.out.md5sums
345331
}

0 commit comments

Comments
 (0)