@@ -8,18 +8,14 @@ include { DOWNLOAD_REFERENCES } from '../modules/download_references.nf'
8
8
include { SUBSAMPLE_GENOME } from ' ../modules/subsample_genome.nf'
9
9
include { DOWNLOAD_ERCC } from ' ../modules/download_ercc.nf'
10
10
include { CONCAT_ERCC } from ' ../modules/concat_ercc.nf'
11
-
12
11
include { GTF_TO_PRED } from ' ../modules/gtf_to_pred.nf'
13
12
include { PRED_TO_BED } from ' ../modules/pred_to_bed.nf'
14
-
15
13
include { GTF_TO_BED } from ' ../modules/gtf_to_bed.nf'
16
14
include { STAGE_RAW_READS } from ' ./stage_raw_reads.nf'
17
15
include { FASTQC as RAW_FASTQC } from ' ../modules/fastqc.nf'
18
16
include { GET_MAX_READ_LENGTH } from ' ../modules/get_max_read_length.nf'
19
17
include { TRIMGALORE } from ' ../modules/trimgalore.nf'
20
18
include { FASTQC as TRIMMED_FASTQC } from ' ../modules/fastqc.nf'
21
-
22
-
23
19
include { BUILD_BOWTIE2_INDEX } from ' ../modules/build_bowtie2_index.nf'
24
20
include { ALIGN_BOWTIE2 } from ' ../modules/align_bowtie2.nf'
25
21
include { SAM_TO_BAM } from ' ../modules/sam_to_bam.nf'
@@ -29,8 +25,6 @@ include { GENEBODY_COVERAGE } from '../modules/rseqc.nf'
29
25
include { INNER_DISTANCE } from ' ../modules/rseqc.nf'
30
26
include { READ_DISTRIBUTION } from ' ../modules/rseqc.nf'
31
27
include { ASSESS_STRANDEDNESS } from ' ../modules/assess_strandedness.nf'
32
-
33
-
34
28
include { MULTIQC as RAW_READS_MULTIQC } from ' ../modules/multiqc.nf'
35
29
include { MULTIQC as TRIMMED_READS_MULTIQC } from ' ../modules/multiqc.nf'
36
30
include { MULTIQC as TRIMMING_MULTIQC } from ' ../modules/multiqc.nf'
@@ -41,28 +35,20 @@ include { MULTIQC as INNER_DISTANCE_MULTIQC } from '../modules/multiqc.nf'
41
35
include { MULTIQC as READ_DISTRIBUTION_MULTIQC } from ' ../modules/multiqc.nf'
42
36
include { MULTIQC as COUNT_MULTIQC } from ' ../modules/multiqc.nf'
43
37
include { MULTIQC as ALL_MULTIQC } from ' ../modules/multiqc.nf'
44
-
45
38
// include { QUALIMAP_BAM_QC } from '../modules/qualimap.nf'
46
39
// include { QUALIMAP_RNASEQ_QC } from '../modules/qualimap.nf'
47
40
include { GET_GTF_FEATURES } from ' ../modules/get_gtf_features.nf'
48
41
include { FEATURECOUNTS } from ' ../modules/featurecounts.nf'
49
-
50
-
51
42
include { EXTRACT_RRNA } from ' ../modules/extract_rrna.nf'
52
43
include { REMOVE_RRNA_FEATURECOUNTS } from ' ../modules/remove_rrna_featurecounts.nf'
53
-
54
44
include { DGE_DESEQ2 } from ' ../modules/dge_deseq2.nf'
55
45
include { ADD_GENE_ANNOTATIONS } from ' ../modules/add_gene_annotations.nf'
56
46
// include { EXTEND_DGE_TABLE } from '../modules/extend_dge_table.nf'
57
47
// include { GENERATE_PCA_TABLE } from '../modules/generate_pca_table.nf'
58
-
59
48
include { SOFTWARE_VERSIONS } from ' ../modules/software_versions.nf'
60
-
61
49
include { MD5SUM as RAW_MD5SUM } from ' ../modules/md5sum.nf'
62
50
include { MD5SUM as PROCESSED_MD5SUM } from ' ../modules/md5sum.nf'
63
-
64
51
include { validateParameters; paramsSummaryLog; samplesheetToList } from ' plugin/nf-schema'
65
-
66
52
include { VV_RAW_READS ;
67
53
VV_TRIMMED_READS ;
68
54
VV_STAR_ALIGNMENTS ;
@@ -111,6 +97,7 @@ workflow RNASEQ_MICROBES {
111
97
else {
112
98
ch_outdir = ch_outdir. map { it + " /results" }
113
99
}
100
+
114
101
// If runsheet_path is not provided, set it up from ISA input
115
102
// If ISA input is not provided, use the accession to get the ISA
116
103
if ( runsheet_path == null ) {
@@ -134,10 +121,13 @@ workflow RNASEQ_MICROBES {
134
121
// Get samples from runsheet
135
122
samples = PARSE_RUNSHEET . out. samples
136
123
// samples | view
124
+
125
+ // Get dataset-wide metadata
137
126
samples | first
138
127
| map { meta, reads -> meta }
139
128
| set { ch_meta }
140
129
130
+ // Set metadata
141
131
ch_meta | map { meta -> meta. organism_sci }
142
132
| set { organism_sci }
143
133
@@ -187,21 +177,21 @@ workflow RNASEQ_MICROBES {
187
177
)
188
178
genome_bed = PRED_TO_BED . out. genome_bed
189
179
190
- // Metadata and reference files are ready. Stage the raw reads, find the max read length, and build the Bowtie 2 index.
191
-
192
180
// Stage the raw or truncated reads.
193
181
STAGE_RAW_READS ( samples )
194
182
raw_reads = STAGE_RAW_READS . out. raw_reads
195
183
samples_txt = STAGE_RAW_READS . out. samples_txt
196
184
// samples_txt | view
197
185
186
+ // Run FastQC on raw reads
198
187
RAW_FASTQC ( raw_reads )
199
-
188
+ // Collect the raw read fastqc zip files
200
189
RAW_FASTQC . out. fastqc | map { it -> [ it[1 ], it[2 ] ] }
201
190
| flatten
202
191
| collect // Collect all zip files into a single list
203
192
| set { raw_fastqc_zip } // Create a channel with all zip files
204
193
194
+ // Get the max read length by parsing the raw read fastqc zip files
205
195
GET_MAX_READ_LENGTH ( raw_fastqc_zip )
206
196
max_read_length = GET_MAX_READ_LENGTH . out. length | map { it. toString(). toInteger() }
207
197
// max_read_length.view { "Max read length: $it" }
@@ -228,6 +218,7 @@ workflow RNASEQ_MICROBES {
228
218
229
219
// Convert Bowtie2 SAM to BAM (query-name order, matching FASTQ input order)
230
220
SAM_TO_BAM ( ALIGN_BOWTIE2 . out. sam )
221
+
231
222
// Sort and index BAM files to convert from query-name order to genome coordinate order
232
223
SORT_AND_INDEX_BAM ( SAM_TO_BAM . out. bam )
233
224
sorted_bam = SORT_AND_INDEX_BAM . out. sorted_bam
@@ -261,21 +252,15 @@ workflow RNASEQ_MICROBES {
261
252
FEATURECOUNTS ( ch_meta, genome_references, gtf_features, strandedness, bams )
262
253
counts = FEATURECOUNTS . out. counts
263
254
264
- // Run Qualimap BAM QC and rnaseq
265
- // QUALIMAP_BAM_QC( sorted_bam, genome_bed, strandedness )
266
- // QUALIMAP_RNASEQ_QC( sorted_bam, genome_references | map { it[1] }, strandedness )
267
- // qualimap_outputs = QUALIMAP_BAM_QC.out.results
268
- // // | concat(QUALIMAP_RNASEQ_QC.out.results )
269
- // | collect
270
-
255
+ // Find rRNA sequences in the genome and remove them from the counts table
271
256
EXTRACT_RRNA ( organism_sci, genome_references | map { it[1 ] })
272
257
REMOVE_RRNA_FEATURECOUNTS ( counts, EXTRACT_RRNA . out. rrna_ids )
273
258
274
259
// Normalize counts and run differential gene expression (DGE) analysis
275
260
DGE_DESEQ2 ( ch_meta, runsheet_path, counts )
276
- dge_table = DGE_DESEQ2 . out . dge_table
261
+
277
262
// Add annotations to DGE table
278
- ADD_GENE_ANNOTATIONS ( ch_meta, PARSE_ANNOTATIONS_TABLE . out. gene_annotations_url, dge_table )
263
+ ADD_GENE_ANNOTATIONS ( ch_meta, PARSE_ANNOTATIONS_TABLE . out. gene_annotations_url, DGE_DESEQ2 . out . dge_table )
279
264
annotated_dge_table = ADD_GENE_ANNOTATIONS . out. annotated_dge_table
280
265
281
266
// MultiQC
@@ -294,7 +279,6 @@ workflow RNASEQ_MICROBES {
294
279
nf_version = ' "NEXTFLOW":\n nextflow: ' . concat(" ${ nextflow.version} \n " )
295
280
ch_nextflow_version = Channel . value(nf_version)
296
281
ch_software_versions = Channel . empty()
297
-
298
282
// Mix in versions from each process
299
283
ch_software_versions = ch_software_versions
300
284
| mix(ISA_TO_RUNSHEET . out. versions)
@@ -316,15 +300,13 @@ workflow RNASEQ_MICROBES {
316
300
ch_software_versions
317
301
| unique
318
302
| collectFile(
319
- name : " software_versions.txt" ,
320
303
newLine : true ,
321
304
cache : false
322
305
)
323
306
| set { ch_final_software_versions }
324
-
307
+ // Convert the combined software versions YAML to a markdown table
325
308
SOFTWARE_VERSIONS (ch_final_software_versions)
326
309
327
-
328
310
// Generate md5sums for raw and processed data
329
311
RAW_MD5SUM (
330
312
STAGE_RAW_READS . out. ch_all_raw_reads
@@ -338,8 +320,12 @@ workflow RNASEQ_MICROBES {
338
320
// "processed"
339
321
// )
340
322
341
-
342
-
323
+ // Run Qualimap BAM QC and rnaseq
324
+ // QUALIMAP_BAM_QC( sorted_bam, genome_bed, strandedness )
325
+ // QUALIMAP_RNASEQ_QC( sorted_bam, genome_references | map { it[1] }, strandedness )
326
+ // qualimap_outputs = QUALIMAP_BAM_QC.out.results
327
+ // // | concat(QUALIMAP_RNASEQ_QC.out.results )
328
+ // | collect
343
329
emit :
344
330
RAW_MD5SUM . out. md5sums
345
331
}
0 commit comments