1
+ /* VV check processes
2
+ * These processes intentionally deviate from Nextflow isolation to ensure QC reports
3
+ * are based on files in publish directory and not work directories.
4
+ */
5
+
6
+ // NOTE: first VV step also creates initial VV file that is shared across all vv steps
7
+ // process VV_RAW_READS {
8
+ // label 'VV'
9
+ // // Log publishing
10
+ // publishDir "${ publishdir }",
11
+ // pattern: "test.txt",
12
+ // mode: params.publish_dir_mode
13
+
14
+ // input:
15
+ // val(publishdir)
16
+ // val(meta)
17
+ // path("Metadata/*_runsheet.csv") // Runsheet
18
+ // path("00-RawData/Fastq/*") // Raw reads
19
+ // path("00-RawData/FastQC_Reports/*") // Raw FastQC reports
20
+ // path("00-RawData/FastQC_Reports/*") // Raw MultiQC report
21
+ // path(dp_tools__NF_RCP)
22
+
23
+ // output:
24
+ // path("test.txt"), emit: test
25
+
26
+ // script:
27
+ // """
28
+ // echo "Meta: ${meta}" > test.txt
29
+ // """
30
+ // }
31
/*
 * VV_RAW_READS
 * Runs the dp_tools validation ('dpt validation run') over the raw-read assets
 * and re-declares those assets as outputs so they are published from this task.
 * Inputs are staged below VV_INPUT/ and moved into the task directory by the script.
 */
process VV_RAW_READS {
  // Log publishing: the log is renamed after this process (':' replaced by '-')
  publishDir "${ publishdir }",
    pattern: "VV_log.tsv",
    mode: params.publish_dir_mode,
    saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
  // V&V'ed data publishing
  publishDir "${ publishdir }",
    pattern: '00-RawData/**',
    mode: params.publish_dir_mode

  label 'VV'

  input:
    val(publishdir)                                  // target of both publishDir directives
    val(meta)                                        // meta.paired_end selects the data asset key sets
    path("VV_INPUT/Metadata/*")                      // Runsheet
    path("VV_INPUT/00-RawData/Fastq/*")              // Raw reads
    path("VV_INPUT/00-RawData/FastQC_Reports/*")     // Raw FastQC reports
    path("VV_INPUT/00-RawData/FastQC_Reports/*")     // Unzipped Raw MultiQC report
    path("VV_INPUT/00-RawData/FastQC_Reports/*")     // Zipped Raw MultiQC report
    path(dp_tools__NF_RCP)                           // first positional argument of 'dpt validation run'

  output:
    path("Metadata/*_runsheet.csv"), emit: VVed_runsheet
    path("00-RawData/Fastq"), emit: VVed_raw_reads
    path("00-RawData/FastQC_Reports/*{_fastqc.html,_fastqc.zip}"), emit: VVed_raw_fastqc
    path("00-RawData/FastQC_Reports/raw_multiqc${ params.output_suffix }_report"), emit: VVed_raw_unzipped_multiqc_report
    path("00-RawData/FastQC_Reports/raw_multiqc${ params.output_suffix }_report.zip"), emit: VVed_raw_zipped_multiqc_report
    // The log is only written when V&V actually runs, hence optional when skipped
    path("VV_log.tsv"), optional: params.skipVV, emit: log

  script:
    """
    # move from VV_INPUT to task directory
    # This allows detection as output files for publishing
    # ('|| true': a failing move does not abort the task)
    mv VV_INPUT/* . || true

    # Run V&V unless user requests to skip V&V
    if ${ !params.skipVV } ; then
      dpt validation run ${ dp_tools__NF_RCP } . Metadata/*_runsheet.csv \\
        --data-asset-key-sets \\
          ${ meta.paired_end ? "'demuliplexed paired end raw data,qc reports for paired end raw data'" : "'demuliplexed single end raw data,qc reports for single end raw data'" } \\
        --run-components \\
          'Metadata,Raw Reads,Raw Reads By Sample' \\
        --max-flag-code ${ params.max_flag_code } \\
        --output VV_log.tsv
    fi
    """
}
80
+
81
/*
 * VV_TRIMMED_READS
 * Runs the dp_tools validation ('dpt validation run') over the trimmed-read
 * assets and re-declares those assets as outputs so they are published from this task.
 * Inputs are staged below VV_INPUT/ and moved into the task directory by the script.
 */
process VV_TRIMMED_READS {
  // Log publishing: the log is renamed after this process (':' replaced by '-')
  publishDir "${ publishdir }",
    pattern: "VV_log.tsv",
    mode: params.publish_dir_mode,
    saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
  // V&V'ed data publishing
  // NOTE(review): confirm this trailing-slash glob matches the outputs declared below
  publishDir "${ publishdir }",
    pattern: '01-TG_Preproc/',
    mode: params.publish_dir_mode

  label 'VV'

  input:
    val(publishdir)                                      // target of both publishDir directives
    val(meta)                                            // meta.paired_end selects the data asset key sets
    path("VV_INPUT/Metadata/*")                          // runsheet
    path("VV_INPUT/01-TG_Preproc/Fastq/*")               // trimmed reads
    path("VV_INPUT/01-TG_Preproc/FastQC_Reports/*")      // trimmed reads fastqc
    path("VV_INPUT/01-TG_Preproc/FastQC_Reports/*")      // trimmed reads multiqc unzipped report
    path("VV_INPUT/01-TG_Preproc/FastQC_Reports/*")      // trimmed reads multiqc zipped report
    path("VV_INPUT/01-TG_Preproc/Trimming_Reports/*")    // trimming reports
    path("VV_INPUT/01-TG_Preproc/Trimming_Reports/*")    // trimming reports multiqc unzipped report
    path("VV_INPUT/01-TG_Preproc/Trimming_Reports/*")    // trimming reports multiqc zipped report
    path(dp_tools__NF_RCP)                               // first positional argument of 'dpt validation run'

  output:
    path("01-TG_Preproc/Fastq"), emit: VVed_trimmed_reads
    path("01-TG_Preproc/FastQC_Reports/*{_fastqc.html,_fastqc.zip}"), emit: VVed_trimmed_fastqc
    // NOTE(review): these report names hard-code 'GLbulkRNAseq' while the log name
    // above is built from params.output_suffix - confirm the two always agree
    path("01-TG_Preproc/FastQC_Reports/trimmed_multiqc_GLbulkRNAseq_report"), emit: VVed_trimmed_unzipped_multiqc_report
    path("01-TG_Preproc/FastQC_Reports/trimmed_multiqc_GLbulkRNAseq_report.zip"), emit: VVed_trimmed_zipped_multiqc_report
    path("01-TG_Preproc/Trimming_Reports"), emit: VVed_trimming_reports_all
    // The log is only written when V&V actually runs, hence optional when skipped
    path("VV_log.tsv"), optional: params.skipVV, emit: log

  script:
    """
    # move from VV_INPUT to task directory
    # This allows detection as output files for publishing
    # ('|| true': a failing move does not abort the task)
    mv VV_INPUT/* . || true

    # Run V&V unless user requests to skip V&V
    if ${ !params.skipVV } ; then
      dpt validation run ${ dp_tools__NF_RCP } . Metadata/*_runsheet.csv \\
        --data-asset-key-sets \\
          ${ meta.paired_end ? "'paired end trimmed reads,qc reports for paired end trimmed reads data'" : "'single end trimmed reads,qc reports for single end trimmed reads data'" } \\
        --run-components \\
          'Trim Reads,Trimmed Reads By Sample' \\
        --max-flag-code ${ params.max_flag_code } \\
        --output VV_log.tsv
    fi
    """
}
133
+
134
/*
 * VV_STAR_ALIGNMENTS
 * Runs the dp_tools validation ('dpt validation run') over the STAR alignment
 * assets and re-declares the 02-STAR_Alignment directory as an output so it is
 * published from this task.
 * Inputs are staged below VV_INPUT/ and moved into the task directory by the script.
 */
process VV_STAR_ALIGNMENTS {
  // Log publishing: the log is renamed after this process (':' replaced by '-')
  publishDir "${ publishdir }",
    pattern: "VV_log.tsv",
    mode: params.publish_dir_mode,
    saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
  // V&V'ed data publishing
  // NOTE(review): confirm this trailing-slash glob matches the directory output declared below
  publishDir "${ publishdir }",
    pattern: '02-STAR_Alignment/',
    mode: params.publish_dir_mode

  label 'VV'

  input:
    val(publishdir)                          // target of both publishDir directives
    path("VV_INPUT/Metadata/*")              // runsheet (consumed as Metadata/*_runsheet.csv)
    path("VV_INPUT/02-STAR_Alignment/*")     // direct STAR alignment output
    path("VV_INPUT/02-STAR_Alignment/*")     // STAR alignment counts tables
    path("VV_INPUT/02-STAR_Alignment/*")     // zipped multiqc report
    path("VV_INPUT/02-STAR_Alignment/*")     // unzipped multiqc report
    path("VV_INPUT/02-STAR_Alignment/*")     // reindexed, sorted bam/bed files
    path(dp_tools__NF_RCP)                   // first positional argument of 'dpt validation run'

  output:
    path("02-STAR_Alignment")
    // The log is only written when V&V actually runs, hence optional when skipped
    path("VV_log.tsv"), optional: params.skipVV, emit: log

  script:
    """
    # move from VV_INPUT to task directory
    # This allows detection as output files for publishing
    # ('|| true': a failing move does not abort the task)
    mv VV_INPUT/* . || true
    # Helper script is not part of this file; presumably groups files into
    # per-sample subdirectories. Runs whether or not V&V is skipped.
    sort_into_subdirectories_by_sample.py 02-STAR_Alignment 02-STAR_Alignment '_*'

    # Run V&V unless user requests to skip V&V
    if ${ !params.skipVV } ; then
      dpt validation run ${ dp_tools__NF_RCP } . Metadata/*_runsheet.csv \\
        --data-asset-key-sets \\
          'STAR alignments' \\
        --run-components \\
          'STAR Alignments,STAR Alignments By Sample' \\
        --max-flag-code ${ params.max_flag_code } \\
        --output VV_log.tsv
    fi
    """

}
181
/*
 * VV_RSEQC
 * Rearranges the staged RSeQC files, runs the dp_tools validation
 * ('dpt validation run') over them, and re-declares the RSeQC_Analyses
 * directory as an output so it is published from this task.
 * Inputs are staged below VV_INPUT/ and moved into the task directory by the script.
 */
process VV_RSEQC {
  // Log publishing: the log is renamed after this process (':' replaced by '-')
  publishDir "${ publishdir }",
    pattern: "VV_log.tsv",
    mode: params.publish_dir_mode,
    saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
  // V&V'ed data publishing
  // NOTE(review): confirm this trailing-slash glob matches the directory output declared below
  publishDir "${ publishdir }",
    pattern: 'RSeQC_Analyses/',
    mode: params.publish_dir_mode

  label 'VV'

  input:
    val(publishdir)                                            // target of both publishDir directives
    val(meta)                                                  // meta.paired_end gates the inner-distance steps and key set
    path("VV_INPUT/Metadata/*")                                // runsheet (consumed as Metadata/*_runsheet.csv)
    path("VV_INPUT/RSeQC_Analyses/*")                          // direct logs
    path("VV_INPUT/RSeQC_Analyses/02_geneBody_coverage/*")     // genebody multiqc
    path("VV_INPUT/RSeQC_Analyses/03_infer_experiment/*")      // staged into 03_infer_experiment/
    path("VV_INPUT/RSeQC_Analyses/04_inner_distance/*")        // staged into 04_inner_distance/
    path("VV_INPUT/RSeQC_Analyses/05_read_distribution/*")     // staged into 05_read_distribution/
    path(dp_tools__NF_RCP)                                     // first positional argument of 'dpt validation run'

  output:
    path("RSeQC_Analyses")
    // The log is only written when V&V actually runs, hence optional when skipped
    path("VV_log.tsv"), optional: params.skipVV, emit: log

  script:
    """
    # move from VV_INPUT to task directory
    # This allows detection as output files for publishing
    # ('|| true': a failing move does not abort the task)
    mv VV_INPUT/* . || true
    sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/02_geneBody_coverage '.geneBodyCoverage.txt'
    sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/02_geneBody_coverage '.geneBodyCoverage.curves.pdf'
    sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/02_geneBody_coverage '.geneBodyCoverage.r'
    # These are not in sub directories: sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/03_infer_experiment '_infer_expt.out'
    mv RSeQC_Analyses/*_infer_expt.out RSeQC_Analyses/03_infer_experiment
    ${ meta.paired_end ? '' : '# Only for Paired end datasets: ' } sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/04_inner_distance '.inner_distance_freq.txt'
    ${ meta.paired_end ? '' : '# Only for Paired end datasets: ' } sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/04_inner_distance '.inner_distance_plot.pdf'
    ${ meta.paired_end ? '' : '# Only for Paired end datasets: ' } sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/04_inner_distance '.inner_distance_plot.r'
    ${ meta.paired_end ? '' : '# Only for Paired end datasets: ' } sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/04_inner_distance '.inner_distance.txt'
    # These are not in sub directories: sort_into_subdirectories_by_sample.py RSeQC_Analyses/05_read_distribution RSeQC_Analyses/05_read_distribution '_read_dist.out'
    mv RSeQC_Analyses/*_read_dist.out RSeQC_Analyses/05_read_distribution


    # Run V&V unless user requests to skip V&V
    if ${ !params.skipVV } ; then
      dpt validation run ${ dp_tools__NF_RCP } . Metadata/*_runsheet.csv \\
        --data-asset-key-sets \\
          ${ meta.paired_end ? "'RSeQC output for paired end data'" : "'RSeQC output for single end data'" } \\
        --run-components \\
          'RSeQC,RSeQC By Sample' \\
        --max-flag-code ${ params.max_flag_code } \\
        --output VV_log.tsv
    fi

    # Remove all placeholder files and empty directories to prevent publishing
    # The -name pattern is quoted so the shell hands the glob to find unexpanded
    find RSeQC_Analyses -type f,l -name '*.placeholder' -delete
    find RSeQC_Analyses -empty -type d -delete
    """

}
244
+
245
+
246
/*
 * VV_RSEM_COUNTS
 * Runs the dp_tools validation ('dpt validation run') over the RSEM count
 * assets and re-declares the 03-RSEM_Counts directory as an output so it is
 * published from this task.
 * Inputs are staged below VV_INPUT/ and moved into the task directory by the script.
 */
process VV_RSEM_COUNTS {
  // Log publishing: the log is renamed after this process (':' replaced by '-')
  publishDir "${ publishdir }",
    pattern: "VV_log.tsv",
    mode: params.publish_dir_mode,
    saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
  // V&V'ed data publishing
  // NOTE(review): confirm this trailing-slash glob matches the directory output declared below
  publishDir "${ publishdir }",
    pattern: '03-RSEM_Counts/',
    mode: params.publish_dir_mode

  label 'VV'

  input:
    val(publishdir)                       // target of both publishDir directives
    path("VV_INPUT/Metadata/*")           // runsheet (consumed as Metadata/*_runsheet.csv)
    path("VV_INPUT/03-RSEM_Counts/*")     // RSEM sample wise output
    path("VV_INPUT/03-RSEM_Counts/*")     // RSEM dataset output
    path("VV_INPUT/03-RSEM_Counts/*")     // zipped multiqc report
    path("VV_INPUT/03-RSEM_Counts/*")     // unzipped multiqc report
    path(dp_tools__NF_RCP)                // first positional argument of 'dpt validation run'


  output:
    path("03-RSEM_Counts")
    // The log is only written when V&V actually runs, hence optional when skipped
    path("VV_log.tsv"), optional: params.skipVV, emit: log

  script:
    """
    # move from VV_INPUT to task directory
    # This allows detection as output files for publishing
    # ('|| true': a failing move does not abort the task)
    mv VV_INPUT/* . || true

    # Run V&V unless user requests to skip V&V
    if ${ !params.skipVV } ; then
      dpt validation run ${ dp_tools__NF_RCP } . Metadata/*_runsheet.csv \\
        --data-asset-key-sets \\
          'RSEM counts' \\
        --run-components \\
          'RSEM Counts' \\
        --max-flag-code ${ params.max_flag_code } \\
        --output VV_log.tsv
    fi
    """
}
291
+
292
/*
 * VV_DESEQ2_ANALYSIS
 * Runs the dp_tools validation ('dpt validation run') over the DESeq2
 * normalized-count and DGE assets and re-declares both directories as outputs
 * so they are published from this task. ERCC key sets and run components are
 * added only when meta.has_ercc is set.
 * Inputs are staged below VV_INPUT/ and moved into the task directory by the script.
 */
process VV_DESEQ2_ANALYSIS {
  // Log publishing: the log is renamed after this process (':' replaced by '-')
  publishDir "${ publishdir }",
    pattern: "VV_log.tsv",
    mode: params.publish_dir_mode,
    saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
  // V&V'ed data publishing
  publishDir "${ publishdir }",
    pattern: '{04-DESeq2_NormCounts,05-DESeq2_DGE}',
    mode: params.publish_dir_mode

  label 'VV'

  input:
    val(publishdir)                                   // target of both publishDir directives
    val(meta)                                         // meta.has_ercc toggles the ERCC key sets / components
    path("VV_INPUT/Metadata/*")                       // runsheet (consumed as Metadata/*_runsheet.csv)
    path("VV_INPUT/03-RSEM_Counts/*")                 // RSEM dataset output
    path("VV_INPUT/03-RSEM_Counts/*")                 // zipped multiqc report
    path("VV_INPUT/03-RSEM_Counts/*")                 // unzipped multiqc report
    path("VV_INPUT/04-DESeq2_NormCounts/*")           // norm counts files
    path("VV_INPUT/05-DESeq2_DGE/*")                  // dge files
    path("VV_INPUT/04-DESeq2_NormCounts/*")           // ERCC norm counts files
    path("VV_INPUT/05-DESeq2_DGE/ERCC_NormDGE/*")     // ERCC dge files
    path(dp_tools__NF_RCP)                            // first positional argument of 'dpt validation run'

  output:
    path("04-DESeq2_NormCounts")
    path("05-DESeq2_DGE")
    // The log is only written when V&V actually runs, hence optional when skipped
    path("VV_log.tsv"), optional: params.skipVV, emit: log

  script:
    """
    # move from VV_INPUT to task directory
    # This allows detection as output files for publishing
    # ('|| true': a failing move does not abort the task)
    mv VV_INPUT/* . || true

    # Run V&V unless user requests to skip V&V
    if ${ !params.skipVV } ; then
      dpt validation run ${ dp_tools__NF_RCP } . Metadata/*_runsheet.csv \\
        --data-asset-key-sets \\
          'RSEM Output,DGE Output${ meta.has_ercc ? ",ERCC DGE Output" : '' }' \\
        --run-components \\
          'DGE Metadata${ meta.has_ercc ? ",DGE Metadata ERCC" : '' },DGE Output${ meta.has_ercc ? ",DGE Output ERCC" : '' }' \\
        --max-flag-code ${ params.max_flag_code } \\
        --output VV_log.tsv
    fi

    # Remove all placeholder files and empty directories to prevent publishing
    # The -name pattern is quoted so the shell hands the glob to find unexpanded
    find . -type f,l -name '*.placeholder' -delete
    find . -empty -type d -delete
    """
}
345
+
346
/*
 * VV_CONCAT_FILTER
 * Runs concat_logs.py followed by filter_to_only_issues.py and publishes the
 * two resulting log tables into the VV_Logs directory of the accession's
 * output folder.
 */
process VV_CONCAT_FILTER {
  // No pattern is given, so every declared output is published
  publishDir "${ params.outputDir }/${ params.gldsAccession }/VV_Logs",
    mode: params.publish_dir_mode

  label 'VV'

  input:
    path("VV_in.tsv")   // staged under this fixed name

  output:
    tuple path("VV_log_final_GLbulkRNAseq.tsv"), path("VV_log_final_only_issues_GLbulkRNAseq.tsv")

  script:
    """
    # Both helpers are called without arguments; they are not part of this file.
    # Presumably they read VV_in.tsv and write the two declared outputs - verify
    # against the scripts themselves.
    concat_logs.py
    filter_to_only_issues.py
    """
}
0 commit comments