Skip to content

Commit a584d25

Browse files
committed
misc changes
1 parent 97b1ac2 commit a584d25

File tree

3 files changed

+474
-10
lines changed

3 files changed

+474
-10
lines changed
Lines changed: 363 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,363 @@
1+
/* VV check processes
2+
* These processes intentionally deviate from Nextflow isolation to ensure QC reports
3+
* are based on files in publish directory and not work directories.
4+
*/
5+
6+
// NOTE: first VV step also creates initial VV file that is shared across all vv steps
7+
// process VV_RAW_READS {
8+
// label 'VV'
9+
// // Log publishing
10+
// publishDir "${ publishdir }",
11+
// pattern: "test.txt",
12+
// mode: params.publish_dir_mode
13+
14+
// input:
15+
// val(publishdir)
16+
// val(meta)
17+
// path("Metadata/*_runsheet.csv") // Runsheet
18+
// path("00-RawData/Fastq/*") // Raw reads
19+
// path("00-RawData/FastQC_Reports/*") // Raw FastQC reports
20+
// path("00-RawData/FastQC_Reports/*") // Raw MultiQC report
21+
// path(dp_tools__NF_RCP)
22+
23+
// output:
24+
// path("test.txt"), emit: test
25+
26+
// script:
27+
// """
28+
// echo "Meta: ${meta}" > test.txt
29+
// """
30+
// }
31+
// VV_RAW_READS: validation step for raw reads.
// Inputs are staged under VV_INPUT/, moved into the task directory so they can be
// declared as outputs (and published), and then checked with `dpt validation run`
// unless params.skipVV is true.
// NOTE: the `mv ... || true` in the script means a failed move does not by itself
// fail the task; missing files surface later as missing outputs.
process VV_RAW_READS {
32+
// Log publishing: VV_log.tsv is renamed to
// VV_Logs/VV_log_<process name with ':' replaced by '-'><params.output_suffix>.tsv
33+
publishDir "${ publishdir }",
34+
pattern: "VV_log.tsv" ,
35+
mode: params.publish_dir_mode,
36+
saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
37+
// V&V'ed data publishing (everything under 00-RawData/)
38+
publishDir "${ publishdir }",
39+
pattern: '00-RawData/**',
40+
mode: params.publish_dir_mode
41+
42+
label 'VV'
43+
44+
input:
45+
val(publishdir) // Root directory used by both publishDir directives
46+
val(meta) // meta.paired_end selects the data asset key set below
47+
path("VV_INPUT/Metadata/*") // Runsheet
48+
path("VV_INPUT/00-RawData/Fastq/*") // Raw reads
49+
path("VV_INPUT/00-RawData/FastQC_Reports/*") // Raw FastQC reports
50+
path("VV_INPUT/00-RawData/FastQC_Reports/*") // Unzipped Raw MultiQC report
51+
path("VV_INPUT/00-RawData/FastQC_Reports/*") // Zipped Raw MultiQC report
52+
path(dp_tools__NF_RCP) // Passed as the first argument to `dpt validation run`
53+
54+
output:
55+
path("Metadata/*_runsheet.csv"), emit: VVed_runsheet
56+
path("00-RawData/Fastq"), emit: VVed_raw_reads
57+
path("00-RawData/FastQC_Reports/*{_fastqc.html,_fastqc.zip}"), emit: VVed_raw_fastqc
58+
path("00-RawData/FastQC_Reports/raw_multiqc${params.output_suffix}_report"), emit: VVed_raw_unzipped_multiqc_report
59+
path("00-RawData/FastQC_Reports/raw_multiqc${params.output_suffix}_report.zip"), emit: VVed_raw_zipped_multiqc_report
60+
path("VV_log.tsv"), optional: params.skipVV, emit: log // Log may be absent only when V&V is skipped
61+
62+
script:
63+
"""
64+
# move from VV_INPUT to task directory
65+
# This allows detection as output files for publishing
66+
mv VV_INPUT/* . || true
67+
68+
# Run V&V unless user requests to skip V&V
69+
if ${ !params.skipVV } ; then
70+
dpt validation run ${dp_tools__NF_RCP} . Metadata/*_runsheet.csv \\
71+
--data-asset-key-sets \\
72+
${ meta.paired_end ? "'demuliplexed paired end raw data,qc reports for paired end raw data'" : "'demuliplexed single end raw data,qc reports for single end raw data'"} \\
73+
--run-components \\
74+
'Metadata,Raw Reads,Raw Reads By Sample' \\
75+
--max-flag-code ${ params.max_flag_code } \\
76+
--output VV_log.tsv
77+
fi
78+
"""
79+
}
80+
81+
// VV_TRIMMED_READS: validation step for trimmed reads.
// Inputs are staged under VV_INPUT/, moved into the task directory so they can be
// declared as outputs, and then checked with `dpt validation run` unless
// params.skipVV is true.
// NOTE(review): the MultiQC report names below hard-code "GLbulkRNAseq", whereas
// VV_RAW_READS interpolates params.output_suffix — confirm these stay in sync.
process VV_TRIMMED_READS {
82+
// Log publishing: VV_log.tsv is renamed to
// VV_Logs/VV_log_<process name with ':' replaced by '-'><params.output_suffix>.tsv
83+
publishDir "${ publishdir }",
84+
pattern: "VV_log.tsv" ,
85+
mode: params.publish_dir_mode,
86+
saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
87+
// V&V'ed data publishing
// NOTE(review): this pattern ends with '/', while VV_RAW_READS uses '00-RawData/**' —
// confirm the glob matches the declared outputs.
88+
publishDir "${ publishdir }",
89+
pattern: '01-TG_Preproc/',
90+
mode: params.publish_dir_mode
91+
92+
label 'VV'
93+
94+
input:
95+
val(publishdir) // Root directory used by both publishDir directives
96+
val(meta) // meta.paired_end selects the data asset key set below
97+
path("VV_INPUT/Metadata/*") // runsheet
98+
path("VV_INPUT/01-TG_Preproc/Fastq/*") // trimmed reads
99+
path("VV_INPUT/01-TG_Preproc/FastQC_Reports/*") // trimmed reads fastqc
100+
path("VV_INPUT/01-TG_Preproc/FastQC_Reports/*") // trimmed reads multiqc unzipped report
101+
path("VV_INPUT/01-TG_Preproc/FastQC_Reports/*") // trimmed reads multiqc zipped report
102+
path("VV_INPUT/01-TG_Preproc/Trimming_Reports/*") // trimming reports
103+
path("VV_INPUT/01-TG_Preproc/Trimming_Reports/*") // trimming reports multiqc unzipped report
104+
path("VV_INPUT/01-TG_Preproc/Trimming_Reports/*") // trimming reports multiqc zipped report
105+
path(dp_tools__NF_RCP) // Passed as the first argument to `dpt validation run`
106+
107+
output:
108+
path("01-TG_Preproc/Fastq"), emit: VVed_trimmed_reads
109+
path("01-TG_Preproc/FastQC_Reports/*{_fastqc.html,_fastqc.zip}"), emit: VVed_trimmed_fastqc
110+
path("01-TG_Preproc/FastQC_Reports/trimmed_multiqc_GLbulkRNAseq_report"), emit: VVed_trimmed_unzipped_multiqc_report
111+
path("01-TG_Preproc/FastQC_Reports/trimmed_multiqc_GLbulkRNAseq_report.zip"), emit: VVed_trimmed_zipped_multiqc_report
112+
path("01-TG_Preproc/Trimming_Reports"), emit: VVed_trimming_reports_all
113+
path("VV_log.tsv"), optional: params.skipVV, emit: log // Log may be absent only when V&V is skipped
114+
115+
script:
116+
"""
117+
# move from VV_INPUT to task directory
118+
# This allows detection as output files for publishing
119+
mv VV_INPUT/* . || true
120+
121+
# Run V&V unless user requests to skip V&V
122+
if ${ !params.skipVV } ; then
123+
dpt validation run ${dp_tools__NF_RCP} . Metadata/*_runsheet.csv \\
124+
--data-asset-key-sets \\
125+
${ meta.paired_end ? "'paired end trimmed reads,qc reports for paired end trimmed reads data'" : "'single end trimmed reads,qc reports for single end trimmed reads data'"} \\
126+
--run-components \\
127+
'Trim Reads,Trimmed Reads By Sample' \\
128+
--max-flag-code ${ params.max_flag_code } \\
129+
--output VV_log.tsv
130+
fi
131+
"""
132+
}
133+
134+
// VV_STAR_ALIGNMENTS: validation step for STAR alignment outputs.
// Inputs are staged under VV_INPUT/, moved into the task directory, passed through
// sort_into_subdirectories_by_sample.py, and then checked with `dpt validation run`
// unless params.skipVV is true. Unlike the read-level VV processes, this one takes
// no `meta` input and uses a fixed data asset key set.
// NOTE(review): sort_into_subdirectories_by_sample.py is defined outside this file;
// presumably it groups per-sample files into sample subdirectories — confirm.
process VV_STAR_ALIGNMENTS {
135+
// Log publishing: VV_log.tsv is renamed to
// VV_Logs/VV_log_<process name with ':' replaced by '-'><params.output_suffix>.tsv
136+
publishDir "${ publishdir }",
137+
pattern: "VV_log.tsv" ,
138+
mode: params.publish_dir_mode,
139+
saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
140+
// V&V'ed data publishing
// NOTE(review): this pattern ends with '/' — confirm the glob matches the
// "02-STAR_Alignment" output declared below.
141+
publishDir "${ publishdir }",
142+
pattern: '02-STAR_Alignment/',
143+
mode: params.publish_dir_mode
144+
145+
label 'VV'
146+
147+
input:
148+
val(publishdir) // Root directory used by both publishDir directives
149+
path("VV_INPUT/Metadata/*") // runsheet (matched later as Metadata/*_runsheet.csv)
150+
path("VV_INPUT/02-STAR_Alignment/*") // direct STAR alignment output
151+
path("VV_INPUT/02-STAR_Alignment/*") // STAR alignment counts tables
152+
path("VV_INPUT/02-STAR_Alignment/*") // zipped multiqc report
153+
path("VV_INPUT/02-STAR_Alignment/*") // unzipped multiqc report
154+
path("VV_INPUT/02-STAR_Alignment/*") // reindexed, sorted bam/bed files
155+
path(dp_tools__NF_RCP) // Passed as the first argument to `dpt validation run`
156+
157+
output:
158+
path("02-STAR_Alignment")
159+
path("VV_log.tsv"), optional: params.skipVV, emit: log // Log may be absent only when V&V is skipped
160+
161+
script:
162+
"""
163+
# move from VV_INPUT to task directory
164+
# This allows detection as output files for publishing
165+
mv VV_INPUT/* . || true
166+
sort_into_subdirectories_by_sample.py 02-STAR_Alignment 02-STAR_Alignment '_*'
167+
168+
# Run V&V unless user requests to skip V&V
169+
if ${ !params.skipVV } ; then
170+
dpt validation run ${dp_tools__NF_RCP} . Metadata/*_runsheet.csv \\
171+
--data-asset-key-sets \\
172+
'STAR alignments' \\
173+
--run-components \\
174+
'STAR Alignments,STAR Alignments By Sample' \\
175+
--max-flag-code ${ params.max_flag_code } \\
176+
--output VV_log.tsv
177+
fi
178+
"""
179+
180+
}
181+
// VV_RSEQC: validation step for RSeQC outputs.
// Inputs are staged under VV_INPUT/, moved into the task directory, rearranged into
// the per-analysis subdirectories, and then checked with `dpt validation run`
// unless params.skipVV is true. Afterwards, *.placeholder files and empty
// directories are removed so they are not published.
// NOTE(review): sort_into_subdirectories_by_sample.py is defined outside this file;
// presumably it groups per-sample files into sample subdirectories — confirm.
process VV_RSEQC {
182+
// Log publishing: VV_log.tsv is renamed to
// VV_Logs/VV_log_<process name with ':' replaced by '-'><params.output_suffix>.tsv
183+
publishDir "${ publishdir }",
184+
pattern: "VV_log.tsv" ,
185+
mode: params.publish_dir_mode,
186+
saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
187+
// V&V'ed data publishing
188+
publishDir "${ publishdir }",
189+
pattern: 'RSeQC_Analyses/',
190+
mode: params.publish_dir_mode
191+
192+
label 'VV'
193+
194+
input:
195+
val(publishdir) // Root directory used by both publishDir directives
196+
val(meta) // meta.paired_end gates the inner_distance steps and the key set
197+
path("VV_INPUT/Metadata/*") // runsheet (matched later as Metadata/*_runsheet.csv)
198+
path("VV_INPUT/RSeQC_Analyses/*") // direct logs
199+
path("VV_INPUT/RSeQC_Analyses/02_geneBody_coverage/*") // genebody multiqc
200+
path("VV_INPUT/RSeQC_Analyses/03_infer_experiment/*") // presumably infer experiment multiqc (was labelled "genebody multiqc") — confirm
201+
path("VV_INPUT/RSeQC_Analyses/04_inner_distance/*") // presumably inner distance multiqc (was labelled "genebody multiqc") — confirm
202+
path("VV_INPUT/RSeQC_Analyses/05_read_distribution/*") // presumably read distribution multiqc (was labelled "genebody multiqc") — confirm
203+
path(dp_tools__NF_RCP) // Passed as the first argument to `dpt validation run`
204+
205+
output:
206+
path("RSeQC_Analyses")
207+
path("VV_log.tsv"), optional: params.skipVV, emit: log // Log may be absent only when V&V is skipped
208+
209+
script:
// For single-end data the inner_distance commands are emitted behind a leading '#'
// so the shell treats them as comments.
// The *.placeholder pattern passed to `find -name` is quoted so the shell cannot
// expand it against files in the task directory before find sees it.
210+
"""
211+
# move from VV_INPUT to task directory
212+
# This allows detection as output files for publishing
213+
mv VV_INPUT/* . || true
214+
sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/02_geneBody_coverage '.geneBodyCoverage.txt'
215+
sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/02_geneBody_coverage '.geneBodyCoverage.curves.pdf'
216+
sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/02_geneBody_coverage '.geneBodyCoverage.r'
217+
# These are not in sub directories: sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/03_infer_experiment '_infer_expt.out'
218+
mv RSeQC_Analyses/*_infer_expt.out RSeQC_Analyses/03_infer_experiment
219+
${ meta.paired_end ? '' : '# Only for Paired end datasets: '} sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/04_inner_distance '.inner_distance_freq.txt'
220+
${ meta.paired_end ? '' : '# Only for Paired end datasets: '} sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/04_inner_distance '.inner_distance_plot.pdf'
221+
${ meta.paired_end ? '' : '# Only for Paired end datasets: '} sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/04_inner_distance '.inner_distance_plot.r'
222+
${ meta.paired_end ? '' : '# Only for Paired end datasets: '} sort_into_subdirectories_by_sample.py RSeQC_Analyses RSeQC_Analyses/04_inner_distance '.inner_distance.txt'
223+
# These are not in sub directories: sort_into_subdirectories_by_sample.py RSeQC_Analyses/05_read_distribution RSeQC_Analyses/05_read_distribution '_read_dist.out'
224+
mv RSeQC_Analyses/*_read_dist.out RSeQC_Analyses/05_read_distribution
225+
226+
227+
# Run V&V unless user requests to skip V&V
228+
if ${ !params.skipVV } ; then
229+
dpt validation run ${dp_tools__NF_RCP} . Metadata/*_runsheet.csv \\
230+
--data-asset-key-sets \\
231+
${ meta.paired_end ? "'RSeQC output for paired end data'" : "'RSeQC output for single end data'"} \\
232+
--run-components \\
233+
'RSeQC,RSeQC By Sample' \\
234+
--max-flag-code ${ params.max_flag_code } \\
235+
--output VV_log.tsv
236+
fi
237+
238+
# Remove all placeholder files and empty directories to prevent publishing
239+
find RSeQC_Analyses -type f,l -name '*.placeholder' -delete
240+
find RSeQC_Analyses -empty -type d -delete
241+
"""
242+
243+
}
244+
245+
246+
// VV_RSEM_COUNTS: validation step for RSEM count outputs.
// Inputs are staged under VV_INPUT/, moved into the task directory so they can be
// declared as outputs, and then checked with `dpt validation run` unless
// params.skipVV is true. This process takes no `meta` input and uses a fixed
// data asset key set.
process VV_RSEM_COUNTS {
247+
// Log publishing: VV_log.tsv is renamed to
// VV_Logs/VV_log_<process name with ':' replaced by '-'><params.output_suffix>.tsv
248+
publishDir "${ publishdir }",
249+
pattern: "VV_log.tsv" ,
250+
mode: params.publish_dir_mode,
251+
saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
252+
// V&V'ed data publishing
253+
publishDir "${ publishdir }",
254+
pattern: '03-RSEM_Counts/',
255+
mode: params.publish_dir_mode
256+
257+
label 'VV'
258+
259+
input:
260+
val(publishdir) // Root directory used by both publishDir directives
261+
path("VV_INPUT/Metadata/*") // runsheet (matched later as Metadata/*_runsheet.csv)
262+
path("VV_INPUT/03-RSEM_Counts/*") // RSEM sample wise output
263+
path("VV_INPUT/03-RSEM_Counts/*") // RSEM dataset output
264+
path("VV_INPUT/03-RSEM_Counts/*") // zipped multiqc report
265+
path("VV_INPUT/03-RSEM_Counts/*") // unzipped multiqc report
266+
path(dp_tools__NF_RCP) // Passed as the first argument to `dpt validation run`
267+
268+
269+
output:
270+
path("03-RSEM_Counts")
271+
path("VV_log.tsv"), optional: params.skipVV, emit: log // Log may be absent only when V&V is skipped
272+
273+
script:
274+
"""
275+
# move from VV_INPUT to task directory
276+
# This allows detection as output files for publishing
277+
mv VV_INPUT/* . || true
278+
279+
# Run V&V unless user requests to skip V&V
280+
if ${ !params.skipVV } ; then
281+
dpt validation run ${dp_tools__NF_RCP} . Metadata/*_runsheet.csv \\
282+
--data-asset-key-sets \\
283+
'RSEM counts' \\
284+
--run-components \\
285+
'RSEM Counts' \\
286+
--max-flag-code ${ params.max_flag_code } \\
287+
--output VV_log.tsv
288+
fi
289+
"""
290+
}
291+
292+
// VV_DESEQ2_ANALYSIS: validation step for DESeq2 normalized counts and DGE outputs.
// Inputs are staged under VV_INPUT/, moved into the task directory, and then checked
// with `dpt validation run` unless params.skipVV is true. ERCC-specific key sets and
// components are appended only when meta.has_ercc is truthy. Afterwards,
// *.placeholder files and empty directories are removed so they are not published.
process VV_DESEQ2_ANALYSIS {
293+
// Log publishing: VV_log.tsv is renamed to
// VV_Logs/VV_log_<process name with ':' replaced by '-'><params.output_suffix>.tsv
294+
publishDir "${ publishdir }",
295+
pattern: "VV_log.tsv" ,
296+
mode: params.publish_dir_mode,
297+
saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }${ params.output_suffix }.tsv" }
298+
// V&V'ed data publishing (03-RSEM_Counts is staged for validation but not published here)
299+
publishDir "${ publishdir }",
300+
pattern: '{04-DESeq2_NormCounts,05-DESeq2_DGE}',
301+
mode: params.publish_dir_mode
302+
303+
label 'VV'
304+
305+
input:
306+
val(publishdir) // Root directory used by both publishDir directives
307+
val(meta) // meta.has_ercc toggles the ERCC key sets/components below
308+
path("VV_INPUT/Metadata/*") // runsheet (matched later as Metadata/*_runsheet.csv)
309+
path("VV_INPUT/03-RSEM_Counts/*") // RSEM dataset output
310+
path("VV_INPUT/03-RSEM_Counts/*") // zipped multiqc report
311+
path("VV_INPUT/03-RSEM_Counts/*") // unzipped multiqc report
312+
path("VV_INPUT/04-DESeq2_NormCounts/*") // norm counts files
313+
path("VV_INPUT/05-DESeq2_DGE/*") // dge files
314+
path("VV_INPUT/04-DESeq2_NormCounts/*") // ERCC norm counts files
315+
path("VV_INPUT/05-DESeq2_DGE/ERCC_NormDGE/*") // ERCC dge files
316+
path(dp_tools__NF_RCP) // Passed as the first argument to `dpt validation run`
317+
318+
output:
319+
path("04-DESeq2_NormCounts")
320+
path("05-DESeq2_DGE")
321+
path("VV_log.tsv"), optional: params.skipVV, emit: log // Log may be absent only when V&V is skipped
322+
323+
script:
// The *.placeholder pattern passed to `find -name` is quoted so the shell cannot
// expand it against files in the task directory (find starts from '.') before
// find sees it.
324+
"""
325+
# move from VV_INPUT to task directory
326+
# This allows detection as output files for publishing
327+
mv VV_INPUT/* . || true
328+
329+
# Run V&V unless user requests to skip V&V
330+
if ${ !params.skipVV } ; then
331+
dpt validation run ${dp_tools__NF_RCP} . Metadata/*_runsheet.csv \\
332+
--data-asset-key-sets \\
333+
'RSEM Output,DGE Output${ meta.has_ercc ? ",ERCC DGE Output" : ''}' \\
334+
--run-components \\
335+
'DGE Metadata${ meta.has_ercc ? ",DGE Metadata ERCC" : '' },DGE Output${ meta.has_ercc ? ",DGE Output ERCC" : '' }' \\
336+
--max-flag-code ${ params.max_flag_code } \\
337+
--output VV_log.tsv
338+
fi
339+
340+
# Remove all placeholder files and empty directories to prevent publishing
341+
find . -type f,l -name '*.placeholder' -delete
342+
find . -empty -type d -delete
343+
"""
344+
}
345+
346+
// VV_CONCAT_FILTER: runs concat_logs.py followed by filter_to_only_issues.py and
// publishes the two resulting TSV files to
// <params.outputDir>/<params.gldsAccession>/VV_Logs.
// NOTE(review): both helper scripts are called without arguments and are defined
// outside this file; presumably they read VV_in.tsv and write the two output files
// named below — confirm.
// NOTE(review): the output names hard-code "GLbulkRNAseq" rather than using
// params.output_suffix as the other VV processes do for their logs — confirm intended.
process VV_CONCAT_FILTER {
347+
publishDir "${ params.outputDir }/${ params.gldsAccession }/VV_Logs",
348+
mode: params.publish_dir_mode
349+
350+
label 'VV'
351+
352+
input:
353+
path("VV_in.tsv") // Staged under this fixed name in the task directory
354+
355+
output:
356+
tuple path("VV_log_final_GLbulkRNAseq.tsv"), path("VV_log_final_only_issues_GLbulkRNAseq.tsv")
357+
358+
script:
359+
"""
360+
concat_logs.py
361+
filter_to_only_issues.py
362+
"""
363+
}

RNAseq/Workflow_Documentation/NF_RCP-G/workflow_code/nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ profiles {
9797
singularity {
9898
singularity.enabled = true
9999
singularity.autoMounts = true
100-
conda.enabled = false
100+
conda.enabled = true
101101
docker.enabled = false
102102
podman.enabled = false
103103
shifter.enabled = false

0 commit comments

Comments
 (0)