@@ -34,7 +34,7 @@ Variables that may need to be adjusted should be changed there, not here.
34
34
####### Assay-specific GL suffix #######
35
35
########################################
36
36
37
- assay_suffix = "GLAmpSeq "
37
+ assay_suffix = "_GLAmpSeq "
38
38
39
39
40
40
########################################
@@ -93,27 +93,27 @@ for dir in needed_dirs:
93
93
base_PE_inputs = [
94
94
expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R1_suffix" ], ID = sample_ID_list ),
95
95
expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R2_suffix" ], ID = sample_ID_list ),
96
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy_ { assay_suffix } .tsv" ,
97
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .biom.zip" ,
98
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs_ { assay_suffix } .fasta" ,
99
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking_ { assay_suffix } .tsv" ,
100
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts_ { assay_suffix } .tsv" ,
101
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .tsv" ,
102
- config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"raw_multiqc_ { assay_suffix } _report.zip" ,
103
- config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"filtered_multiqc_ { assay_suffix } _report.zip"
96
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy { assay_suffix } .tsv" ,
97
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .biom.zip" ,
98
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs { assay_suffix } .fasta" ,
99
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking { assay_suffix } .tsv" ,
100
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts { assay_suffix } .tsv" ,
101
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .tsv" ,
102
+ config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"raw_multiqc { assay_suffix } _report.zip" ,
103
+ config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"filtered_multiqc { assay_suffix } _report.zip"
104
104
]
105
105
106
106
# Base rule all inputs (final outs) for SE, with or without trimming
107
107
base_SE_inputs = [
108
108
expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R1_suffix" ], ID = sample_ID_list ),
109
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy_ { assay_suffix } .tsv" ,
110
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .biom.zip" ,
111
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs_ { assay_suffix } .fasta" ,
112
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking_ { assay_suffix } .tsv" ,
113
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts_ { assay_suffix } .tsv" ,
114
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .tsv" ,
115
- config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"raw_multiqc_ { assay_suffix } _report.zip" ,
116
- config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"filtered_multiqc_ { assay_suffix } _report.zip"
109
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy { assay_suffix } .tsv" ,
110
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .biom.zip" ,
111
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs { assay_suffix } .fasta" ,
112
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking { assay_suffix } .tsv" ,
113
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts { assay_suffix } .tsv" ,
114
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .tsv" ,
115
+ config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"raw_multiqc { assay_suffix } _report.zip" ,
116
+ config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"filtered_multiqc { assay_suffix } _report.zip"
117
117
]
118
118
119
119
# Add additional inputs for trimming
@@ -122,18 +122,18 @@ if config["trim_primers"] == "TRUE":
122
122
base_PE_inputs += [
123
123
expand (config ["trimmed_reads_dir" ] + "{ID}" + config ["primer_trimmed_R1_suffix" ], ID = sample_ID_list ),
124
124
expand (config ["trimmed_reads_dir" ] + "{ID}" + config ["primer_trimmed_R2_suffix" ], ID = sample_ID_list ),
125
- config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"cutadapt_ { assay_suffix } .log" ,
126
- config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts_ { assay_suffix } .tsv" ,
125
+ config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"cutadapt { assay_suffix } .log" ,
126
+ config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts { assay_suffix } .tsv" ,
127
127
]
128
128
else : # SE with primer trimming
129
129
base_SE_inputs += [
130
130
expand (config ["trimmed_reads_dir" ] + "{ID}" + config ["primer_trimmed_R1_suffix" ], ID = sample_ID_list ),
131
- config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"cutadapt_ { assay_suffix } .log" ,
132
- config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts_ { assay_suffix } .tsv" ,
131
+ config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"cutadapt { assay_suffix } .log" ,
132
+ config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts { assay_suffix } .tsv" ,
133
133
]
134
134
135
135
# Conditional addition of visualization outputs (color legend only to keep it simple)
136
- visualization_outputs = [config ["plots_dir" ] + config ["output_prefix" ] + f"color_legend_ { assay_suffix } .png" ] if enable_visualizations == "TRUE" else []
136
+ visualization_outputs = [config ["plots_dir" ] + config ["output_prefix" ] + f"color_legend { assay_suffix } .png" ] if enable_visualizations == "TRUE" else []
137
137
138
138
########################################
139
139
############# Rules start ##############
@@ -160,16 +160,16 @@ if config["data_type"] == "PE":
160
160
input :
161
161
expand (config ["trimmed_reads_dir" ] + "{ID}" + config ["primer_trimmed_R1_suffix" ], ID = sample_ID_list ),
162
162
expand (config ["trimmed_reads_dir" ] + "{ID}" + config ["primer_trimmed_R2_suffix" ], ID = sample_ID_list ),
163
- config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts_ { assay_suffix } .tsv"
163
+ config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts { assay_suffix } .tsv"
164
164
output :
165
165
expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R1_suffix" ], ID = sample_ID_list ),
166
166
expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R2_suffix" ], ID = sample_ID_list ),
167
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy_ { assay_suffix } .tsv" ,
168
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .biom" ,
169
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs_ { assay_suffix } .fasta" ,
170
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking_ { assay_suffix } .tsv" ,
171
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts_ { assay_suffix } .tsv" ,
172
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .tsv"
167
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy { assay_suffix } .tsv" ,
168
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .biom" ,
169
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs { assay_suffix } .fasta" ,
170
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking { assay_suffix } .tsv" ,
171
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts { assay_suffix } .tsv" ,
172
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .tsv"
173
173
params :
174
174
left_trunc = config ["left_trunc" ],
175
175
right_trunc = config ["right_trunc" ],
@@ -211,12 +211,12 @@ if config["data_type"] == "PE":
211
211
output :
212
212
expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R1_suffix" ], ID = sample_ID_list ),
213
213
expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R2_suffix" ], ID = sample_ID_list ),
214
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy_ { assay_suffix } .tsv" ,
215
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .biom" ,
216
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs_ { assay_suffix } .fasta" ,
217
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking_ { assay_suffix } .tsv" ,
218
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts_ { assay_suffix } .tsv" ,
219
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .tsv"
214
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy { assay_suffix } .tsv" ,
215
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .biom" ,
216
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs { assay_suffix } .fasta" ,
217
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking { assay_suffix } .tsv" ,
218
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts { assay_suffix } .tsv" ,
219
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .tsv"
220
220
params :
221
221
left_trunc = config ["left_trunc" ],
222
222
right_trunc = config ["right_trunc" ],
@@ -338,7 +338,7 @@ if config["data_type"] == "PE":
338
338
r2_html_files = expand (config ["raw_reads_dir" ] + "{ID}" + config ["raw_R2_suffix" ].rsplit ("." , 2 )[0 ] + "_fastqc.html" , ID = sample_ID_list ),
339
339
config_file = "config/multiqc.config"
340
340
output :
341
- final_out_zip = config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"raw_multiqc_ { assay_suffix } _report.zip"
341
+ final_out_zip = config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"raw_multiqc { assay_suffix } _report.zip"
342
342
benchmark :
343
343
"benchmarks/raw_multiqc-benchmarks.tsv"
344
344
shell :
@@ -387,7 +387,7 @@ if config["data_type"] == "PE":
387
387
r2_html_files = expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R2_suffix" ].rsplit ("." , 2 )[0 ] + "_fastqc.html" , ID = sample_ID_list ),
388
388
config_file = "config/multiqc.config"
389
389
output :
390
- final_out_zip = config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"filtered_multiqc_ { assay_suffix } _report.zip"
390
+ final_out_zip = config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"filtered_multiqc { assay_suffix } _report.zip"
391
391
benchmark :
392
392
"benchmarks/filtered_multiqc-benchmarks.tsv"
393
393
@@ -418,15 +418,15 @@ if config["data_type"] == "SE":
418
418
"envs/R.yaml"
419
419
input :
420
420
expand (config ["trimmed_reads_dir" ] + "{ID}" + config ["primer_trimmed_R1_suffix" ], ID = sample_ID_list ),
421
- config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts_ { assay_suffix } .tsv"
421
+ config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts { assay_suffix } .tsv"
422
422
output :
423
423
expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R1_suffix" ], ID = sample_ID_list ),
424
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy_ { assay_suffix } .tsv" ,
425
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .biom" ,
426
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs_ { assay_suffix } .fasta" ,
427
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking_ { assay_suffix } .tsv" ,
428
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts_ { assay_suffix } .tsv" ,
429
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .tsv"
424
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy { assay_suffix } .tsv" ,
425
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .biom" ,
426
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs { assay_suffix } .fasta" ,
427
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking { assay_suffix } .tsv" ,
428
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts { assay_suffix } .tsv" ,
429
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .tsv"
430
430
params :
431
431
left_trunc = config ["left_trunc" ],
432
432
left_maxEE = config ["left_maxEE" ],
@@ -461,12 +461,12 @@ if config["data_type"] == "SE":
461
461
expand (config ["raw_reads_dir" ] + "{ID}" + config ["raw_R1_suffix" ], ID = sample_ID_list )
462
462
output :
463
463
expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R1_suffix" ], ID = sample_ID_list ),
464
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy_ { assay_suffix } .tsv" ,
465
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .biom" ,
466
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs_ { assay_suffix } .fasta" ,
467
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking_ { assay_suffix } .tsv" ,
468
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts_ { assay_suffix } .tsv" ,
469
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .tsv"
464
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy { assay_suffix } .tsv" ,
465
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .biom" ,
466
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"ASVs { assay_suffix } .fasta" ,
467
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"read-count-tracking { assay_suffix } .tsv" ,
468
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts { assay_suffix } .tsv" ,
469
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .tsv"
470
470
params :
471
471
left_trunc = config ["left_trunc" ],
472
472
left_maxEE = config ["left_maxEE" ],
@@ -577,7 +577,7 @@ if config["data_type"] == "SE":
577
577
r1_html_files = expand (config ["raw_reads_dir" ] + "{ID}" + config ["raw_R1_suffix" ].rsplit ("." , 2 )[0 ] + "_fastqc.html" , ID = sample_ID_list ),
578
578
config_file = "config/multiqc.config"
579
579
output :
580
- final_out_zip = config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"raw_multiqc_ { assay_suffix } _report.zip"
580
+ final_out_zip = config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"raw_multiqc { assay_suffix } _report.zip"
581
581
benchmark :
582
582
"benchmarks/raw_multiqc-benchmarks.tsv"
583
583
shell :
@@ -622,7 +622,7 @@ if config["data_type"] == "SE":
622
622
r1_html_files = expand (config ["filtered_reads_dir" ] + "{ID}" + config ["filtered_R1_suffix" ].rsplit ("." , 2 )[0 ] + "_fastqc.html" , ID = sample_ID_list ),
623
623
config_file = "config/multiqc.config"
624
624
output :
625
- final_out_zip = config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"filtered_multiqc_ { assay_suffix } _report.zip"
625
+ final_out_zip = config ["fastqc_out_dir" ] + config ["output_prefix" ] + f"filtered_multiqc { assay_suffix } _report.zip"
626
626
benchmark :
627
627
"benchmarks/filtered_multiqc-benchmarks.tsv"
628
628
@@ -639,11 +639,11 @@ rule r_visualizations:
639
639
input :
640
640
runsheet = config ["runsheet" ],
641
641
sample_info = config ["sample_info_file" ],
642
- counts = config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts_ { assay_suffix } .tsv" ,
643
- taxonomy = config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy_ { assay_suffix } .tsv"
642
+ counts = config ["final_outputs_dir" ] + config ["output_prefix" ] + f"counts { assay_suffix } .tsv" ,
643
+ taxonomy = config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy { assay_suffix } .tsv"
644
644
output :
645
645
# Use completion flag file in place of plot outputs for variable plots
646
- legend = config ["plots_dir" ] + config ["output_prefix" ] + f"color_legend_ { assay_suffix } .png"
646
+ legend = config ["plots_dir" ] + config ["output_prefix" ] + f"color_legend { assay_suffix } .png"
647
647
params :
648
648
assay_suffix = assay_suffix ,
649
649
plots_dir = config ["plots_dir" ],
@@ -657,15 +657,15 @@ rule r_visualizations:
657
657
"benchmarks/r-visualizations-benchmarks.tsv"
658
658
shell :
659
659
"""
660
- Rscript visualizations/Illumina-R-visualizations.R "{input.runsheet}" "{input.sample_info}" "{input.counts}" "{input.taxonomy}" "{params.assay_suffix }" "{params.plots_dir }" "{params.output_prefix}" > {log} 2>&1
660
+ Rscript visualizations/Illumina-R-visualizations.R "{input.runsheet}" "{input.sample_info}" "{input.counts}" "{input.taxonomy}" "{params.plots_dir }" "{params.output_prefix }" "{params.assay_suffix}" > {log} 2>&1
661
661
"""
662
662
663
663
664
664
rule zip_biom :
665
665
input :
666
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .biom"
666
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .biom"
667
667
output :
668
- config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts_ { assay_suffix } .biom.zip"
668
+ config ["final_outputs_dir" ] + config ["output_prefix" ] + f"taxonomy-and-counts { assay_suffix } .biom.zip"
669
669
shell :
670
670
"""
671
671
zip -j -q {output} {input} && rm {input}
@@ -678,8 +678,8 @@ rule combine_cutadapt_logs_and_summarize:
678
678
counts = expand (config ["trimmed_reads_dir" ] + "{ID}-trimmed-counts.tsv" , ID = sample_ID_list ),
679
679
logs = expand (config ["trimmed_reads_dir" ] + "{ID}-cutadapt.log" , ID = sample_ID_list )
680
680
output :
681
- combined_log = config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"cutadapt_ { assay_suffix } .log" ,
682
- combined_counts = config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts_ { assay_suffix } .tsv"
681
+ combined_log = config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"cutadapt { assay_suffix } .log" ,
682
+ combined_counts = config ["trimmed_reads_dir" ] + config ["output_prefix" ] + f"trimmed-read-counts { assay_suffix } .tsv"
683
683
benchmark :
684
684
"benchmarks/combine_cutadapt_logs_and_summarize-benchmarks.tsv"
685
685
shell :
0 commit comments