Skip to content

Commit 3eb1876

Browse files
committed
Added software version tracking
1 parent 522e347 commit 3eb1876

File tree

10 files changed

+672
-62
lines changed

10 files changed

+672
-62
lines changed

Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/bin/edit_create_runsheet.py

Lines changed: 521 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env Rscript
2+
3+
# Get versions
4+
VERSIONS <- sprintf("dada2 %s\nDECIPHER %s\nbiomformat %s\n",
5+
packageVersion("dada2"),
6+
packageVersion("DECIPHER"),
7+
packageVersion("biomformat"))
8+
9+
# Write versions to file
10+
11+
write(x= VERSIONS, file="versions.txt", append=TRUE)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env Rscript
2+
3+
# Get versions
4+
VERSIONS <- sprintf("vegan %s\ntidyverse %s\ndendextend %s\nphyloseq %s\nDESeq2 %s\nggrepel %s\ndplyr %s\nRColorBrewer %s\ngrid %s\n",
5+
packageVersion("vegan"),
6+
packageVersion("tidyverse"),
7+
packageVersion("dendextend"),
8+
packageVersion("phyloseq"),
9+
packageVersion("DESeq2"),
10+
packageVersion("ggrepel"),
11+
packageVersion("dplyr"),
12+
packageVersion("RColorBrewer"),
13+
packageVersion("grid"))
14+
15+
# Write versions to file
16+
17+
write(x= VERSIONS, file="versions.txt", append=TRUE)

Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/main.nf

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ c_bright_green = "\u001b[32;1m";
77
c_blue = "\033[0;34m";
88
c_reset = "\033[0m";
99

10-
params.help = false
1110
/**************************************************
1211
* HELP MENU **************************************
1312
**************************************************/
@@ -16,18 +15,18 @@ if (params.help) {
1615
println("Nextflow AmpIllumina Consensus Pipeline: $workflow.manifest.version")
1716
println("USAGE:")
1817
println("Example 1: Submit and run jobs with slurm in singularity containers.")
19-
println(" > nextflow run main.nf -resume -profile slurm_sing --csv_file PE_file.csv --target_region 16S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
18+
println(" > nextflow run main.nf -resume -profile slurm,singularity --csv_file PE_file.csv --target_region 16S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
2019
println()
2120
println("Example 2: Submit and run jobs with slurm in conda environments.")
22-
println(" > nextflow run main.nf -resume -profile slurm_conda --csv_file SE_file.csv --target_region 1TS --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
21+
println(" > nextflow run main.nf -resume -profile slurm,conda --csv_file SE_file.csv --target_region ITS --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
2322
println()
2423
println("Example 3: Run jobs locally in conda environments, supplying a GLDS accession, and specifying the path to an existing conda environment")
2524
println(" > nextflow run main.nf -resume -profile conda --GLDS_accession GLDS-487 --target_region 16S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT --conda.qc <path/to/existing/conda/environment>")
2625
println()
2726
println("Required arguments:")
28-
println("""-profile [STRING] What profile should be used to run the workflow. Options are [singularity, docker, conda, slurm_sing, slurm_conda].
27+
println("""-profile [STRING] What profile should be used to run the workflow. Options are [singularity, docker, conda, slurm].
2928
singularity, docker and conda will run the pipelne locally using singularity, docker, and conda, respectively.
30-
slurm_sing and slurm_conda will submit and run jobs using slurm in singularity containers and conda environments, respectively. """)
29+
To combine profiles, pass them together separated by comma. For example, to run jobs using slurm in singularity containers use 'slurm,singularity' . """)
3130
println("--csv_file [PATH] A 3-column (single-end) or 4-column (paired-end) input file (sample_id, forward, [reverse,] paired). Mandatory if a GLDS accession is not provided.")
3231
println(" Please see the files: SE_file.csv and PE_file.csv for single-end and paired-end examples, respectively.")
3332
println(" The sample_id column should contain unique sample ids.")
@@ -38,22 +37,22 @@ if (params.help) {
3837
println("PLEASE NOTE: This workflow assumes that all your raw reads end with the same suffix. If they don't please modify your filenames to have the same suffix as shown below.")
3938
println("--raw_R1_suffix [STRING] Raw forward reads suffix (region following the unique part of the sample names). e.g. _R1_raw.fastq.gz.")
4039
println("--raw_R2_suffix [STRING] Raw reverse reads suffix (region following the unique part of the sample names). e.g. _R2_raw.fastq.gz.")
41-
println()
40+
4241
println("Cutadapt (trimming) parameters:")
4342
println(" --F_primer [STRING] Forward primer sequence e.g. AGAGTTTGATCCTGGCTCAG. Default: empty string.")
4443
println(" --R_primer [STRING] Reverse primer sequence e.g. CTGCCTCCCGTAGGAGT. Default: empty string.")
4544
println(" --min_cutadapt_len [INTEGER] What should be the minimum read length after quality trimming with cutadapt. Default: 130.")
4645
println(" --primers_linked [STRING] Are the primers linked?. https://cutadapt.readthedocs.io/en/stable/recipes.html#trimming-amplicon-primers-from-paired-end-reads. Default: TRUE. ")
4746
println(" --discard_untrimmed [STRING] Should untrimmed reads be discarded? Any supplied string except TRUE will not discard them. Default: TRUE.")
48-
println()
47+
4948
println("Optional arguments:")
5049
println(" --help Print this help message and exit.")
5150
println(" --publishDir_mode [STRING] How should nextflow publish file outputs. Options can be found here https://www.nextflow.io/docs/latest/process.html#publishdir. Default: link.")
5251
println(" --errorStrategy [STRING] How should nextflow handle errors. Options can be found here https://www.nextflow.io/docs/latest/process.html#errorstrategy. Default: terminate")
5352
println(" --enable_visualizations [BOOLEAN] Should ASV plots be made? true or false. if true supply a path to the runsheet for plotting to the --runsheet option. Default: false.")
5453
println(" --runsheet [PATH] A 4-column file with these exact headers [Sample Name, read1_path, raw_R1_suffix, groups] for plotting. Only relevant if --enable_visualizations is true. Default: null.")
5554
println(" --multiqc_config [PATH] Path to a custom multiqc config file. Default: config/multiqc.config.")
56-
println()
55+
5756
println("Dada2 parameters passed to filterAndTrim() function:")
5857
println(" --left_trunc [INTEGER] truncate the sequences to the left by this number of bases. Default: 0.")
5958
println(" --right_trunc [INTEGER] truncate the sequences to the right by this number of bases. Default: 0.")
@@ -63,13 +62,12 @@ if (params.help) {
6362
println(" This is typically used with primers like 515-926, that captured 18S fragments that are typically too long to merge.")
6463
println(" Note that 16S and 18S should have been separated already prior to running this workflow. This should likely be left as FALSE for any option other than 18S above.")
6564
println(" Values are TRUE or FALSE Default: FALSE.")
66-
println()
65+
6766
println("File Suffixes:")
6867
println(" --primer_trimmed_R1_suffix [STRING] Suffix to use for naming your primer trimmed forward reads. Default: _R1_trimmed.fastq.gz.")
6968
println(" --primer_trimmed_R2_suffix [STRING] Suffix to use for naming your primer trimmed reverse reads. Default: _R2_trimmed.fastq.gz.")
7069
println(" --filtered_R1_suffix [STRING] Suffix to use for naming your quality filtered forward reads. Default: _R1_filtered.fastq.gz.")
7170
println(" --filtered_R2_suffix [STRING] Suffix to use for naming your quality filtered reverse reads. Default: _R2_filtered.fastq.gz.")
72-
println()
7371
println("Output directories:")
7472
println(" --raw_reads_dir [PATH] Where should the fastqc report of the raw reads be stored. Default: Raw_Sequence_Data/.")
7573
println(" --fastqc_out_dir [PATH] Where should multiqc outputs be stored. Default: workflow_output/FastQC_Outputs/.")
@@ -78,12 +76,10 @@ if (params.help) {
7876
println(" --info_out_dir [PATH] Where should output metadata be stored. Default: workflow_output/Metadata/.")
7977
println(" --plots_dir [PATH] Where should your plots be stored if visualization is enabled. Default: workflow_output/Final_Outputs/Plots/.")
8078
println(" --final_outputs_dir [PATH] Where should most outputs and summary reports be stored. Default: workflow_output/Final_Outputs/.")
81-
println()
8279
println("Genelab specific arguments:")
8380
println(" --GLDS_accession [STRING] A Genelab accession number if the --csv_file parameter is not set. If this parameter is set, it will ignore the --csv_file parameter.")
8481
println(" --assay_suffix [STRING] Genelabs assay suffix. Default: GLAmpSeq.")
8582
println(" --output_prefix [STRING] Unique name to tag onto output files. Default: empty string.")
86-
println()
8783
println("Paths to existing conda environments to use otherwise a new one will be created using the yaml file in envs/.")
8884
println(" --conda.qc [PATH] Path to a conda environment containing fastqc, multiqc, zip and python. Default: null.")
8985
println(" --conda.R [PATH] Path to a conda environment containing R along with the packages decipher and biomformat installed. Default: null.")
@@ -94,11 +90,12 @@ if (params.help) {
9490
exit 0
9591
}
9692

93+
94+
if(params.debug){
9795
log.info """
9896
Nextflow AmpIllumina Consensus Pipeline: $workflow.manifest.version
9997
10098
You have set the following parameters:
101-
Profile: ${workflow.profile}
10299
Input csv file : ${params.csv_file}
103100
GLDS_accession : ${params.GLDS_accession}
104101
Amplicon target region : ${params.target_region}
@@ -151,7 +148,7 @@ log.info """
151148
cutadapt: ${params.conda.cutadapt}
152149
R_visualizations: ${params.conda.R_visualizations}
153150
""".stripIndent()
154-
151+
}
155152

156153
// Create GLDS runsheet
157154
include { GET_RUNSHEET } from "./modules/create_runsheet.nf"
@@ -178,6 +175,9 @@ def deleteWS(string){
178175

179176

180177
workflow {
178+
179+
// Capture software versions
180+
software_versions_ch = Channel.empty()
181181

182182
if(params.GLDS_accession){
183183

@@ -198,6 +198,7 @@ workflow {
198198
row -> "${row.data_type}" == "PE" ? ["${row.raw_R1_suffix}", "${row.raw_R2_suffix}"] : ["${row.raw_R1_suffix}"]
199199
}.first()
200200

201+
GET_RUNSHEET.out.version | mix(software_versions_ch) | set{software_versions_ch}
201202

202203
}else{
203204

@@ -207,8 +208,8 @@ workflow {
207208
}
208209

209210
file_ch.map{
210-
row -> deleteWS(row.paired) == 'true' ? tuple( "${row.sample_id}", [file("${row.forward}"), file("${row.reverse}")], deleteWS(row.paired)) :
211-
tuple( "${row.sample_id}", [file("${row.forward}")], deleteWS(row.paired))
211+
row -> deleteWS(row.paired) == 'true' ? tuple( "${row.sample_id}", [file("${row.forward}", checkIfExists: true), file("${row.reverse}", checkIfExists: true)], deleteWS(row.paired)) :
212+
tuple( "${row.sample_id}", [file("${row.forward}", checkIfExists: true)], deleteWS(row.paired))
212213
}.set{reads_ch}
213214

214215
// Generating a file with sample ids on a new line
@@ -217,9 +218,14 @@ workflow {
217218
.set{sample_ids_ch}
218219

219220
// Read quality check and trimming
220-
raw_fastqc_files = RAW_FASTQC(reads_ch).flatten().collect()
221+
RAW_FASTQC(reads_ch)
222+
raw_fastqc_files = RAW_FASTQC.out.html.flatten().collect()
223+
221224
RAW_MULTIQC("raw", params.multiqc_config,raw_fastqc_files)
222225

226+
RAW_FASTQC.out.version | mix(software_versions_ch) | set{software_versions_ch}
227+
RAW_MULTIQC.out.version | mix(software_versions_ch) | set{software_versions_ch}
228+
223229
if(params.trim_primers){
224230

225231
if(!params.GLDS_accession) primers_ch = Channel.value([params.F_primer, params.R_primer])
@@ -231,7 +237,8 @@ workflow {
231237
}.flatten().collect()
232238

233239
COMBINE_CUTADAPT_LOGS_AND_SUMMARIZE(counts, logs)
234-
trimmed_fastqc_files = TRIMMED_FASTQC(CUTADAPT.out.reads).flatten().collect()
240+
TRIMMED_FASTQC(CUTADAPT.out.reads)
241+
trimmed_fastqc_files = TRIMMED_FASTQC.out.html.flatten().collect()
235242
TRIMMED_MULTIQC("filtered", params.multiqc_config, trimmed_fastqc_files)
236243

237244
isPaired_ch = CUTADAPT.out.reads.map{
@@ -248,6 +255,11 @@ workflow {
248255
dada_taxonomy = RUN_R_TRIM.out.taxonomy
249256
dada_biom = RUN_R_TRIM.out.biom
250257

258+
CUTADAPT.out.version | mix(software_versions_ch) | set{software_versions_ch}
259+
TRIMMED_FASTQC.out.version | mix(software_versions_ch) | set{software_versions_ch}
260+
TRIMMED_MULTIQC.out.version | mix(software_versions_ch) | set{software_versions_ch}
261+
RUN_R_TRIM.out.version | mix(software_versions_ch) | set{software_versions_ch}
262+
251263
}else{
252264

253265
raw_reads_ch = reads_ch.map{
@@ -270,19 +282,36 @@ workflow {
270282
dada_taxonomy = RUN_R_NOTRIM.out.taxonomy
271283
dada_biom = RUN_R_NOTRIM.out.biom
272284

285+
RUN_R_NOTRIM.out.version | mix(software_versions_ch) | set{software_versions_ch}
286+
273287
}
274288

275289

276290
// Zip biom file
277291
ZIP_BIOM(dada_biom)
278292

293+
ZIP_BIOM.out.version | mix(software_versions_ch) | set{software_versions_ch}
294+
279295
if(params.enable_visualizations){
280296
// Visualize
281297
runsheet = params.GLDS_accession ? GET_RUNSHEET.out.runsheet : params.runsheet
282298
R_VISUALIZATION(runsheet, sample_ids_ch, dada_counts, dada_taxonomy)
299+
R_VISUALIZATION.out.version | mix(software_versions_ch) | set{software_versions_ch}
283300

284301
}
285302

303+
// Software Version Capturing - combining all captured software versions
304+
nf_version = "Nextflow Version:".concat("${nextflow.version}\n<><><>\n")
305+
nextflow_version_ch = Channel.value(nf_version)
306+
307+
// Write software versions to file
308+
software_versions_ch | map { it.text + "\n<><><>\n"}
309+
| unique
310+
| mix(nextflow_version_ch)
311+
| collectFile(name: "${params.metadata_dir}/software_versions.txt", newLine: true, cache: false)
312+
| set{final_software_versions_ch}
313+
314+
286315
}
287316

288317
workflow.onComplete {

Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/modules/create_runsheet.nf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,14 @@ process GET_RUNSHEET {
1313
path("*.zip"), emit: zip
1414
path("GLparams_file.csv"), emit: params_file
1515
path("GLfile.csv"), emit: input_file
16+
path("versions.txt"), emit: version
1617

1718
script:
1819
"""
1920
create_runsheet.py --OSD ${params.GLDS_accession} --target ${params.target_region}
21+
GL-version | grep "GeneLab utils"| sed -E 's/^\\s+//' > versions.txt
22+
echo "dptools v1.3.4" >> versions.txt
23+
python --version >> versions.txt 2>&1  # Python < 3.4 prints its version on stderr, so capture both streams
2024
"""
2125
}
2226

Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/modules/quality_assessment.nf

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,15 @@ process FASTQC {
2020
input:
2121
tuple val(sample_id), path(reads), val(isPaired)
2222
output:
23-
tuple path("*.html"), path("*.zip")
23+
tuple path("*.html"), path("*.zip"), emit: html
24+
path("versions.txt"), emit: version
2425
script:
2526
"""
2627
fastqc -o . \\
2728
-t ${task.cpus} \\
2829
${reads}
30+
31+
fastqc --version > versions.txt
2932
"""
3033
}
3134

@@ -39,7 +42,8 @@ process MULTIQC {
3942
path(multiqc_config)
4043
path(files)
4144
output:
42-
path("${prefix}_multiqc${params.assay_suffix}_report.zip")
45+
path("${prefix}_multiqc${params.assay_suffix}_report.zip"), emit: report
46+
path("versions.txt"), emit: version
4347
script:
4448
"""
4549
multiqc -q --filename ${prefix}_multiqc \\
@@ -50,6 +54,7 @@ process MULTIQC {
5054
# Zipping
5155
zip -q -r ${prefix}_multiqc${params.assay_suffix}_report.zip ${prefix}_multiqc_report
5256
57+
multiqc --version > versions.txt
5358
"""
5459
}
5560

@@ -69,6 +74,7 @@ process CUTADAPT {
6974
tuple val(sample_id), path("*${params.primer_trimmed_R1_suffix[-5..-1]}"), val(isPaired), emit: reads
7075
tuple val(sample_id), path("${sample_id}-cutadapt.log"), emit: logs
7176
tuple val(sample_id), path("${sample_id}-trimmed-counts.tsv"), emit: trim_counts
77+
path("versions.txt"), emit: version
7278
script:
7379
"""
7480
R_primer_comp=`echo ${R_primer} |tr ATGCRYSWKMBVDHN TACGYRSWMKVBHDN |rev`
@@ -175,6 +181,9 @@ process CUTADAPT {
175181
<( grep "Reads written" ${sample_id}-cutadapt.log | tr -s " " "\\t" | cut -f 5 | tr -d "," ) \\
176182
> ${sample_id}-trimmed-counts.tsv
177183
fi
184+
185+
VERSION=`cutadapt --version`
186+
echo "cutadapt \${VERSION}" > versions.txt
178187
"""
179188
}
180189

@@ -213,7 +222,8 @@ workflow quality_check {
213222

214223

215224
main:
216-
fastqc_ch = FASTQC(reads_ch).flatten().collect()
225+
FASTQC(reads_ch)
226+
fastqc_ch = FASTQC.out.html.flatten().collect()
217227
MULTIQC(prefix_ch, multiqc_config, fastqc_ch)
218228
}
219229

Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/modules/run_dada.nf

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ process RUN_R_TRIM {
1616
path(trimmed_read_counts)
1717
output:
1818
path("Filtered_Sequence_Data/*${params.filtered_R1_suffix[-5..-1]}"), emit: reads
19+
path("Filtered_Sequence_Data/filtered-read-counts${params.assay_suffix}.tsv"), emit: filtered_count
1920
path("final_outputs/taxonomy${params.assay_suffix}.tsv"), emit: taxonomy
2021
path("final_outputs/taxonomy-and-counts${params.assay_suffix}.biom"), emit: biom
2122
path("final_outputs/ASVs${params.assay_suffix}.fasta"), emit: fasta
2223
path("final_outputs/read-count-tracking${params.assay_suffix}.tsv"), emit: read_count
2324
path("final_outputs/counts${params.assay_suffix}.tsv"), emit: counts
2425
path("final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv"), emit: taxonomy_count
26+
path("versions.txt"), emit: version
2527
script:
2628

2729
"""
@@ -82,6 +84,9 @@ process RUN_R_TRIM {
8284
(head -n 1 final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv; \\
8385
awk 'NR>1{print}' final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv | sort -V -k1) \\
8486
> temp_tax_cont.tsv && mv temp_tax_cont.tsv final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv
87+
88+
R --vanilla --version |grep "R version" > versions.txt
89+
get_R_package_version.R
8590
"""
8691

8792
}
@@ -99,12 +104,14 @@ process RUN_R_NOTRIM {
99104
val(raw_read_suffix) //[R1,R2] or [R1]
100105
output:
101106
path("Filtered_Sequence_Data/*${params.filtered_R1_suffix[-5..-1]}"), emit: reads
107+
path("Filtered_Sequence_Data/filtered-read-counts${params.assay_suffix}.tsv"), emit: filtered_count
102108
path("final_outputs/taxonomy${params.assay_suffix}.tsv"), emit: taxonomy
103109
path("final_outputs/taxonomy-and-counts${params.assay_suffix}.biom"), emit: biom
104110
path("final_outputs/ASVs${params.assay_suffix}.fasta"), emit: fasta
105111
path("final_outputs/read-count-tracking${params.assay_suffix}.tsv"), emit: read_count
106112
path("final_outputs/counts${params.assay_suffix}.tsv"), emit: counts
107-
path("final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv"), emit: taxonomy_count
113+
path("final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv"), emit: taxonomy_count
114+
path("versions.txt"), emit: version
108115
script:
109116
"""
110117
if [ ${isPaired} == true ]; then
@@ -159,5 +166,8 @@ process RUN_R_NOTRIM {
159166
(head -n 1 final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv; \\
160167
awk 'NR>1{print}' final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv | sort -V -k1) \\
161168
> temp_tax_cont.tsv && mv temp_tax_cont.tsv final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv
169+
170+
R --vanilla --version |grep "R version" > versions.txt
171+
get_R_package_version.R
162172
"""
163173
}

0 commit comments

Comments
 (0)