nasa
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/bin/edit_create_runsheet.py
Lines changed: 521 additions & 0 deletions b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/bin/edit_create_runsheet.py
Lines changed: 521 additions & 0 deletions
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/bin/get_R_package_version.R
Lines changed: 11 additions & 0 deletions b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/bin/get_R_package_version.R
Lines changed: 11 additions & 0 deletions
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/bin/get_Rvisualization_package_version.R
Lines changed: 17 additions & 0 deletions b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/bin/get_Rvisualization_package_version.R
Lines changed: 17 additions & 0 deletions
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/main.nf
Lines changed: 47 additions & 18 deletions b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/main.nf
Lines changed: 47 additions & 18 deletions
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/modules/create_runsheet.nf
Lines changed: 4 additions & 0 deletions b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/modules/create_runsheet.nf
Lines changed: 4 additions & 0 deletions
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/modules/quality_assessment.nf
Lines changed: 13 additions & 3 deletions b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/modules/quality_assessment.nf
Lines changed: 13 additions & 3 deletions
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/modules/run_dada.nf
Lines changed: 11 additions & 1 deletion b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/modules/run_dada.nf
Lines changed: 11 additions & 1 deletion
@@ -0,0 +1,11 @@
+#!/usr/bin/env Rscript
+
+# Record the installed versions of dada2, DECIPHER and biomformat in versions.txt.
+pkgs <- c("dada2", "DECIPHER", "biomformat")
+
+# Build one "<package> <version>\n" entry per package, in the order listed above.
+entries <- vapply(pkgs, function(p) sprintf("%s %s\n", p, as.character(packageVersion(p))), character(1))
+VERSIONS <- paste(entries, collapse = "")
+
+# append = TRUE: add to whatever versions.txt in the working directory already holds.
+write(x = VERSIONS, file = "versions.txt", append = TRUE)
@@ -0,0 +1,17 @@
+#!/usr/bin/env Rscript
+
+# Record the installed versions of the R packages listed below
+# in versions.txt, one "<package> <version>" line per package.
+pkgs <- c("vegan", "tidyverse", "dendextend",
+          "phyloseq", "DESeq2", "ggrepel",
+          "dplyr", "RColorBrewer", "grid")
+
+# Look up each version and render its "<package> <version>\n" entry,
+# keeping the order of the list above.
+entries <- vapply(pkgs,
+                  function(p) sprintf("%s %s\n", p, as.character(packageVersion(p))),
+                  character(1))
+VERSIONS <- paste(entries, collapse = "")
+
+# append = TRUE: add to whatever versions.txt already holds.
+write(x = VERSIONS, file = "versions.txt", append = TRUE)
@@ -7,7 +7,6 @@ c_bright_green = "\u001b[32;1m";
 c_blue = "\033[0;34m";
 c_reset = "\033[0m";
 
-params.help = false
 /**************************************************
 * HELP MENU  **************************************
 **************************************************/
@@ -16,18 +15,18 @@ if (params.help) {
   println("Nextflow AmpIllumina Consensus Pipeline: $workflow.manifest.version")
   println("USAGE:")
   println("Example 1: Submit and run jobs with slurm in singularity containers.")
-  println("   > nextflow run main.nf -resume -profile slurm_sing --csv_file PE_file.csv --target_region 16S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
+  println("   > nextflow run main.nf -resume -profile slurm,singularity --csv_file PE_file.csv --target_region 16S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
   println()
   println("Example 2: : Submit and run jobs with slurm in conda environments.")
-  println("   > nextflow run main.nf -resume -profile slurm_conda --csv_file SE_file.csv --target_region 1TS --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
+  println("   > nextflow run main.nf -resume -profile slurm,conda --csv_file SE_file.csv --target_region ITS --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT") // example region is ITS (was misspelled "1TS")
   println()
   println("Example 3: Run jobs locally in conda environments, supplying a GLDS accession, and specifying the path to an existing conda environment")
   println("   > nextflow run main.nf -resume -profile conda --GLDS_accession GLDS-487 --target_region 16S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT --conda.qc <path/to/existing/conda/environment>")
   println()
   println("Required arguments:")
-  println("""-profile [STRING] What profile should be used to run the workflow. Options are [singularity, docker, conda, slurm_sing, slurm_conda].
+  println("""-profile [STRING] What profile should be used to run the workflow. Options are [singularity, docker, conda, slurm].
 	         singularity, docker and conda will run the pipelne locally using singularity, docker, and conda, respectively.
-             slurm_sing and slurm_conda will submit and run jobs using slurm in singularity containers and conda environments, respectively. """)			 
+                 To combine profiles, pass them together separated by comma. For example, to run jobs using slurm in singularity containers use 'slurm,singularity' . """)			 
   println("--csv_file  [PATH] A 3-column (single-end) or 4-column (paired-end) input file (sample_id, forward, [reverse,] paired). Mandatory if a GLDS accession is not provided.")
   println(" Please see the files: SE_file.csv and PE_file.csv for single-end and paired-end examples, respectively.")
   println(" The sample_id column should contain unique sample ids.")
@@ -38,22 +37,22 @@ if (params.help) {
   println("PLEASE NOTE: This workflow assumes that all your raw reads end with the same suffix. If they don't please modify your filenames to have the same suffix as shown below.")
   println("--raw_R1_suffix [STRING] Raw forward reads suffix (region following the unique part of the sample names). e.g. _R1_raw.fastq.gz.") 
   println("--raw_R2_suffix [STRING] Raw reverse reads suffix (region following the unique part of the sample names). e.g. _R2_raw.fastq.gz.") 
-  println()
+
   println("Cutadapt (trimming) parameters:")
   println("	    --F_primer [STRING] Forward primer sequence e.g. AGAGTTTGATCCTGGCTCAG. Default: emptry string.")
   println("	    --R_primer [STRING] Reverse primer sequence e.g. CTGCCTCCCGTAGGAGT. Default: emptry string.")
   println("	    --min_cutadapt_len [INTEGER] What should be the minimum read length after quality trimming with cutadapt. Default: 130.")
   println("	    --primers_linked [STRING] Are the primers linked?. https://cutadapt.readthedocs.io/en/stable/recipes.html#trimming-amplicon-primers-from-paired-end-reads. Default: TRUE. ")
   println("	    --discard_untrimmed [STRING] Should untrimmed reads be discarded? Any supplied string except TRUE will not discard them. Default: TRUE.")
-  println()	
+	
   println("Optional arguments:")  
   println("  --help  Print this help message and exit.")
   println("  --publishDir_mode [STRING]  How should nextflow publish file outputs. Options can be found here https://www.nextflow.io/docs/latest/process.html#publishdir. Default: link.")
   println("  --errorStrategy [STRING] How should nextflow handle errors. Options can be found here https://www.nextflow.io/docs/latest/process.html#errorstrategy. Default: terminate")
   println("  --enable_visualizations [BOOLEAN] Should ASV plots be made? true or false. if true supply a path to the ruhnsheet for plotting to the --runsheet option. Default: false.")
   println("  --runsheet [PATH] A 4-column file with these exact headers [Sample Name, read1_path, raw_R1_suffix, groups] for plotting. Only relevant if --enable_visualizations is true. Default: null.") 
   println("  --multiqc_config [PATH] Path to a custome multiqc config file. Default: config/multiqc.config.")
-  println()
+
   println("Dada2 parameters passed to filterAndTrim() function:")
   println("	    --left_trunc [INTEGER] truncate the sequences to the left by this number of bases. Default: 0.") 
   println("	    --right_trunc [INTEGER] truncate the sequences to the right by this number of bases. Default: 0.") 
@@ -63,13 +62,12 @@ if (params.help) {
   println("      This is typically used with primers like 515-926, that captured 18S fragments that are typically too long to merge.")
   println("      Note that 16S and 18S should have been separated already prior to running this workflow. This should likely be left as FALSE for any option other than 18S above.") 	    
   println("	     Values are TRUE or FALSE Default: FALSE.")
-  println()
+
   println("File Suffixes:")
   println("      --primer_trimmed_R1_suffix [STRING] Suffix to use for naming your primer trimmed forward reads. Default: _R1_trimmed.fastq.gz.")
   println("      --primer_trimmed_R2_suffix [STRING] Suffix to use for naming your primer trimmed reverse reads. Default: _R2_trimmed.fastq.gz.")  
   println("      --filtered_R1_suffix [STRING]  Suffix to use for naming your quality filtered forward reads. Default: _R1_filtered.fastq.gz.")
   println("      --filtered_R2_suffix [STRING]  Suffix to use for naming your quality filtered reverse reads. Default: _R2_filtered.fastq.gz.")
-  println()
   println("Output directories:")
   println("      --raw_reads_dir [PATH] Where should the fastqc report of the raw reads be stored. Default: Raw_Sequence_Data/.")
   println("      --fastqc_out_dir [PATH] Where should multiqc outputs be stored. Default: workflow_output/FastQC_Outputs/.")
@@ -78,12 +76,10 @@ if (params.help) {
   println("      --info_out_dir [PATH] Where should output metadata be stored. Default: workflow_output/Metadata/.")
   println("      --plots_dir [PATH] Where should your plots be stored if visualization is enabled. Default: workflow_output/Final_Outputs/Plots/.")
   println("      --final_outputs_dir [PATH] Where should most outputs and summary reports be stored.  Default: workflow_output/Final_Outputs/.")
-  println()
   println("Genelab specific arguements:")
   println("      --GLDS_accession [STRING]  A Genelab accession number if the --csv_file parameter is not set. If this parameter is set, it will ignore the --csv_file parameter.")
   println("      --assay_suffix [STRING]  Genelabs assay suffix. Default: GLAmpSeq.")
   println("      --output_prefix [STRING] Unique name to tag onto output files. Default: empty string.")
-  println()
   println("Paths to existing conda environments to use otherwise a new one will be created using the yaml file in envs/.")
   println("      --conda.qc [PATH] Path to a conda environment containing fastqc, multiqc, zip and python. Default: null.")
   println("      --conda.R [PATH] Path to a conda environment containing R along with the packages decipher and biomformat installed. Default: null.")
@@ -94,11 +90,12 @@ if (params.help) {
   exit 0
   }
 
+
+if(params.debug){
 log.info """
          Nextflow AmpIllumina Consensus Pipeline: $workflow.manifest.version
          
          You have set the following parameters:
-         Profile: ${workflow.profile}
          Input csv file : ${params.csv_file}
          GLDS_accession : ${params.GLDS_accession}
          Amplicon target region : ${params.target_region}
@@ -151,7 +148,7 @@ log.info """
          cutadapt: ${params.conda.cutadapt}
          R_visualizations: ${params.conda.R_visualizations}
          """.stripIndent()
-
+}
 
 // Create GLDS runsheet
 include { GET_RUNSHEET } from "./modules/create_runsheet.nf"
@@ -178,6 +175,9 @@ def deleteWS(string){
 
 
 workflow {
+    
+    // Capture software versions
+    software_versions_ch = Channel.empty()
 
    if(params.GLDS_accession){
 
@@ -198,6 +198,7 @@ workflow {
                            row -> "${row.data_type}" == "PE" ? ["${row.raw_R1_suffix}", "${row.raw_R2_suffix}"] : ["${row.raw_R1_suffix}"] 
                            }.first() 
 
+      GET_RUNSHEET.out.version | mix(software_versions_ch) | set{software_versions_ch}
 
    }else{
 
@@ -207,8 +208,8 @@ workflow {
    }
 
     file_ch.map{
-                     row -> deleteWS(row.paired)  == 'true' ? tuple( "${row.sample_id}", [file("${row.forward}"), file("${row.reverse}")], deleteWS(row.paired)) : 
-                                         tuple( "${row.sample_id}", [file("${row.forward}")], deleteWS(row.paired))
+                     row -> deleteWS(row.paired)  == 'true' ? tuple( "${row.sample_id}", [file("${row.forward}", checkIfExists: true), file("${row.reverse}", checkIfExists: true)], deleteWS(row.paired)) : 
+                                         tuple( "${row.sample_id}", [file("${row.forward}", checkIfExists: true)], deleteWS(row.paired))
                 }.set{reads_ch} 
 
     // Generating a file with sample ids on a new line
@@ -217,9 +218,14 @@ workflow {
               .set{sample_ids_ch}
 
     // Read quality check and trimming
-    raw_fastqc_files = RAW_FASTQC(reads_ch).flatten().collect()
+    RAW_FASTQC(reads_ch)
+    raw_fastqc_files = RAW_FASTQC.out.html.flatten().collect()
+
     RAW_MULTIQC("raw", params.multiqc_config,raw_fastqc_files)
 
+    RAW_FASTQC.out.version | mix(software_versions_ch) | set{software_versions_ch}
+    RAW_MULTIQC.out.version | mix(software_versions_ch) | set{software_versions_ch}
+
     if(params.trim_primers){
 
         if(!params.GLDS_accession) primers_ch = Channel.value([params.F_primer, params.R_primer])
@@ -231,7 +237,8 @@ workflow {
                                               }.flatten().collect()
 
         COMBINE_CUTADAPT_LOGS_AND_SUMMARIZE(counts, logs)
-        trimmed_fastqc_files = TRIMMED_FASTQC(CUTADAPT.out.reads).flatten().collect()
+        TRIMMED_FASTQC(CUTADAPT.out.reads)
+        trimmed_fastqc_files = TRIMMED_FASTQC.out.html.flatten().collect()
         TRIMMED_MULTIQC("filtered", params.multiqc_config, trimmed_fastqc_files)
 
         isPaired_ch = CUTADAPT.out.reads.map{ 
@@ -248,6 +255,11 @@ workflow {
         dada_taxonomy = RUN_R_TRIM.out.taxonomy
         dada_biom = RUN_R_TRIM.out.biom
 
+        CUTADAPT.out.version | mix(software_versions_ch) | set{software_versions_ch}
+        TRIMMED_FASTQC.out.version | mix(software_versions_ch) | set{software_versions_ch}
+        TRIMMED_MULTIQC.out.version | mix(software_versions_ch) | set{software_versions_ch}
+        RUN_R_TRIM.out.version | mix(software_versions_ch) | set{software_versions_ch}
+
     }else{
 
         raw_reads_ch = reads_ch.map{
@@ -270,19 +282,36 @@ workflow {
         dada_taxonomy = RUN_R_NOTRIM.out.taxonomy
         dada_biom = RUN_R_NOTRIM.out.biom
 
+        RUN_R_NOTRIM.out.version | mix(software_versions_ch) | set{software_versions_ch}
+
     }
 
 
     // Zip biom file
     ZIP_BIOM(dada_biom)
 
+    ZIP_BIOM.out.version | mix(software_versions_ch) | set{software_versions_ch}
+
     if(params.enable_visualizations){
         // Visualize
         runsheet = params.GLDS_accession ? GET_RUNSHEET.out.runsheet : params.runsheet
         R_VISUALIZATION(runsheet, sample_ids_ch, dada_counts, dada_taxonomy)
+        R_VISUALIZATION.out.version | mix(software_versions_ch) | set{software_versions_ch}
 
     }
 
+        // Software Version Capturing - combining all captured software versions
+     nf_version = "Nextflow Version:".concat("${nextflow.version}\n<><><>\n")
+     nextflow_version_ch = Channel.value(nf_version)
+
+     //  Write software versions to file
+     software_versions_ch | map { it.text + "\n<><><>\n"}
+                          | unique
+                          | mix(nextflow_version_ch)
+                          | collectFile(name: "${params.metadata_dir}/software_versions.txt", newLine: true, cache: false)
+                          | set{final_software_versions_ch}
+
+
 }
 
 workflow.onComplete {
 
@@ -13,10 +13,14 @@ process GET_RUNSHEET {
         path("*.zip"), emit: zip
         path("GLparams_file.csv"), emit: params_file
         path("GLfile.csv"), emit: input_file
+        path("versions.txt"), emit: version
 
     script:
         """
         create_runsheet.py --OSD ${params.GLDS_accession} --target ${params.target_region}
+        GL-version | grep "GeneLab utils"| sed -E 's/^\\s+//' > versions.txt
+        echo "dptools v1.3.4" >> versions.txt
+        python --version >> versions.txt 
         """
 }
 
 
@@ -20,12 +20,15 @@ process FASTQC {
     input:
         tuple val(sample_id), path(reads), val(isPaired)
     output:
-        tuple path("*.html"), path("*.zip")
+        tuple path("*.html"), path("*.zip"), emit: html
+        path("versions.txt"), emit: version
     script:
         """
         fastqc -o . \\
         -t ${task.cpus} \\
         ${reads}
+
+        fastqc --version > versions.txt
         """
 }
 
@@ -39,7 +42,8 @@ process MULTIQC {
         path(multiqc_config)
         path(files)
     output:
-        path("${prefix}_multiqc${params.assay_suffix}_report.zip")
+        path("${prefix}_multiqc${params.assay_suffix}_report.zip"), emit: report
+        path("versions.txt"), emit: version
     script:
         """
         multiqc -q --filename ${prefix}_multiqc \\
@@ -50,6 +54,7 @@ process MULTIQC {
         # Zipping
         zip -q -r ${prefix}_multiqc${params.assay_suffix}_report.zip ${prefix}_multiqc_report
 
+        multiqc --version > versions.txt
         """
   }
 
@@ -69,6 +74,7 @@ process CUTADAPT {
         tuple val(sample_id), path("*${params.primer_trimmed_R1_suffix[-5..-1]}"), val(isPaired), emit: reads
         tuple val(sample_id),  path("${sample_id}-cutadapt.log"), emit: logs
         tuple val(sample_id),  path("${sample_id}-trimmed-counts.tsv"), emit: trim_counts
+        path("versions.txt"), emit: version
     script:
     """
     R_primer_comp=`echo ${R_primer} |tr ATGCRYSWKMBVDHN  TACGYRSWMKVBHDN |rev`
@@ -175,6 +181,9 @@ process CUTADAPT {
                   <( grep "Reads written" ${sample_id}-cutadapt.log | tr -s " " "\\t" | cut -f 5 | tr -d "," ) \\
                   > ${sample_id}-trimmed-counts.tsv
     fi
+    
+    VERSION=`cutadapt --version`
+    echo "cutadapt \${VERSION}" > versions.txt
     """
 }
 
@@ -213,7 +222,8 @@ workflow quality_check {
 
 
     main:
-    fastqc_ch = FASTQC(reads_ch).flatten().collect()
+        FASTQC(reads_ch)
+    fastqc_ch = FASTQC.out.html.flatten().collect()
     MULTIQC(prefix_ch, multiqc_config, fastqc_ch)
 }
 
 
@@ -16,12 +16,14 @@ process RUN_R_TRIM {
         path(trimmed_read_counts)
     output:
         path("Filtered_Sequence_Data/*${params.filtered_R1_suffix[-5..-1]}"), emit: reads
+        path("Filtered_Sequence_Data/filtered-read-counts${params.assay_suffix}.tsv"), emit: filtered_count
         path("final_outputs/taxonomy${params.assay_suffix}.tsv"), emit: taxonomy
         path("final_outputs/taxonomy-and-counts${params.assay_suffix}.biom"), emit: biom
         path("final_outputs/ASVs${params.assay_suffix}.fasta"), emit: fasta
         path("final_outputs/read-count-tracking${params.assay_suffix}.tsv"), emit: read_count
         path("final_outputs/counts${params.assay_suffix}.tsv"), emit: counts
         path("final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv"), emit: taxonomy_count
+        path("versions.txt"), emit: version
     script:
 
         """
@@ -82,6 +84,9 @@ process RUN_R_TRIM {
         (head -n 1 final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv; \\
             awk 'NR>1{print}' final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv | sort -V -k1) \\
             > temp_tax_cont.tsv && mv temp_tax_cont.tsv final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv
+
+        R --vanilla --version  |grep "R version" > versions.txt
+        get_R_package_version.R
         """
 
 }
@@ -99,12 +104,14 @@ process RUN_R_NOTRIM {
         val(raw_read_suffix) //[R1,R2] or [R1]
     output:
         path("Filtered_Sequence_Data/*${params.filtered_R1_suffix[-5..-1]}"), emit: reads
+        path("Filtered_Sequence_Data/filtered-read-counts${params.assay_suffix}.tsv"), emit: filtered_count
         path("final_outputs/taxonomy${params.assay_suffix}.tsv"), emit: taxonomy
         path("final_outputs/taxonomy-and-counts${params.assay_suffix}.biom"), emit: biom
         path("final_outputs/ASVs${params.assay_suffix}.fasta"), emit: fasta
         path("final_outputs/read-count-tracking${params.assay_suffix}.tsv"), emit: read_count
         path("final_outputs/counts${params.assay_suffix}.tsv"), emit: counts
-        path("final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv"), emit: taxonomy_count  
+        path("final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv"), emit: taxonomy_count
+        path("versions.txt"), emit: version  
     script:
         """
         if [ ${isPaired} == true ]; then
@@ -159,5 +166,8 @@ process RUN_R_NOTRIM {
         (head -n 1 final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv; \\
             awk 'NR>1{print}' final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv | sort -V -k1) \\
             > temp_tax_cont.tsv && mv temp_tax_cont.tsv final_outputs/taxonomy-and-counts${params.assay_suffix}.tsv
+        
+         R --vanilla --version  |grep "R version" > versions.txt
+         get_R_package_version.R
         """
 }