nasa
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/PE_file.csv
Lines changed: 2 additions & 2 deletions b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/PE_file.csv
Lines changed: 2 additions & 2 deletions
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/SE_file.csv
Lines changed: 2 additions & 2 deletions b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/SE_file.csv
Lines changed: 2 additions & 2 deletions
diff --git a/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/main.nf
Lines changed: 141 additions & 6 deletions b/‎Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/main.nf
Lines changed: 141 additions & 6 deletions
@@ -1,3 +1,3 @@
 sample_id,forward,reverse,paired
-Sample-1,/global/data/temp_scratch/oobayomi/amplicon/nextflow_test/example-amplicon-reads/Sample-1_R1_raw.fastq.gz,/global/data/temp_scratch/oobayomi/amplicon/nextflow_test/example-amplicon-reads/Sample-1_R2_raw.fastq.gz,true
-Sample-2,/global/data/temp_scratch/oobayomi/amplicon/nextflow_test/example-amplicon-reads/Sample-2_R1_raw.fastq.gz,/global/data/temp_scratch/oobayomi/amplicon/nextflow_test/example-amplicon-reads/Sample-2_R2_raw.fastq.gz,true
+Sample-1,/path/to/raw-reads/Sample-1_R1_raw.fastq.gz,/path/to/raw-reads/Sample-1_R2_raw.fastq.gz,true
+Sample-2,/path/to/raw-reads/Sample-2_R1_raw.fastq.gz,/path/to/raw-reads/Sample-2_R2_raw.fastq.gz,true
@@ -1,3 +1,3 @@
 sample_id,forward,paired
-Sample-1,/global/data/temp_scratch/oobayomi/amplicon/nextflow_test/example-amplicon-reads/Sample-1_R1_raw.fastq.gz,false
-Sample-2,/global/data/temp_scratch/oobayomi/amplicon/nextflow_test/example-amplicon-reads/Sample-2_R1_raw.fastq.gz,false
+Sample-1,/path/to/raw-reads/Sample-1_R1_raw.fastq.gz,false
+Sample-2,/path/to/raw-reads/Sample-2_R1_raw.fastq.gz,false
@@ -7,10 +7,147 @@ c_bright_green = "\u001b[32;1m";
 c_blue = "\033[0;34m";
 c_reset = "\033[0m";
 
-
-
-// create GLD runsheet
-
+/**************************************************
+* HELP MENU: on --help, print usage and exit 0  ***
+**************************************************/
+if (params.help) {
+  println()
+  println("Nextflow AmpIllumina Consensus Pipeline: $workflow.manifest.version")
+  println("USAGE:")
+  println("Example 1: Submit and run jobs with slurm in singularity containers.")
+  println("   > nextflow run main.nf -resume -profile slurm_sing --csv_file PE_file.csv --target_region 16S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
+  println()
+  println("Example 2: Submit and run jobs with slurm in conda environments.")
+  println("   > nextflow run main.nf -resume -profile slurm_conda --csv_file SE_file.csv --target_region ITS --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
+  println()
+  println("Example 3: Run jobs locally in conda environments, supplying a GLDS accession, and specify the path to an existing conda environment")
+  println("   > nextflow run main.nf -resume -profile conda --GLDS_accession OSD-256 --target_region 18S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT --conda.qc <path/to/existing/conda/environment>")
+  println()
+  println("Required arguments:")
+  println("""-profile [STRING] What profile should be used to run the workflow. Options are [singularity, docker, conda, slurm_sing, slurm_conda].
+	         singularity, docker and conda will run the pipeline locally using singularity, docker, and conda, respectively.
+             slurm_sing and slurm_conda will submit and run jobs using slurm in singularity containers and conda environments, respectively. """)
+  println("--csv_file  [PATH] A 3-column (single-end) or 4-column (paired-end) input file (sample_id, forward, [reverse,] paired). Mandatory if a GLDS accession is not provided.")
+  println(" Please see the files: SE_file.csv and PE_file.csv for single-end and paired-end examples, respectively.")
+  println(" The sample_id column should contain unique sample ids.")
+  println(" The forward and reverse columns should contain the absolute or relative path to the sample's forward and reverse reads.")
+  println(" The paired column should be true for paired-end or anything else for single-end reads.")
+  println("--target_region [STRING] What is the amplicon target region to be analyzed. Options are one of [16S, 18S, ITS]. Default: 16S")
+  println("--trim_primers [BOOLEAN] Should primers be trimmed? true or false. Default: true")
+  println("--raw_R1_suffix [STRING] Raw forward reads suffix (region following the unique part of the sample names). e.g. _R1_raw.fastq.gz")
+  println("--raw_R2_suffix [STRING] Raw reverse reads suffix (region following the unique part of the sample names). e.g. _R2_raw.fastq.gz")
+
+  println("Cutadapt (trimming) parameters:")
+  println("	    --F_primer [STRING] Forward primer sequence e.g. AGAGTTTGATCCTGGCTCAG")
+  println("	    --R_primer [STRING] Reverse primer sequence e.g. CTGCCTCCCGTAGGAGT")
+  println("	    --min_cutadapt_len [INT] What should be the minimum read length after quality trimming with cutadapt. Default: 130")
+  println("	    --primers_linked [STRING] Are the primers linked? See https://cutadapt.readthedocs.io/en/stable/recipes.html#trimming-amplicon-primers-from-paired-end-reads. Default: TRUE")
+  println("	    --discard_untrimmed [STRING] Should untrimmed reads be discarded? Any supplied string except TRUE will not discard them. Default: TRUE")
+
+  println("Optional arguments:")
+  println("  --help  Print this help message and exit")
+  println("  --publishDir_mode [STRING]  How should nextflow publish file outputs. Options can be found here https://www.nextflow.io/docs/latest/process.html#publishdir Default: link.")
+  println("  --errorStrategy [STRING] How should nextflow handle errors. Options can be found here https://www.nextflow.io/docs/latest/process.html#errorstrategy. Default: ignore")
+  println("  --enable_visualizations [BOOLEAN] Should ASV plots be made? true or false. If true, supply a path to the runsheet for plotting to the --runsheet option. Default: false")
+  println("  --runsheet [PATH] A 4-column file with these exact headers [ Sample Name, read1_path, raw_R1_suffix, groups] for plotting. Only relevant if --enable_visualizations is true. Default: null")
+  println("  --multiqc_config [PATH] Path to a custom multiqc config file. Default: config/multiqc.config")
+
+  println("Dada2 parameters passed to filterAndTrim() function:")
+  println("	    --left_trunc [INT] truncate the sequences to the left by this number of bases. Default: 0")
+  println("	    --right_trunc [INT] truncate the sequences to the right by this number of bases. Default: 0")
+  println("	    --left_maxEE [INT] Maximum allowed errors to the left. Default: 1")
+  println("	    --right_maxEE [INT] Maximum allowed errors to the right. Default: 1")
+  println("	    --concatenate_reads_only [STRING] Concatenate only with dada2 instead of merging paired reads if TRUE.")
+  println("      This is typically used with primers like 515-926, that captured 18S fragments that are typically too long to merge.")
+  println("      Note that 16S and 18S should have been separated already prior to running this workflow. This should likely be left as FALSE for any option other than 18S above")
+  println("	     Values are TRUE or FALSE Default: FALSE")
+
+  println("File Suffixes:")
+  println("      --primer_trimmed_R1_suffix [STRING] Suffix to use for naming your primer trimmed forward reads. Default: _R1_trimmed.fastq.gz")
+  println("      --primer_trimmed_R2_suffix [STRING] Suffix to use for naming your primer trimmed reverse reads. Default: _R2_trimmed.fastq.gz")
+  println("      --filtered_R1_suffix [STRING]  Suffix to use for naming your quality filtered forward reads. Default: _R1_filtered.fastq.gz")
+  println("      --filtered_R2_suffix [STRING]  Suffix to use for naming your quality filtered reverse reads. Default: _R2_filtered.fastq.gz")
+  println("Output directories:")
+  println("      --raw_reads_dir [PATH] Where should the fastqc report of the raw reads be stored. Default: Raw_Sequence_Data/")
+  println("      --fastqc_out_dir [PATH] Where should multiqc outputs be stored. Default: workflow_output/FastQC_Outputs/")
+  println("      --trimmed_reads_dir [PATH] Where should your cutadapt trimmed reads be stored. Default: workflow_output/Trimmed_Sequence_Data/")
+  println("      --filtered_reads_dir [PATH] Where should your filtered reads be stored.  Default: workflow_output/Filtered_Sequence_Data/")
+  println("      --info_out_dir [PATH] Where should output metadata be stored. Default: workflow_output/Metadata/")
+  println("      --plots_dir [PATH] Where should your plots be stored if visualization is enabled. Default: workflow_output/Final_Outputs/Plots/")
+  println("      --final_outputs_dir [PATH] Where should most outputs and summary reports be stored.  Default: workflow_output/Final_Outputs/")
+  println("Genelab specific arguments:")
+  println("      --GLDS_accession [STRING]  A Genelab accession number if the --csv_file parameter is not set. If this parameter is set, it will ignore the --csv_file parameter.")
+  println("      --assay_suffix [STRING]  Genelabs assay suffix. Default: GLAmpSeq.")
+  println("      --output_prefix [STRING] Unique name to tag onto output files. Default: ''")
+  println("Paths to existing conda environments to use otherwise a new one will be created using the yaml file in envs/.")
+  println("      --conda.qc [PATH] Path to a conda environment containing fastqc, multiqc, zip and python. Default: null.")
+  println("      --conda.R [PATH] Path to a conda environment containing R along with the packages decipher and biomformat installed. Default: null.")
+  println("      --conda.genelab  [PATH] Path to a conda environment containing genelab-utils. Default: null.")
+  println("      --conda.cutadapt [PATH] Path to a conda environment containing cutadapt. Default: null.")
+  println("      --conda.R_visualizations [PATH] Path to a conda environment containing R packages required for plotting. Default: null.")
+  println("Advanced users can edit the nextflow.config file for more control over default settings such as container choice, number of cpus, memory per task, etc.")
+  exit 0  // stop here so that no workflow is run when only help was requested
+  }
+// Log a summary of the parameter values in effect for this run.
+log.info """
+         Nextflow AmpIllumina Consensus Pipeline: $workflow.manifest.version
+         
+         You have set the following parameters:
+         Input csv file : ${params.csv_file}
+         GLDS_accession : ${params.GLDS_accession}
+         Amplicon target region : ${params.target_region}
+         Nextflow Directory publishing mode: ${params.publishDir_mode}
+         Trim Primers: ${params.trim_primers}
+         Nextflow Error strategy: ${params.errorStrategy}
+         Enable visualization: ${params.enable_visualizations}
+         Runsheet For plotting: ${params.runsheet}
+         MultiQC configuration file: ${params.multiqc_config}
+
+         File Suffixes:
+         Raw Forward Reads Suffix: ${params.raw_R1_suffix}
+         Raw Reverse Reads Suffix: ${params.raw_R2_suffix}
+         Trimmed Forward Reads Suffix: ${params.primer_trimmed_R1_suffix}
+         Trimmed Reverse Reads Suffix: ${params.primer_trimmed_R2_suffix}
+         Filtered Forward Reads Suffix: ${params.filtered_R1_suffix}
+         Filtered Reverse Reads Suffix: ${params.filtered_R2_suffix}
+
+         Cutadapt Parameters:
+         Forward Primer: ${params.F_primer}
+         Reverse Primer: ${params.R_primer}
+         Minimum Trimmed Reads length: ${params.min_cutadapt_len}
+         Primers Are linked: ${params.primers_linked}
+         Discard Untrimmed Reads: ${params.discard_untrimmed}
+
+ 
+         Dada2 Parameters:
+         Truncate left: ${params.left_trunc}bp
+         Truncate right: ${params.right_trunc}bp
+         Max error left: ${params.left_maxEE}
+         Max error right: ${params.right_maxEE}
+         Concatenate Reads: ${params.concatenate_reads_only}
+ 
+         Output Directories:
+         Raw reads: ${params.raw_reads_dir}
+         FastQC: ${params.fastqc_out_dir}
+         Trimmed Reads: ${params.trimmed_reads_dir}
+         Filtered Reads: ${params.filtered_reads_dir}
+         Metadata: ${params.info_out_dir}
+         Plots: ${params.plots_dir}
+         Reports: ${params.final_outputs_dir}
+
+         Genelab Assay Suffix: ${params.assay_suffix}
+         Output Prefix: ${params.output_prefix}
+
+         Conda Environments:
+         qc: ${params.conda.qc}
+         R: ${params.conda.R}
+         genelab: ${params.conda.genelab}
+         cutadapt: ${params.conda.cutadapt}
+         R_visualizations: ${params.conda.R_visualizations}
+         """.stripIndent()
+
+
+// Create GLDS runsheet
 include { GET_RUNSHEET } from "./modules/create_runsheet.nf"
 
 // Read quality check and filtering
@@ -68,8 +205,6 @@ workflow {
                                          tuple( "${row.sample_id}", [file("${row.forward}")], deleteWS(row.paired))
                 }.set{reads_ch} 
 
-     //reads_ch.view()
-     //return
     // Generating a file with sample ids on a new line
     file_ch.map{row -> "${row.sample_id}"}
               .collectFile(name: "${baseDir}/unique-sample-IDs.txt", newLine: true)