
Commit 9fc0765

Added figshare links and documentation
1 parent 0b779ec commit 9fc0765

File tree

4 files changed: +60 additions, -65 deletions


Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/bin/Illumina-PE-R-processing.R

Lines changed: 7 additions & 8 deletions
@@ -4,7 +4,7 @@
 ## Developed by Michael D. Lee (Mike.Lee@nasa.gov) ##
 ##################################################################################
 
-# as called from the associated Snakefile, this expects to be run as: Rscript full-R-processing.R <left_trunc> <right_trunc> <left_maxEE> <right_maxEE> <TRUE/FALSE - GL trimmed primers or not> <unique-sample-IDs-file> <starting_reads_dir_for_R> <filtered_reads_dir> <input_file_R1_suffix> <input_file_R2_suffix> <filtered_filename_R1_suffix> <filtered_filename_R2_suffix> <final_outputs_directory> <output_prefix> <target_region> <concatenate_reads_only> <assay_suffix>
+# as called from the associated process, this expects to be run as: Rscript full-R-processing.R <left_trunc> <right_trunc> <left_maxEE> <right_maxEE> <TRUE/FALSE - GL trimmed primers or not> <unique-sample-IDs-file> <starting_reads_dir_for_R> <filtered_reads_dir> <input_file_R1_suffix> <input_file_R2_suffix> <filtered_filename_R1_suffix> <filtered_filename_R2_suffix> <final_outputs_directory> <output_prefix> <target_region> <concatenate_reads_only> <assay_suffix>
 # where <left_trim> and <right_trim> are the values to be passed to the truncLen parameter of dada2's filterAndTrim()
 # and <left_maxEE> and <right_maxEE> are the values to be passed to the maxEE parameter of dada2's filterAndTrim()
 
@@ -161,8 +161,7 @@ dna <- DNAStringSet(getSequences(seqtab.nochim))
 # downloading reference R taxonomy object (at some point this will be stored somewhere on GeneLab's server and we won't download it, but should leave the code here, just commented out)
 cat("\n\n Downloading reference database...\n\n")
 if ( target_region == "16S" ) {
-#download.file("http://www2.decipher.codes/Classification/TrainingSets/SILVA_SSU_r138_2019.RData", "SILVA_SSU_r138_2019.RData")
-download.file("https://figshare.com/ndownloader/files/23739737", "SILVA_SSU_r138_2019.RData")
+download.file("https://figshare.com/ndownloader/files/46245217", "SILVA_SSU_r138_2019.RData")
 # loading reference taxonomy object
 load("SILVA_SSU_r138_2019.RData")
 # removing downloaded file
@@ -171,22 +170,22 @@ if ( target_region == "16S" ) {
 
 } else if (target_region == "ITS" ) {
 
-download.file("http://www2.decipher.codes/Classification/TrainingSets/UNITE_v2023_July2023.RData", "UNITE_v2023_July2023.RData")
+download.file("https://figshare.com/ndownloader/files/46245586", "UNITE_v2020_February2020.RData")
 # loading reference taxonomy object
-load("UNITE_v2023_July2023.RData")
+load("UNITE_v2020_February2020.RData")
 # removing downloaded file
-file.remove("UNITE_v2023_July2023.RData")
+#file.remove("UNITE_v2020_February2020.RData")
 
 ranks <- c("kingdom", "phylum", "class", "order", "family", "genus", "species")
 
 } else if (target_region == "18S" ) {
 
-download.file("http://www2.decipher.codes/Classification/TrainingSets/PR2_v4_13_March2021.RData", "PR2_v4_13_March2021.RData")
+download.file("https://figshare.com/ndownloader/files/46241917", "PR2_v4_13_March2021.RData")
 # https://github.com/pr2database/pr2database/releases/download/v4.14.0/pr2_version_4.14.0_SSU.decipher.trained.rds
 # loading reference taxonomy object
 load("PR2_v4_13_March2021.RData")
 # removing downloaded file
-file.remove("PR2_v4_13_March2021.RData")
+#file.remove("PR2_v4_13_March2021.RData")
 
 ranks <- c("kingdom", "division", "phylum", "class", "order", "family", "genus", "species")
 
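All three branches above follow the same download, load, and (optionally) remove pattern, now pointed at figshare mirrors instead of www2.decipher.codes. A minimal sketch of that pattern for the 16S branch is shown below; the status check and the mode = "wb" argument are illustrative additions and are not part of the committed script.

# sketch: fetch the DECIPHER-formatted SILVA training set from the figshare mirror used in this commit
ref_url  <- "https://figshare.com/ndownloader/files/46245217"
ref_file <- "SILVA_SSU_r138_2019.RData"

status <- download.file(ref_url, ref_file, mode = "wb")   # download.file() returns 0 on success
if ( status != 0 ) stop("Download of the reference database failed: ", ref_url)

load(ref_file)          # loads the reference taxonomy object into the session
file.remove(ref_file)   # note the ITS and 18S branches now leave this step commented out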
Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/bin/Illumina-SE-R-processing.R

Lines changed: 8 additions & 9 deletions
@@ -5,7 +5,7 @@
 ## Developed by Michael D. Lee (Mike.Lee@nasa.gov) ##
 ##################################################################################
 
-# as called from the associated Snakefile, this expects to be run as: Rscript full-R-processing.R <left_trunc> <left_maxEE> <TRUE/FALSE - GL trimmed primers or not> <unique-sample-IDs-file> <starting_reads_dir_for_R> <filtered_reads_dir> <input_file_R1_suffix> <filtered_filename_R1_suffix> <final_outputs_directory> <output_prefix> <target_region> <assay_suffix>
+# as called from the associated process, this expects to be run as: Rscript full-R-processing.R <left_trunc> <left_maxEE> <TRUE/FALSE - GL trimmed primers or not> <unique-sample-IDs-file> <starting_reads_dir_for_R> <filtered_reads_dir> <input_file_R1_suffix> <filtered_filename_R1_suffix> <final_outputs_directory> <output_prefix> <target_region> <assay_suffix>
 # where <left_trim> is the value to be passed to the truncLen parameter of dada2's filterAndTrim()
 # and <left_maxEE> is the value to be passed to the maxEE parameter of dada2's filterAndTrim()
 
@@ -130,8 +130,7 @@ dna <- DNAStringSet(getSequences(seqtab.nochim))
 # downloading reference R taxonomy object (at some point this will be stored somewhere on GeneLab's server and we won't download it, but should leave the code here, just commented out)
 cat("\n\n Downloading reference database...\n\n")
 if ( target_region == "16S" ) {
-#download.file("http://www2.decipher.codes/Classification/TrainingSets/SILVA_SSU_r138_2019.RData", "SILVA_SSU_r138_2019.RData")
-download.file("https://figshare.com/ndownloader/files/23739737", "SILVA_SSU_r138_2019.RData")
+download.file("https://figshare.com/ndownloader/files/46245217", "SILVA_SSU_r138_2019.RData")
 # loading reference taxonomy object
 load("SILVA_SSU_r138_2019.RData")
 # removing downloaded file
@@ -140,22 +139,22 @@ if ( target_region == "16S" ) {
 
 } else if (target_region == "ITS" ) {
 
-download.file("http://www2.decipher.codes/Classification/TrainingSets/UNITE_v2023_July2023.RData", "UNITE_v2023_July2023.RData")
+download.file("https://figshare.com/ndownloader/files/46245586", "UNITE_v2020_February2020.RData")
 # loading reference taxonomy object
-load("UNITE_v2023_July2023.RData")
+load("UNITE_v2020_February2020.RData")
 # removing downloaded file
-file.remove("UNITE_v2023_July2023.RData")
+#file.remove("UNITE_v2020_February2020.RData")
 
 ranks <- c("kingdom", "phylum", "class", "order", "family", "genus", "species")
 
 } else if (target_region == "18S" ) {
 
-download.file("http://www2.decipher.codes/Classification/TrainingSets/PR2_v4_13_March2021.RData", "PR2_v4_13_March2021.RData")
+download.file("https://figshare.com/ndownloader/files/46241917", "PR2_v4_13_March2021.RData")
+# https://github.com/pr2database/pr2database/releases/download/v4.14.0/pr2_version_4.14.0_SSU.decipher.trained.rds
 # loading reference taxonomy object
 load("PR2_v4_13_March2021.RData")
 # removing downloaded file
-file.remove("PR2_v4_13_March2021.RData")
-
+#file.remove("PR2_v4_13_March2021.RData")
 ranks <- c("kingdom", "division", "phylum", "class", "order", "family", "genus", "species")
 
 } else {
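Downstream of these hunks (not shown in this diff), each loaded .RData supplies a DECIPHER training set that is used to classify the ASV sequences in dna (built from seqtab.nochim, as the hunk headers show), and the ranks vectors label the resulting taxonomy table. Below is a minimal sketch of that step, assuming the standard DADA2 + DECIPHER pattern in which the loaded object is named trainingSet and classification is done with DECIPHER::IdTaxa(); the exact call used by these scripts is outside the scope of this commit.

library(DECIPHER)   # provides IdTaxa()
library(dada2)      # provides getSequences()

# classify the ASV sequences against the loaded training set
tax_info <- IdTaxa(dna, trainingSet, strand = "both", processors = NULL)

# build a taxonomy matrix labeled with the ranks vector defined above (one row per ASV)
tax_tab <- t(sapply(tax_info, function(x) {
    taxa <- x$taxon[-1]                                    # drop the leading "Root" level
    head(c(taxa, rep(NA, length(ranks))), length(ranks))   # pad or trim to length(ranks)
}))
colnames(tax_tab) <- ranks
rownames(tax_tab) <- getSequences(seqtab.nochim)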

Amplicon/Illumina/Workflow_Documentation/SW_AmpIllumina-B/workflow_code/main.nf

Lines changed: 38 additions & 37 deletions
@@ -20,72 +20,73 @@ if (params.help) {
 println("Example 2: : Submit and run jobs with slurm in conda environments.")
 println(" > nextflow run main.nf -resume -profile slurm_conda --csv_file SE_file.csv --target_region 1TS --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT")
 println()
-println("Example 3: Run jobs locally in conda environments, supplying a GLDS accession, and specify the path to an existing conda environment")
-println(" > nextflow run main.nf -resume -profile conda --GLDS_accession OSD-256 --target_region 18S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT --conda.qc <path/to/existing/conda/environment>")
+println("Example 3: Run jobs locally in conda environments, supplying a GLDS accession, and specifying the path to an existing conda environment")
+println(" > nextflow run main.nf -resume -profile conda --GLDS_accession GLDS-487 --target_region 16S --F_primer AGAGTTTGATCCTGGCTCAG --R_primer CTGCCTCCCGTAGGAGT --conda.qc <path/to/existing/conda/environment>")
 println()
 println("Required arguments:")
-println("""-profile [STRING] What profile should be used be use to run the workflow. Options are [singularity, docker, conda, slurm_sing, slurm_conda].
+println("""-profile [STRING] What profile should be used to run the workflow. Options are [singularity, docker, conda, slurm_sing, slurm_conda].
 singularity, docker and conda will run the pipelne locally using singularity, docker, and conda, respectively.
 slurm_sing and slurm_conda will submit and run jobs using slurm in singularity containers and conda environments, respectively. """)
 println("--csv_file [PATH] A 3-column (single-end) or 4-column (paired-end) input file (sample_id, forward, [reverse,] paired). Mandatory if a GLDS accession is not provided.")
 println(" Please see the files: SE_file.csv and PE_file.csv for single-end and paired-end examples, respectively.")
 println(" The sample_id column should contain unique sample ids.")
 println(" The forward and reverse columns should contain the absolute or relative path to the sample's forward and reverse reads.")
 println(" The paired column should be true for paired-end or anything else for single-end reads.")
-println("--target_region [STRING] What is the amplicon target region to be analyzed. Options are one of [16S, 18S, ITS]. Default: 16S")
-println("--trim_primers [BOOLEAN] Should primers be trimmed? true or false. Default: true")
-println("--raw_R1_suffix [STRING] Raw forward reads suffix (region following the unique part of the sample names). e.g. _R1_raw.fastq.gz")
-println("--raw_R2_suffix [STRING] Raw reverse reads suffix (region following the unique part of the sample names). e.g. _R2_raw.fastq.gz")
+println("--target_region [STRING] What is the amplicon target region to be analyzed. Options are one of [16S, 18S, ITS]. Default: 16S.")
+println("--trim_primers [BOOLEAN] Should primers be trimmed? true or false. Default: true.")
+println("PLEASE NOTE: This workflow assumes that all your raw reads end with the same suffix. If they don't please modify your filenames to have the same suffix as shown below.")
+println("--raw_R1_suffix [STRING] Raw forward reads suffix (region following the unique part of the sample names). e.g. _R1_raw.fastq.gz.")
+println("--raw_R2_suffix [STRING] Raw reverse reads suffix (region following the unique part of the sample names). e.g. _R2_raw.fastq.gz.")
 
 println("Cutadapt (trimming) parameters:")
-println(" --F_primer [STRING] Forward primer sequence e.g. AGAGTTTGATCCTGGCTCAG")
-println(" --R_primer [STRING] Reverse primer sequence e.g. CTGCCTCCCGTAGGAGT")
-println(" --min_cutadapt_len [int] What should be the minimum read length after quality trimming with cutadapt. Default: 130")
-println(" --primers_linked [STRING] Are the primers linked?. https://cutadapt.readthedocs.io/en/stable/recipes.html#trimming-amplicon-primers-from-paired-end-reads. Default: TRUE ")
-println(" --discard_untrimmed [STRING] Should untrimmed reads be discarded? Any supplied string except TRUE will not discard them. Default: TRUE")
+println(" --F_primer [STRING] Forward primer sequence e.g. AGAGTTTGATCCTGGCTCAG. Default: emptry string.")
+println(" --R_primer [STRING] Reverse primer sequence e.g. CTGCCTCCCGTAGGAGT. Default: emptry string.")
+println(" --min_cutadapt_len [INTEGER] What should be the minimum read length after quality trimming with cutadapt. Default: 130.")
+println(" --primers_linked [STRING] Are the primers linked?. https://cutadapt.readthedocs.io/en/stable/recipes.html#trimming-amplicon-primers-from-paired-end-reads. Default: TRUE. ")
+println(" --discard_untrimmed [STRING] Should untrimmed reads be discarded? Any supplied string except TRUE will not discard them. Default: TRUE.")
 
 println("Optional arguments:")
-println(" --help Print this help message and exit")
-println(" --publishDir_mode [STRING] How should nextflow publish file outputs. Options can be found here https://www.nextflow.io/docs/latest/process.html#publishdir Default: link.")
-println(" --errorStrategy [STRING] How should nextflow handle errors. Options can be found here https://www.nextflow.io/docs/latest/process.html#errorstrategy. Default: ignore")
-println(" --enable_visualizations [BOOLEAN] Should ASV plots be made? true or false. if true supply a path to the ruhnsheet for plotting to the --runsheet option. Default: false")
-println(" --runsheet [PATH] A 4-column file with these exact headers [ Sample Name, read1_path, raw_R1_suffix, groups] for plotting. Only relevant if --enable_visualizations is true. Default: null")
-println(" --multiqc_config [PATH] Path to a custome multiqc config file. Default: config/multiqc.config")
+println(" --help Print this help message and exit.")
+println(" --publishDir_mode [STRING] How should nextflow publish file outputs. Options can be found here https://www.nextflow.io/docs/latest/process.html#publishdir. Default: link.")
+println(" --errorStrategy [STRING] How should nextflow handle errors. Options can be found here https://www.nextflow.io/docs/latest/process.html#errorstrategy. Default: terminate")
+println(" --enable_visualizations [BOOLEAN] Should ASV plots be made? true or false. if true supply a path to the ruhnsheet for plotting to the --runsheet option. Default: false.")
+println(" --runsheet [PATH] A 4-column file with these exact headers [Sample Name, read1_path, raw_R1_suffix, groups] for plotting. Only relevant if --enable_visualizations is true. Default: null.")
+println(" --multiqc_config [PATH] Path to a custome multiqc config file. Default: config/multiqc.config.")
 
 println("Dada2 parameters passed to filterAndTrim() function:")
-println(" --left_trunc [INT] truncate the sequences to the left by this number of bases. Default: 0")
-println(" --right_trunc [INT] truncate the sequences to the right by this number of bases. Default: 0")
-println(" --left_maxEE [INT] Maximum allowed errors to the left. Default: 1")
-println(" --right_maxEE [INT] Maximum allowed errors to the right. Default: 1")
+println(" --left_trunc [INTEGER] truncate the sequences to the left by this number of bases. Default: 0.")
+println(" --right_trunc [INTEGER] truncate the sequences to the right by this number of bases. Default: 0.")
+println(" --left_maxEE [INTEGER] Maximum allowed errors to the left. Default: 1.")
+println(" --right_maxEE [INTEGER] Maximum allowed errors to the right. Default: 1.")
 println(" --concatenate_reads_only [STRING] Concatenate only with dada2 instead of merging paired reads if TRUE.")
 println(" This is typically used with primers like 515-926, that captured 18S fragments that are typically too long to merge.")
-println(" Note that 16S and 18S should have been separated already prior to running this workflow. This should likely be left as FALSE for any option other than 18S above")
-println(" Values are TRUE or FALSE Default: FALSE")
+println(" Note that 16S and 18S should have been separated already prior to running this workflow. This should likely be left as FALSE for any option other than 18S above.")
+println(" Values are TRUE or FALSE Default: FALSE.")
 
 println("File Suffixes:")
-println(" --primer_trimmed_R1_suffix [STRING] Suffix to use for naming your primer trimmed forward reads. Default: _R1_trimmed.fastq.gz")
-println(" --primer_trimmed_R2_suffix [STRING] Suffix to use for naming your primer trimmed reverse reads. Default: _R2_trimmed.fastq.gz")
-println(" --filtered_R1_suffix [STRING] Suffix to use for naming your quality filtered forward reads. Default: _R1_filtered.fastq.gz")
-println(" --filtered_R2_suffix [STRING] Suffix to use for naming your quality filtered reverse reads. Default: _R2_filtered.fastq.gz")
+println(" --primer_trimmed_R1_suffix [STRING] Suffix to use for naming your primer trimmed forward reads. Default: _R1_trimmed.fastq.gz.")
+println(" --primer_trimmed_R2_suffix [STRING] Suffix to use for naming your primer trimmed reverse reads. Default: _R2_trimmed.fastq.gz.")
+println(" --filtered_R1_suffix [STRING] Suffix to use for naming your quality filtered forward reads. Default: _R1_filtered.fastq.gz.")
+println(" --filtered_R2_suffix [STRING] Suffix to use for naming your quality filtered reverse reads. Default: _R2_filtered.fastq.gz.")
 println("Output directories:")
-println(" --raw_reads_dir [PATH] Where should the fastqc report of the raw reads be stored. Default: Raw_Sequence_Data/")
-println(" --fastqc_out_dir [PATH] Where should multiqc outputs be stored. Default: workflow_output/FastQC_Outputs/")
-println(" --trimmed_reads_dir [PATH] Where should your cutadapt trimmed reads be stored. Default: workflow_output/Trimmed_Sequence_Data/")
-println(" --filtered_reads_dir [PATH] Where should your filtered reads be stored. Default: workflow_output/Filtered_Sequence_Data/")
-println(" --info_out_dir [PATH] Where should output metadata be stored. Default: workflow_output/Metadata/")
-println(" --plots_dir [PATH] Where should your plots be stored if visualization is enabled. Default: workflow_output/Final_Outputs/Plots/")
-println(" --final_outputs_dir [PATH] Where should most outputs and summary reports be stored. Default: workflow_output/Final_Outputs/")
+println(" --raw_reads_dir [PATH] Where should the fastqc report of the raw reads be stored. Default: Raw_Sequence_Data/.")
+println(" --fastqc_out_dir [PATH] Where should multiqc outputs be stored. Default: workflow_output/FastQC_Outputs/.")
+println(" --trimmed_reads_dir [PATH] Where should your cutadapt trimmed reads be stored. Default: workflow_output/Trimmed_Sequence_Data/.")
+println(" --filtered_reads_dir [PATH] Where should your filtered reads be stored. Default: workflow_output/Filtered_Sequence_Data/.")
+println(" --info_out_dir [PATH] Where should output metadata be stored. Default: workflow_output/Metadata/.")
+println(" --plots_dir [PATH] Where should your plots be stored if visualization is enabled. Default: workflow_output/Final_Outputs/Plots/.")
+println(" --final_outputs_dir [PATH] Where should most outputs and summary reports be stored. Default: workflow_output/Final_Outputs/.")
 println("Genelab specific arguements:")
 println(" --GLDS_accession [STRING] A Genelab accession number if the --csv_file parameter is not set. If this parameter is set, it will ignore the --csv_file parameter.")
 println(" --assay_suffix [STRING] Genelabs assay suffix. Default: GLAmpSeq.")
-println(" --output_prefix [STRING] Unique name to tag onto output files. Default: ''")
+println(" --output_prefix [STRING] Unique name to tag onto output files. Default: empty string.")
 println("Paths to existing conda environments to use otherwise a new one will be created using the yaml file in envs/.")
 println(" --conda.qc [PATH] Path to a conda environment containing fastqc, multiqc, zip and python. Default: null.")
 println(" --conda.R [PATH] Path to a conda environment containing R along with the packages decipher and biomformat installed. Default: null.")
 println(" --conda.genelab [PATH] Path to a conda environment containing genlab-utils. Default: null.")
 println(" --conda.cutadapt [PATH] Path to a conda environment containing cutadapt. Default: null.")
 println(" --conda.R_visualizations [PATH] Path to a conda environment containing R packages required for plotting. Default: null.")
-print("Advanced users can edit the nextflow.config file for more control over default settings such container choice, number cpus, memory per task etc.")
+print("Advanced users can edit the nextflow.config file for more control over default settings such container choice, number of cpus, memory per task etc.")
 exit 0
 }
 