Merge pull request #65 from torres-alexis/rna-microarray-release-prep

asaravia-butler · web-flow · commit 0b756c9da376 · 2024-02-29T14:03:42.000-08:00
NF_RCP-F_1.0.4, NF_MAAffymetrix_1.0.3, NF_MAAgilent1ch_1.0.2 release prep
- typo and path fixes
diff --git a/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110.md b/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110.md
@@ -217,7 +217,7 @@ string_db <- STRINGdb$new(version = "11.5", species = target_taxid, score_thresh
 string_map <- string_db$map(annot, primary_keytype, removeUnmappedRows = FALSE, takeFirst = FALSE)
 
 ## Create a table using the gene IDs of the primary keytype as row names and a column containing STRING IDs. ##
-## For genes containig multiple STRING IDs, combine all STRING IDs for each gene into one row and separate each ID with a '|' ##
+## For genes containing multiple STRING IDs, combine all STRING IDs for each gene into one row and separate each ID with a '|' ##
 tab_with_multiple_STRINGids_combined <-
     data.frame(row.names = annot[[primary_keytype]])
 
@@ -256,7 +256,7 @@ pthOrganisms(PANTHER.db) <- target_organism
 ## Use ENTREZ IDs to map genes to respective PANTHER GO slim annotation(s) ##
 # Note: Since there can be none (indicated in the annotation table as "NA"), one, or 
 # multiple ENTREZ IDs for a gene, this section contains 3 distinct parts to handle
-# each of those scenarios and create a new column in the annotation table containg the GO slim IDs
+# each of those scenarios and create a new column in the annotation table containing the GO slim IDs
 
 for ( curr_row in 1:dim(annot)[1] ) {
 
diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable/README.md b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable/README.md
@@ -71,7 +71,7 @@ Rscript GL-DPPD-7110_build-genome-annots-tab.R MOUSE
 
 **Input data:**
 
-- No input files required, but a target organism must be specified as a positional command line argument, `MOUSE` is used in the example above. Run `Rscript GL-DPPD-7110_build-genome-annots-tab.R` with no positional arugments to see the list of currently available organisms. 
+- No input files required, but a target organism must be specified as a positional command line argument, `MOUSE` is used in the example above. Run `Rscript GL-DPPD-7110_build-genome-annots-tab.R` with no positional arguments to see the list of currently available organisms. 
 
 **Output data:**
 
diff --git a/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable/workflow_code/GL-DPPD-7110_build-genome-annots-tab.R b/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable/workflow_code/GL-DPPD-7110_build-genome-annots-tab.R
@@ -263,7 +263,7 @@ string_map <- string_db$map(annot, primary_keytype, removeUnmappedRows = FALSE,
 ## Adding some blank lines just for spacing on print-out ##
 cat("\n\n")
 
-## Create a table using the gene IDs of the primary keytype as row names and a column containing STRING IDs. For genes containig multiple STRING IDs, combine all STRING IDs for each gene into one row and separate each ID with a '|' ##
+## Create a table using the gene IDs of the primary keytype as row names and a column containing STRING IDs. For genes containing multiple STRING IDs, combine all STRING IDs for each gene into one row and separate each ID with a '|' ##
 
 tab_with_multiple_STRINGids_combined <-
     data.frame(row.names = annot[[primary_keytype]])
@@ -303,7 +303,7 @@ pthOrganisms(PANTHER.db) <- target_organism
 
 ## Use ENTREZ IDs to map genes to respective PANTHER GO slim annotation(s) ##
 
-## Note: Since there can be none (indicated in the annotation table as "NA"), one, or multiple ENTREZ IDs for a gene, this section contains 3 distinct parts to handle each of those scenarios and create a new column in the annotation table containg the GO slim IDs ## 
+## Note: Since there can be none (indicated in the annotation table as "NA"), one, or multiple ENTREZ IDs for a gene, this section contains 3 distinct parts to handle each of those scenarios and create a new column in the annotation table containing the GO slim IDs ## 
 
 for ( curr_row in 1:dim(annot)[1] ) {
 
diff --git a/Microarray/Affymetrix/Workflow_Documentation/NF_MAAffymetrix/README.md b/Microarray/Affymetrix/Workflow_Documentation/NF_MAAffymetrix/README.md
@@ -97,7 +97,7 @@ All files required for utilizing the NF_MAAffymetrix GeneLab workflow for proces
 copy of latest NF_MAAffymetrix version on to your system, the code can be downloaded as a zip file from the release page then unzipped after downloading by running the following commands: 
 
 ```bash
-wget https://github.com/asaravia-butler/GeneLab_Data_Processing/releases/download/NF_MAAffymetrix_1.0.3/NF_MAAffymetrix_1.0.3.zip
+wget https://github.com/nasa/GeneLab_Data_Processing/releases/download/NF_MAAffymetrix_1.0.3/NF_MAAffymetrix_1.0.3.zip
 
 unzip NF_MAAffymetrix_1.0.3.zip
 ```
diff --git a/Microarray/Affymetrix/Workflow_Documentation/NF_MAAffymetrix/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/resources/usr/bin/generate_protocol.sh b/Microarray/Affymetrix/Workflow_Documentation/NF_MAAffymetrix/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/resources/usr/bin/generate_protocol.sh
@@ -58,7 +58,7 @@ else
 fi
 
 # Read the template file
-template="Data were processed as described in GL-DPPD-7114 (https://github.com/nasa/GeneLab_Data_Processing/blob/master/Microarray/Affymetrix/Pipeline_GL-DPPD-7114_Versions/GL-DPPD-7114.md) using NF_MAAffymetrix version 1.0.3 (GitHub link coming soon).  In short, a RunSheet containing raw data file location and processing metadata from the study's *ISA.zip file was generated using dp_tools (version ${dp_tools_VERSION}). The raw array data files were loaded into R (version ${R_VERSION}) using oligo (version ${oligo_VERSION}). Raw data quality assurance density plot, pseudo images, MA plots, and boxplots were generated using oligo (version ${oligo_VERSION}). The raw probe level intensity data was background corrected and normalized across arrays via the oligo (version ${oligo_VERSION}) quantile method. Normalized probe level data quality assurance density plot, pseudo images, MA plots, and boxplots were generated using oligo (version ${oligo_VERSION}).  Normalized probe level data was summarized to the probeset level using the oligo (version ${oligo_VERSION}) RMA method. ${GENE_MAPPING_STEP} Differential expression analysis was performed in R (version ${R_VERSION}) using limma (version ${limma_VERSION}); all groups were compared pairwise for each probeset to generate a moderated t-statistic and associated p- and adjusted p-value. Gene annotations were assigned for every probeset that mapped to exactly one Ensembl gene ID using the custom annotation tables generated in-house as detailed in GL-DPPD-7110 (https://github.com/nasa/GeneLab_Data_Processing/blob/GL_RefAnnotTable_1.0.0/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110.md), with STRINGdb (version 2.8.4), PANTHER.db (version 1.0.11), and ${GENE_ANNOTATION_DB} (version 3.15.0)."
+template="Data were processed as described in GL-DPPD-7114 (https://github.com/nasa/GeneLab_Data_Processing/blob/master/Microarray/Affymetrix/Pipeline_GL-DPPD-7114_Versions/GL-DPPD-7114.md) using NF_MAAffymetrix version 1.0.3 (https://github.com/nasa/GeneLab_Data_Processing/tree/NF_MAAffymetrix_1.0.3/Microarray/Affymetrix/Workflow_Documentation/NF_MAAffymetrix).  In short, a RunSheet containing raw data file location and processing metadata from the study's *ISA.zip file was generated using dp_tools (version ${dp_tools_VERSION}). The raw array data files were loaded into R (version ${R_VERSION}) using oligo (version ${oligo_VERSION}). Raw data quality assurance density plot, pseudo images, MA plots, and boxplots were generated using oligo (version ${oligo_VERSION}). The raw probe level intensity data was background corrected and normalized across arrays via the oligo (version ${oligo_VERSION}) quantile method. Normalized probe level data quality assurance density plot, pseudo images, MA plots, and boxplots were generated using oligo (version ${oligo_VERSION}).  Normalized probe level data was summarized to the probeset level using the oligo (version ${oligo_VERSION}) RMA method. ${GENE_MAPPING_STEP} Differential expression analysis was performed in R (version ${R_VERSION}) using limma (version ${limma_VERSION}); all groups were compared pairwise for each probeset to generate a moderated t-statistic and associated p- and adjusted p-value. Gene annotations were assigned for every probeset that mapped to exactly one Ensembl gene ID using the custom annotation tables generated in-house as detailed in GL-DPPD-7110 (https://github.com/nasa/GeneLab_Data_Processing/blob/GL_RefAnnotTable_1.0.0/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110.md), with STRINGdb (version 2.8.4), PANTHER.db (version 1.0.11), and ${GENE_ANNOTATION_DB} (version 3.15.0)."
 
 # Output the filled template
 echo "$template" > PROTOCOL_GLmicroarray.txt
diff --git a/Microarray/Affymetrix/Workflow_Documentation/NF_MAAffymetrix/workflow_code/nextflow.config b/Microarray/Affymetrix/Workflow_Documentation/NF_MAAffymetrix/workflow_code/nextflow.config
@@ -40,8 +40,8 @@ profiles {
 }
 
 manifest {
-    homePage = ''
-    description = 'Affymetrx Microarray Workflow for Document GL-DPPD-7114'
+    homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/master/Microarray/Affymetrix'
+    description = 'Affymetrix Microarray Workflow for Document GL-DPPD-7114'
     mainScript = 'main.nf'
     defaultBranch = 'main'
     nextflowVersion = '>=22.10.0'
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/README.md b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/README.md
@@ -93,7 +93,7 @@ We recommend installing Singularity on a system wide level as per the associated
 All files required for utilizing the NF_MAAgilent1ch GeneLab workflow for processing Agilent 1 Channel Microarray data are in the [workflow_code](workflow_code) directory. To get a copy of latest NF_MAAgilent1ch version on to your system, the code can be downloaded as a zip file from the release page then unzipped after downloading by running the following commands: 
 
 ```bash
-wget https://github.com/asaravia-butler/GeneLab_Data_Processing/releases/download/NF_MAAgilent1ch_1.0.2/NF_MAAgilent1ch_1.0.2.zip
+wget https://github.com/nasa/GeneLab_Data_Processing/releases/download/NF_MAAgilent1ch_1.0.2/NF_MAAgilent1ch_1.0.2.zip
 
 unzip NF_MAAgilent1ch_1.0.2.zip
 ```
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/nextflow.config b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/nextflow.config
@@ -40,7 +40,7 @@ profiles {
 }
 
 manifest {
-    homePage = ''
+    homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/master/Microarray/Agilent_1-channel'
     description = 'Agilent 1 Channel Microarray Workflow for Document GL-DPPD-7112'
     mainScript = 'main.nf'
     defaultBranch = 'main'
diff --git a/RNAseq/Workflow_Documentation/NF_RCP-F/README.md b/RNAseq/Workflow_Documentation/NF_RCP-F/README.md
@@ -101,7 +101,7 @@ All files required for utilizing the NF_RCP-F GeneLab workflow for processing RN
 copy of latest NF_RCP-F version on to your system, the code can be downloaded as a zip file from the release page then unzipped after downloading by running the following commands: 
 
 ```bash
-wget https://github.com/nasa/GeneLab_Data_Processing/releases/download/NF_RCP-F_4/NF_RCP-F_1.0.4.zip
+wget https://github.com/nasa/GeneLab_Data_Processing/releases/download/NF_RCP-F_1.0.4/NF_RCP-F_1.0.4.zip
 
 unzip NF_RCP-F_1.0.4.zip
 ```
diff --git a/RNAseq/Workflow_Documentation/NF_RCP-F/workflow_code/nextflow.config b/RNAseq/Workflow_Documentation/NF_RCP-F/workflow_code/nextflow.config
@@ -47,7 +47,7 @@ profiles {
 }
 
 manifest {
-    homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/jonathan-branch/RNAseq'
+    homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/master/RNAseq'
     description = 'RNA-Seq Pipeline for Document GL-DPPD-7101-F'
     mainScript = 'main.nf'
     defaultBranch = 'main'

Original file line number	Diff line number	Diff line change
`@@ -40,7 +40,7 @@ profiles {`
`40`	`40`	`}`
`41`	`41`
`42`	`42`	`manifest {`
`43`		`- homePage = ''`
	`43`	`+ homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/master/Microarray/Agilent_1-channel'`
`44`	`44`	`description = 'Agilent 1 Channel Microarray Workflow for Document GL-DPPD-7112'`
`45`	`45`	`mainScript = 'main.nf'`
`46`	`46`	`defaultBranch = 'main'`
Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,7 @@ profiles {`
`47`	`47`	`}`
`48`	`48`
`49`	`49`	`manifest {`
`50`		`- homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/jonathan-branch/RNAseq'`
	`50`	`+ homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/master/RNAseq'`
`51`	`51`	`description = 'RNA-Seq Pipeline for Document GL-DPPD-7101-F'`
`52`	`52`	`mainScript = 'main.nf'`
`53`	`53`	`defaultBranch = 'main'`