nasa
diff --git a/‎GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md
Lines changed: 539 additions & 207 deletions b/‎GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md
Lines changed: 539 additions & 207 deletions
diff --git a/‎GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv
Lines changed: 23 additions & 18 deletions b/‎GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv
Lines changed: 23 additions & 18 deletions
diff --git a/‎GeneLab_Reference_Annotations/README.md
Lines changed: 2 additions & 2 deletions b/‎GeneLab_Reference_Annotations/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/CHANGELOG.md
Lines changed: 48 additions & 0 deletions b/‎GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/CHANGELOG.md
Lines changed: 48 additions & 0 deletions
diff --git a/‎GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md
Lines changed: 99 additions & 0 deletions b/‎GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A/README.md
Lines changed: 99 additions & 0 deletions
@@ -1,19 +1,24 @@
 name,species,strain,ensemblVersion,ref_source,fasta,gtf,taxon,annotations,genelab_annots_link,genelab_annots_info_link
-ARABIDOPSIS,Arabidopsis thaliana,,58,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-58/fasta/arabidopsis_thaliana/dna/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-58/gtf/arabidopsis_thaliana/Arabidopsis_thaliana.TAIR10.58.gtf.gz,3702,org.At.tair.db,,
-BACSU,Bacillus subtilis,subsp. subtilis 168,58,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-58/fasta/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/dna/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-58/gtf/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.58.gtf.gz,224308,org.MeSH.Bsu.168.db,,
-BRARP,Brassica rapa,,58,ensembl_plants,,,,,,
-WORM,Caenorhabditis elegans,,111,ensembl,https://ftp.ensembl.org/pub/release-111/fasta/caenorhabditis_elegans/dna/Caenorhabditis_elegans.WBcel235.dna.toplevel.fa.gz,https://ftp.ensembl.org/pub/release-111/gtf/caenorhabditis_elegans/Caenorhabditis_elegans.WBcel235.111.gtf.gz,6239,org.Ce.eg.db,,
-ZEBRAFISH,Danio rerio,,111,ensembl,,,7955,org.Dr.eg.db,,
-FLY,Drosophila melanogaster,,111,ensembl,,,7227,org.Dm.eg.db,,
-ERCC,,,,ThermoFisher,,,,,,
-ECOLI,Escherichia coli,str. K-12 substr. MG1655,58,ensembl_bacteria,,,83333,org.EcK12.eg.db,,
-HUMAN,Homo sapiens,,111,ensembl,https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-111/gtf/homo_sapiens/Homo_sapiens.GRCh38.111.gtf.gz,9606,org.Hs.eg.db,,
-MOUSE,Mus musculus,,111,ensembl,https://ftp.ensembl.org/pub/release-111/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-111/gtf/mus_musculus/Mus_musculus.GRCm39.111.gtf.gz,10090,org.Mm.eg.db,,
-,Mycobacterium marinum,LHM4,58,ensembl_bacteria,,,,,,
-ORYLA,Oryzias latipes,,111,ensembl,,,,,,
-RAT,Rattus norvegicus,,111,ensembl,,,10116,org.Rn.eg.db,,
-YEAST,Saccharomyces cerevisiae,S288C,111,ensembl,,,559292,org.Sc.sgd.db,,
-STAA8,Staphylococcus aureus,UAMS-1,58,ensembl_bacteria,,,,,,
-,Streptococcus mutans,UA159,58,ensembl_bacteria,,,,,,
-BRADI,Brachypodium distachyon,,58,ensembl_plants,,,15368,,,
-ORYSJ,Oryza sativa,Japonica,58,ensembl_plants,,,4530,BSgenome.Osativa.MSU.MSU7,,
+ARABIDOPSIS,Arabidopsis thaliana,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/arabidopsis_thaliana/dna/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/arabidopsis_thaliana/Arabidopsis_thaliana.TAIR10.59.gtf.gz,3702,org.At.tair.db,https://figshare.com/ndownloader/files/48354355,https://figshare.com/ndownloader/files/48354352
+BACSU,Bacillus subtilis,subsp. subtilis 168,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/dna/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/bacillus_subtilis_subsp_subtilis_str_168_gca_000009045/Bacillus_subtilis_subsp_subtilis_str_168_gca_000009045.ASM904v1.59.gtf.gz,224308,org.Bsubtilissubspsubtilis168.eg.db,https://figshare.com/ndownloader/files/48354346,https://figshare.com/ndownloader/files/48354349
+BRADI,Brachypodium distachyon,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/brachypodium_distachyon/dna/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/brachypodium_distachyon/Brachypodium_distachyon.Brachypodium_distachyon_v3.0.59.gtf.gz,15368,org.Bdistachyon.eg.db,https://figshare.com/ndownloader/files/48354370,https://figshare.com/ndownloader/files/48354361
+BRARP,Brassica rapa,,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/brassica_rapa/dna/Brassica_rapa.Brapa_1.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/brassica_rapa/Brassica_rapa.Brapa_1.0.59.gtf.gz,,,,
+WORM,Caenorhabditis elegans,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/caenorhabditis_elegans/dna/Caenorhabditis_elegans.WBcel235.dna.toplevel.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/caenorhabditis_elegans/Caenorhabditis_elegans.WBcel235.112.gtf.gz,6239,org.Ce.eg.db,https://figshare.com/ndownloader/files/48354373,https://figshare.com/ndownloader/files/48354364
+ZEBRAFISH,Danio rerio,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/danio_rerio/dna/Danio_rerio.GRCz11.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/danio_rerio/Danio_rerio.GRCz11.112.gtf.gz,7955,org.Dr.eg.db,https://figshare.com/ndownloader/files/48354388,https://figshare.com/ndownloader/files/48354367
+FLY,Drosophila melanogaster,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/drosophila_melanogaster/dna/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.112.gtf.gz,7227,org.Dm.eg.db,https://figshare.com/ndownloader/files/48354382,https://figshare.com/ndownloader/files/48354376
+ERCC,,,,ThermoFisher,https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip,https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip,,,,
+ECOLI,Escherichia coli,str. K-12 substr. MG1655,59,ensembl_bacteria,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/fasta/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/dna/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/bacteria/release-59/gtf/bacteria_0_collection/escherichia_coli_str_k_12_substr_mg1655_gca_000005845/Escherichia_coli_str_k_12_substr_mg1655_gca_000005845.ASM584v2.59.gtf.gz,511145,org.EcolistrK12substrMG1655.eg.db,https://figshare.com/ndownloader/files/48354379,https://figshare.com/ndownloader/files/48354394
+HUMAN,Homo sapiens,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/homo_sapiens/Homo_sapiens.GRCh38.112.gtf.gz,9606,org.Hs.eg.db,https://figshare.com/ndownloader/files/48354445,https://figshare.com/ndownloader/files/48354448
+,Lactobacillus acidophilus,NCFM,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/985/GCF_000011985.1_ASM1198v1/GCF_000011985.1_ASM1198v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/985/GCF_000011985.1_ASM1198v1/GCF_000011985.1_ASM1198v1_genomic.gtf.gz,272621,,https://figshare.com/ndownloader/files/48354424,https://figshare.com/ndownloader/files/48354415
+MOUSE,Mus musculus,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/mus_musculus/dna/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/mus_musculus/Mus_musculus.GRCm39.112.gtf.gz,10090,org.Mm.eg.db,https://figshare.com/ndownloader/files/48354460,https://figshare.com/ndownloader/files/48354457
+,Mycobacterium marinum,M,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/018/345/GCF_000018345.1_ASM1834v1/GCF_000018345.1_ASM1834v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/018/345/GCF_000018345.1_ASM1834v1/GCF_000018345.1_ASM1834v1_genomic.gtf.gz,216594,,https://figshare.com/ndownloader/files/48354433,https://figshare.com/ndownloader/files/48354430
+ORYSJ,Oryza sativa,Japonica,59,ensembl_plants,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/fasta/oryza_sativa/dna/Oryza_sativa.IRGSP-1.0.dna.toplevel.fa.gz,https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-59/gtf/oryza_sativa/Oryza_sativa.IRGSP-1.0.59.gtf.gz,39947,,https://figshare.com/ndownloader/files/48354451,https://figshare.com/ndownloader/files/48354454
+ORYLA,Oryzias latipes,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/oryzias_latipes/dna/Oryzias_latipes.ASM223467v1.dna.toplevel.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/oryzias_latipes/Oryzias_latipes.ASM223467v1.112.gtf.gz,8090,org.Olatipes.eg.db,https://figshare.com/ndownloader/files/48354463,https://figshare.com/ndownloader/files/48354466
+,Pseudomonas aeruginosa,UCBPP-PA14,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/014/625/GCF_000014625.1_ASM1462v1/GCF_000014625.1_ASM1462v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/014/625/GCF_000014625.1_ASM1462v1/GCF_000014625.1_ASM1462v1_genomic.gtf.gz,208963,,https://figshare.com/ndownloader/files/48354421,https://figshare.com/ndownloader/files/48354427
+RAT,Rattus norvegicus,,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/rattus_norvegicus/dna/Rattus_norvegicus.mRatBN7.2.dna.toplevel.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/rattus_norvegicus/Rattus_norvegicus.mRatBN7.2.112.gtf.gz,10116,org.Rn.eg.db,https://figshare.com/ndownloader/files/48354472,https://figshare.com/ndownloader/files/48354475
+YEAST,Saccharomyces cerevisiae,S288C,112,ensembl,https://ftp.ensembl.org/pub/release-112/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz,https://ftp.ensembl.org/pub/release-112/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.112.gtf.gz,559292,org.Sc.sgd.db,https://figshare.com/ndownloader/files/48354469,https://figshare.com/ndownloader/files/48354478
+SALTY,Salmonella enterica,serovar Typhimurium str. LT2,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.gtf.gz,99287,org.SentericaserovarTyphimuriumstrLT2.eg.db,https://figshare.com/ndownloader/files/48354385,https://figshare.com/ndownloader/files/48354391
+,Serratia liquefaciens,ATCC 27592,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/422/085/GCF_000422085.1_ASM42208v1/GCF_000422085.1_ASM42208v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/422/085/GCF_000422085.1_ASM42208v1/GCF_000422085.1_ASM42208v1_genomic.gtf.gz,1346614,,https://figshare.com/ndownloader/files/48354436,https://figshare.com/ndownloader/files/48354439
+,Staphylococcus aureus,MRSA252,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/505/GCF_000011505.1_ASM1150v1/GCF_000011505.1_ASM1150v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/505/GCF_000011505.1_ASM1150v1/GCF_000011505.1_ASM1150v1_genomic.gtf.gz,282458,,https://figshare.com/ndownloader/files/48354403,https://figshare.com/ndownloader/files/48354409
+,Streptococcus mutans,UA159,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/007/465/GCF_000007465.2_ASM746v2/GCF_000007465.2_ASM746v2_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/007/465/GCF_000007465.2_ASM746v2/GCF_000007465.2_ASM746v2_genomic.gtf.gz,210007,,https://figshare.com/ndownloader/files/48354397,https://figshare.com/ndownloader/files/48354406
+,Vibrio fischeri,ES114,,ncbi,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/805/GCF_000011805.1_ASM1180v1/GCF_000011805.1_ASM1180v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/011/805/GCF_000011805.1_ASM1180v1/GCF_000011805.1_ASM1180v1_genomic.gtf.gz,312309,,https://figshare.com/ndownloader/files/48354412,https://figshare.com/ndownloader/files/48354418
@@ -20,6 +20,6 @@
 **Developed by:**  
 Mike Lee
 
-**Maintained by:**
-Alexis Torres
+**Maintained by:**  
+Alexis Torres  
 Crystal Han
@@ -0,0 +1,48 @@
+# Changelog  
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [1.1.0](https://github.com/nasa/GeneLab_Data_Processing/blob/DEV_GeneLab_Reference_Annotations_vGL-DPPD-7110-A/GeneLab_Reference_Annotations/Workflow_Documentation/GL_RefAnnotTable-A)  
+
+### Added  
+
+- Added support for:
+    - Bacillus subtilis, subsp. subtilis 168  
+    - Brachypodium distachyon  
+    - Escherichia coli, str. K-12 substr. MG1655  
+    - Oryzias latipes  
+    - Lactobacillus acidophilus NCFM  
+    - Mycobacterium marinum M  
+    - Oryza sativa Japonica  
+    - Pseudomonas aeruginosa UCBPP-PA14  
+    - Salmonella enterica subsp. enterica serovar Typhimurium str. LT2  
+    - Serratia liquefaciens ATCC 27592  
+    - Staphylococcus aureus MRSA252  
+    - Streptococcus mutans UA159  
+    - Vibrio fischeri ES114  
+- Added AnnotationForge helper script install-org-db.R to create organism-specific annotation packages (org.*.eg.db) in R if not available on Bioconductor. Used for:  
+    - Bacillus subtilis, subsp. subtilis 168  
+    - Brachypodium distachyon  
+    - Escherichia coli, str. K-12 substr. MG1655  
+    - Oryzias latipes  
+    - Salmonella enterica subsp. enterica serovar Typhimurium str. LT2  
+- Added NCBI as a source for FASTA and GTF files  
+
+### Fixed  
+
+- Fixed processing for ECOLI
+
+### Changed  
+
+- Updated Ensembl versions
+    - Animals: Ensembl release 112
+    - Plants: Ensembl plants release 59
+    - Bacteria: Ensembl bacteria release 59
+- Removed org.EcK12.eg.db, which is no longer available on Bioconductor, and replaced it with a locally created annotations database
+- Changed the first argument of GL-DPPD-7110-A_build-genome-annots-tab.R from the 'name' column value to the 'species' column value (e.g., 'Mus musculus' instead of 'MOUSE')
+
+
+## [1.0.0](https://github.com/nasa/GeneLab_Data_Processing/releases/tag/GL_RefAnnotTable_1.0.0)
@@ -0,0 +1,99 @@
+# GL_RefAnnotTable Workflow Information and Usage Instructions
+
+## General workflow info
+The current GeneLab Reference Annotation Table (GL_RefAnnotTable) pipeline is implemented as an R workflow that can be run from a command line interface (CLI) using bash. The workflow can be used even if you are unfamiliar with R, but if you want to learn more about R, visit the [R-project about page here](https://www.r-project.org/about.html). Additionally, an introduction to R along with installation help and information about using R for bioinformatics can be found [here at Happy Belly Bioinformatics](https://astrobiomike.github.io/R/basics).  
+
+## Utilizing the workflow
+
+1. [Install R and R packages](#1-install-r-and-r-packages)  
+2. [Download the workflow files](#2-download-the-workflow-files)  
+3. [Setup Execution Permission for Workflow Scripts](#3-setup-execution-permission-for-workflow-scripts)
+4. [Run the workflow](#4-run-the-workflow)  
+5. [Run the annotations database creation function as a stand-alone script](#5-run-the-annotations-database-creation-function-as-a-stand-alone-script)
+<br>
+
+### 1. Install R and R packages
+
+We recommend installing R via the [Comprehensive R Archive Network (CRAN)](https://cran.r-project.org/) as follows: 
+
+1. Select the [CRAN Mirror](https://cran.r-project.org/mirrors.html) closest to your location.
+2. Click the link under the "Download and Install R" section that's consistent with your machine.
+3. Click on the R-4.4.0 package consistent with your machine to download.
+4. Double click on the R-4.4.0.pkg downloaded in step 3 and follow the installation instructions.
+
+Once R is installed, open a CLI terminal and run the following command to activate R:
+
+```bash
+R
+```
+
+Within an active R environment, run the following commands to install the required R packages:
+
+```R
+install.packages("remotes", repos = "http://cran.us.r-project.org")
+remotes::install_version("tidyverse", version = "2.0.0", repos = "http://cran.us.r-project.org")
+
+install.packages("BiocManager", repos = "http://cran.us.r-project.org")
+
+BiocManager::install("STRINGdb", version = "3.19")
+BiocManager::install("PANTHER.db", version = "3.19")
+BiocManager::install("rtracklayer", version = "3.19")
+```
+
+<br>
+
+### 2. Download the Workflow Files
+
+All files required for utilizing the GL_RefAnnotTable workflow for generating reference annotation tables are in the [workflow_code](workflow_code) directory. To get a copy of the latest GL_RefAnnotTable version onto your system, run the following command:
+
+```bash
+curl -LO https://github.com/nasa/GeneLab_Data_Processing/releases/download/GL_RefAnnotTable-A_1.1.0/GL_RefAnnotTable-A_1.1.0.zip
+``` 
+
+<br>
+
+### 3. Setup Execution Permission for Workflow Scripts
+
+Once you've downloaded the GL_RefAnnotTable workflow directory as a zip file, unzip the workflow then `cd` into the GL_RefAnnotTable-A_1.1.0 directory on the CLI. Next, run the following command to set the execution permissions for the R scripts:
+
+```bash
+chmod -R u+x *R
+```
+
+<br>
+
+### 4. Run the Workflow
+
+While in the GL_RefAnnotTable workflow directory, you are now able to run the workflow. Below is an example of how to run the workflow to build an annotation table for Mus musculus (mouse):
+
+```bash
+Rscript GL-DPPD-7110-A_build-genome-annots-tab.R 'Mus musculus'
+```
+
+**Input data:**
+
+- No input files are required. Specify the target organism using a positional command line argument. `Mus musculus` is used in the example above. To see a list of all available organisms, run `Rscript GL-DPPD-7110-A_build-genome-annots-tab.R` without positional arguments. The correct argument for each organism can also be found in the 'species' column of the [GL-DPPD-7110-A_annotations.csv](../../Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv) file.
+
+- Optional: a reference table CSV can be supplied as a second positional argument instead of using the default [GL-DPPD-7110-A_annotations.csv](../../Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv)
+
+**Output data:**
+
+- *-GL-annotations.tsv (Tab-delimited table of gene annotations)
+- *-GL-build-info.txt (Text file containing information used to create the annotation table, including tool and tool versions and date of creation)
+
+### 5. Run the annotations database creation function as a stand-alone script
+
+When the workflow is run, if the reference table does not specify an annotations database for the target organism in the `annotations` column, the `install_annotations` function, defined in the `install-org-db.R` script, will be executed. This function will locally create and install an annotations database R package using AnnotationForge. It can also be run as a stand-alone script from the command line:
+
+```bash
+Rscript install-org-db.R 'Bacillus subtilis' /path/to/GL-DPPD-7110-A_annotations.csv
+```
+
+**Input data:**
+
+- The target organism must be specified as the first positional command line argument; `Bacillus subtilis` is used in the example above. The correct argument for each organism can be found in the 'species' column of the [GL-DPPD-7110-A_annotations.csv](../../Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv) file.
+
+- The path to a local reference table must also be supplied as the second positional argument
+
+**Output data:**
+
+- org.*.eg.db/ (species-specific annotation database, as a local R package)