Skip to content

Commit 9b2b943

Browse files
committed
[GL_RefAnnotTable] Added makeOrgPackageFromNCBI to DPPD doc
1 parent f6154f7 commit 9b2b943

File tree

1 file changed

+62
-33
lines changed
  • GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A

1 file changed

+62
-33
lines changed

GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A.md

Lines changed: 62 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,13 @@ The default columns in the annotation table are:
114114
- [Annotation Table Build Overview with Example Commands](#annotation-table-build-overview-with-example-commands)
115115
- [0. Set Up Environment](#0-set-up-environment)
116116
- [1. Define Variables and Output File Names](#1-define-variables-and-output-file-names)
117-
- [2. Load Annotation Databases](#2-load-annotation-databases)
118-
- [3. Build Initial Annotation Table](#3-build-initial-annotation-table)
119-
- [4. Add org.db Keys](#4-add-orgdb-keys)
120-
- [5. Add STRING IDs](#5-add-string-ids)
121-
- [6. Add Gene Ontology (GO) Slim IDs](#6-add-gene-ontology-go-slim-ids)
122-
- [7. Export Annotation Table and Build Info](#7-export-annotation-table-and-build-info)
117+
- [2. Create the Organism Package if it is Not Hosted by Bioconductor](#2-create-the-organism-package-if-it-is-not-hosted-by-bioconductor)
118+
- [3. Load Annotation Databases](#3-load-annotation-databases)
119+
- [4. Build Initial Annotation Table](#4-build-initial-annotation-table)
120+
- [5. Add org.db Keys](#5-add-orgdb-keys)
121+
- [6. Add STRING IDs](#6-add-string-ids)
122+
- [7. Add Gene Ontology (GO) Slim IDs](#7-add-gene-ontology-go-slim-ids)
123+
- [8. Export Annotation Table and Build Info](#8-export-annotation-table-and-build-info)
123124

124125

125126

@@ -234,7 +235,54 @@ if ( file.exists(out_table_filename) ) {
234235

235236
---
236237

237-
## 2. Load Annotation Databases
238+
## 2. Create the Organism Package if it is Not Hosted by Bioconductor
239+
240+
```R
241+
# Use AnnotationForge's makeOrgPackageFromNCBI function with default settings to create the organism-specific org.db R package from available NCBI annotations
242+
243+
# Try to install the org.db package from Bioconductor; if the installation fails, build it locally
244+
BiocManager::install(target_org_db, ask = FALSE)
245+
if (!requireNamespace(target_org_db, quietly = TRUE)) {
246+
tryCatch({
247+
# Parse the organism's name from the reference table to build the org.db package name (target_org_db)
248+
genus_species <- strsplit(target_species_designation, " ")[[1]]
249+
if (length(genus_species) < 1) {
250+
stop("Species designation is not correctly formatted: ", target_species_designation)
251+
}
252+
genus <- genus_species[1]
253+
species <- ifelse(length(genus_species) > 1, genus_species[2], "")
254+
strain <- ref_table %>%
255+
filter(name == target_organism) %>%
256+
pull(strain) %>%
257+
gsub("[^A-Za-z0-9]", "", .)
258+
if (!is.na(strain) && strain != "") {
259+
species <- paste0(species, strain)
260+
}
261+
target_org_db <- paste0("org.", substr(genus, 1, 1), species, ".eg.db")
262+
263+
BiocManager::install(c("AnnotationForge", "biomaRt", "GO.db"), ask = FALSE)
264+
library(AnnotationForge)
265+
makeOrgPackageFromNCBI(
266+
version = "0.1",
267+
author = "Your Name <your.email@example.com>",
268+
maintainer = "Your Name <your.email@example.com>",
269+
outputDir = "./",
270+
tax_id = target_taxid,
271+
genus = genus,
272+
species = species
273+
)
274+
install.packages(file.path("./", target_org_db), repos = NULL, type = "source", quiet = TRUE)
275+
cat(paste0("'", target_org_db, "' has been successfully built and installed.\n"))
276+
}, error = function(e) {
277+
stop("Failed to build and load the package: ", target_org_db, "\nError: ", e$message)
278+
})
279+
target_org_db <- install_annotations(target_organism, ref_tab_path)
280+
}
281+
```
282+
283+
---
284+
285+
## 3. Load Annotation Databases
238286

239287
```R
240288
# Set timeout time to ensure annotation file downloads will complete
@@ -248,27 +296,8 @@ GTF <- data.frame(GTF)
248296

249297
###### org.db ########
250298

251-
# Define a function to load the specified org.db package for a given target organism
252-
install_and_load_org_db <- function(target_organism, target_org_db, ref_tab_path) {
253-
if (!is.na(target_org_db) && target_org_db != "") {
254-
# Attempt to install the package from Bioconductor
255-
BiocManager::install(target_org_db, ask = FALSE)
256-
257-
# Check if the package was successfully loaded
258-
if (!requireNamespace(target_org_db, quietly = TRUE)) {
259-
# If not, attempt to create it locally using a helper script
260-
source("install-org-db.R")
261-
target_org_db <- install_annotations(target_organism, ref_tab_path)
262-
}
263-
} else {
264-
# If target_org_db is NA or empty, create it locally using the helper script
265-
source("install-org-db.R")
266-
target_org_db <- install_annotations(target_organism, ref_tab_path)
267-
}
268-
269-
# Load the package into the R session
270-
library(target_org_db, character.only = TRUE)
271-
}
299+
# Load the package into the R session
300+
library(target_org_db, character.only = TRUE)
272301

273302
# Define list of supported organisms which do not use annotations from an org.db
274303
no_org_db <- c("NCFM", "MMARINUMM", "ORYSJ", "PA14", "ATCC27592", "MRSA252", "UA159", "ES114")
@@ -283,7 +312,7 @@ if (!(target_organism %in% no_org_db) && (target_organism %in% currently_accepte
283312

284313
---
285314

286-
## 3. Build Initial Annotation Table
315+
## 4. Build Initial Annotation Table
287316

288317
```R
289318
# Initialize table from GTF
@@ -355,7 +384,7 @@ if (target_organism == "SALTY") {
355384

356385
---
357386

358-
## 4. Add org.db Keys
387+
## 5. Add org.db Keys
359388

360389
```R
361390
annot_orgdb <- annot_gtf
@@ -458,7 +487,7 @@ if (target_organism == "YEAST") {
458487

459488
---
460489

461-
## 5. Add STRING IDs
490+
## 6. Add STRING IDs
462491

463492
```R
464493
# Define organisms that do not use STRING annotations
@@ -570,7 +599,7 @@ annot_stringdb <- as.data.frame(annot_stringdb)
570599

571600
---
572601

573-
## 6. Add Gene Ontology (GO) slim IDs
602+
## 7. Add Gene Ontology (GO) Slim IDs
574603

575604
```R
576605
# Define organisms that do not use PANTHER annotations
@@ -618,7 +647,7 @@ if (!(target_organism %in% no_panther_db)) {
618647

619648
---
620649

621-
## 7. Export Annotation Table and Build Info
650+
## 8. Export Annotation Table and Build Info
622651

623652
```R
624653
# Group by primary key to remove any remaining unjoined or duplicate rows

0 commit comments

Comments
 (0)