@@ -114,12 +114,13 @@ The default columns in the annotation table are:
114
114
- [ Annotation Table Build Overview with Example Commands] ( #annotation-table-build-overview-with-example-commands )
115
115
- [ 0. Set Up Environment] ( #0-set-up-environment )
116
116
- [ 1. Define Variables and Output File Names] ( #1-define-variables-and-output-file-names )
117
- - [ 2. Load Annotation Databases] ( #2-load-annotation-databases )
118
- - [ 3. Build Initial Annotation Table] ( #3-build-initial-annotation-table )
119
- - [ 4. Add org.db Keys] ( #4-add-orgdb-keys )
120
- - [ 5. Add STRING IDs] ( #5-add-string-ids )
121
- - [ 6. Add Gene Ontology (GO) Slim IDs] ( #6-add-gene-ontology-go-slim-ids )
122
- - [ 7. Export Annotation Table and Build Info] ( #7-export-annotation-table-and-build-info )
117
+ - [2. Create the Organism Package if it is Not Hosted by Bioconductor](#2-create-the-organism-package-if-it-is-not-hosted-by-bioconductor)
118
+ - [3. Load Annotation Databases](#3-load-annotation-databases)
119
+ - [4. Build Initial Annotation Table](#4-build-initial-annotation-table)
120
+ - [5. Add org.db Keys](#5-add-orgdb-keys)
121
+ - [6. Add STRING IDs](#6-add-string-ids)
122
+ - [7. Add Gene Ontology (GO) Slim IDs](#7-add-gene-ontology-go-slim-ids)
123
+ - [8. Export Annotation Table and Build Info](#8-export-annotation-table-and-build-info)
123
124
124
125
125
126
@@ -234,7 +235,54 @@ if ( file.exists(out_table_filename) ) {
234
235
235
236
---
236
237
237
- ## 2. Load Annotation Databases
238
+ ## 2. Create the Organism Package if it is Not Hosted by Bioconductor
239
+
240
+ ``` R
241
+ # Use AnnotationForge's makeOrgPackageFromNCBI function with default settings to create the organism-specific org.db R package from available NCBI annotations
242
+
243
+ # Try to install the org.db from Bioconductor (skipped when no org.db name is defined); build it locally if it is still unavailable
244
+ has_org_db_name <- !is.na(target_org_db) && target_org_db != ""
245
+ if (has_org_db_name) BiocManager::install(target_org_db, ask = FALSE)
246
+ if (!has_org_db_name || !requireNamespace(target_org_db, quietly = TRUE)) {
247
+     tryCatch({
248
+         # Parse organism's name in the reference table to create the org.db name (target_org_db)
249
+         genus_species <- strsplit(target_species_designation, " ")[[1]]
250
+         if (length(genus_species) < 1) {
251
+             stop("Species designation is not correctly formatted: ", target_species_designation)
252
+         }
253
+         genus <- genus_species[1]
254
+         species <- if (length(genus_species) > 1) genus_species[2] else ""  # scalar condition, so plain if/else
255
+         strain <- ref_table %>%
256
+             filter(name == target_organism) %>%
257
+             pull(strain) %>%
258
+             gsub("[^A-Za-z0-9]", "", .)  # keep only characters that are valid in a package name
259
+         if (length(strain) > 0 && !is.na(strain[1]) && strain[1] != "") {  # tolerate zero or several matching rows
260
+             species <- paste0(species, strain[1])
261
+         }
262
+         target_org_db <- paste0("org.", substr(genus, 1, 1), species, ".eg.db")
263
+
264
+         BiocManager::install(c("AnnotationForge", "biomaRt", "GO.db"), ask = FALSE)
265
+         library(AnnotationForge)
266
+         makeOrgPackageFromNCBI(
267
+             version = "0.1",
268
+             author = "Your Name <your.email@example.com>",
269
+             maintainer = "Your Name <your.email@example.com>",
270
+             outputDir = "./",
271
+             tax_id = target_taxid,
272
+             genus = genus,
273
+             species = species
274
+         )
275
+         install.packages(file.path("./", target_org_db), repos = NULL, type = "source", quiet = TRUE)
276
+         cat(paste0("'", target_org_db, "' has been successfully built and installed.\n"))
277
+     }, error = function(e) {
278
+         stop("Failed to build and load the package: ", target_org_db, "\nError: ", conditionMessage(e))
279
+     })  # target_org_db now holds the name of the locally built package
280
+ }
281
+ ```
282
+
283
+ ---
284
+
285
+ ## 3. Load Annotation Databases
238
286
239
287
``` R
240
288
# Set timeout time to ensure annotation file downloads will complete
@@ -248,27 +296,8 @@ GTF <- data.frame(GTF)
248
296
249
297
# ##### org.db ########
250
298
251
- # Define a function to load the specified org.db package for a given target organism
252
- install_and_load_org_db <- function (target_organism , target_org_db , ref_tab_path ) {
253
- if (! is.na(target_org_db ) && target_org_db != " " ) {
254
- # Attempt to install the package from Bioconductor
255
- BiocManager :: install(target_org_db , ask = FALSE )
256
-
257
- # Check if the package was successfully loaded
258
- if (! requireNamespace(target_org_db , quietly = TRUE )) {
259
- # If not, attempt to create it locally using a helper script
260
- source(" install-org-db.R" )
261
- target_org_db <- install_annotations(target_organism , ref_tab_path )
262
- }
263
- } else {
264
- # If target_org_db is NA or empty, create it locally using the helper script
265
- source(" install-org-db.R" )
266
- target_org_db <- install_annotations(target_organism , ref_tab_path )
267
- }
268
-
269
- # Load the package into the R session
270
- library(target_org_db , character.only = TRUE )
271
- }
299
+ # Load the package into the R session
300
+ library(target_org_db , character.only = TRUE )
272
301
273
302
# Define list of supported organisms which do not use annotations from an org.db
274
303
no_org_db <- c(" NCFM" , " MMARINUMM" , " ORYSJ" , " PA14" , " ATCC27592" , " MRSA252" , " UA159" , " ES114" )
@@ -283,7 +312,7 @@ if (!(target_organism %in% no_org_db) && (target_organism %in% currently_accepte
283
312
284
313
---
285
314
286
- ## 3 . Build Initial Annotation Table
315
+ ## 4. Build Initial Annotation Table
287
316
288
317
``` R
289
318
# Initialize table from GTF
@@ -355,7 +384,7 @@ if (target_organism == "SALTY") {
355
384
356
385
---
357
386
358
- ## 4 . Add org.db Keys
387
+ ## 5. Add org.db Keys
359
388
360
389
``` R
361
390
annot_orgdb <- annot_gtf
@@ -458,7 +487,7 @@ if (target_organism == "YEAST") {
458
487
459
488
---
460
489
461
- ## 5 . Add STRING IDs
490
+ ## 6. Add STRING IDs
462
491
463
492
``` R
464
493
# Define organisms that do not use STRING annotations
@@ -570,7 +599,7 @@ annot_stringdb <- as.data.frame(annot_stringdb)
570
599
571
600
---
572
601
573
- ## 6 . Add Gene Ontology (GO) slim IDs
602
+ ## 7. Add Gene Ontology (GO) Slim IDs
574
603
575
604
``` R
576
605
# Define organisms that do not use PANTHER annotations
@@ -618,7 +647,7 @@ if (!(target_organism %in% no_panther_db)) {
618
647
619
648
---
620
649
621
- ## 7 . Export Annotation Table and Build Info
650
+ ## 8. Export Annotation Table and Build Info
622
651
623
652
``` R
624
653
# Group by primary key to remove any remaining unjoined or duplicate rows
0 commit comments