fix dge table gene annotation cols ordering

torres-alexis · torres-alexis · commit c06ba5bce9eb · 2025-02-01T17:17:18.000-08:00
diff --git a/RNAseq/Pipeline_GL-DPPD-7101_Versions/GL-DPPD-7101-G.md b/RNAseq/Pipeline_GL-DPPD-7101_Versions/GL-DPPD-7101-G.md
@@ -1521,10 +1521,12 @@ annot <- read.table(annotations_link,
 
 ### Combine annotations table and the DGE table ###
 output_table <- merge(annot, output_table, by='row.names', all.y=TRUE)
-output_table <- output_table %>% 
-  rename(
-    ENSEMBL = Row.names ## Change ENSEMBL to TAIR for plant studies ##
-  )
+output_table <- annot %>% ## NOTE(review): the statement just above already merges annot into output_table by 'row.names', so annot is joined twice - confirm that earlier merge is still intended ##
+    merge(output_table,
+        by = params$gene_id_type, ## Join key: the column named by params$gene_id_type ##
+        all.y = TRUE ## Keep every row of output_table, including rows with no match in annot ##
+    ) %>%
+    select(all_of(params$gene_id_type), everything()) ## Place the gene ID column first, all other columns after it ##
 
 ```
 
@@ -1540,6 +1542,7 @@ output_table <- output_table %>%
 
 * `output_table` (data frame containing the following columns:
   - Gene identifier column (ENSEMBL or TAIR for plant studies)
+  - Additional organism-specific gene annotation columns
   - Normalized counts for each sample
   - For each pairwise comparison:
     - Log2 fold change
@@ -1552,7 +1555,6 @@ output_table <- output_table %>%
   - For each experimental group:
     - Group.Mean_(group) (mean within group)
     - Group.Stdev_(group) (standard deviation within group))
-  - Additional organism-specific gene annotations columns
 
 <br>
 
diff --git a/RNAseq/Workflow_Documentation/NF_RCP/workflow_code/bin/add_gene_annotations.Rmd b/RNAseq/Workflow_Documentation/NF_RCP/workflow_code/bin/add_gene_annotations.Rmd
@@ -51,15 +51,20 @@ df <- read.csv(params$input_table_path,
 ```
 
 ```{r, add-annotations}
-### Combine annotations table and the (non-ERCC) normalized counts table
-df2 <- merge(
-                annot,
-                df,
-                by = params$gene_id_type,
-                # ensure all original dge rows are kept.
-                # If unmatched in the annotation database, then fill missing with NAN
-                all.y = TRUE
-            )
+### Annotate only when the gene ID column is present in both annot and df
+if (params$gene_id_type %in% colnames(annot) && params$gene_id_type %in% colnames(df)) {
+    # Join on the gene ID column. all.y = TRUE keeps every row of df,
+    # whether or not annot has a matching entry. The select() step then
+    # moves the gene ID column to the first position.
+    df2 <- merge(annot, df, by = params$gene_id_type, all.y = TRUE) %>%
+        select(all_of(params$gene_id_type), everything())
+} else {
+    # The gene ID column is absent from annot and/or df, so no join is
+    # possible: pass df through unchanged and raise a warning instead of
+    # stopping.
+    df2 <- df
+    warning(paste("Gene ID column", params$gene_id_type, "not found in one or both tables."))
+}
 
 dir.create(dirname(paste0(params$output_directory)), recursive = TRUE)
 write.csv(