Skip to content

Commit a690b1d

Browse files
committed
clean up dge step
1 parent cc0cb5e commit a690b1d

File tree

2 files changed

+37
-20
lines changed

2 files changed

+37
-20
lines changed

RNAseq/Pipeline_GL-DPPD-7115_Versions/GL-DPPD-7115.md

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,10 +1109,10 @@ rm(contrast.names)
11091109
input_counts <- "/path/to/FeatureCounts_GLbulkRNAseq.tsv"
11101110

11111111
### Load featureCounts data ###
1112-
featurecounts <- read.csv(params$input_counts, header = TRUE, sep = "\t", skip = 1)
1112+
featurecounts <- read.csv(input_counts, header = TRUE, sep = "\t", skip = 1)
11131113

11141114
### Create counts matrix: remove metadata columns from featurecounts table, remove bam file extension from column names ###
1115-
row.names(featurecounts) <- gsub("-", ".", featurecounts$Geneid)
1115+
row.names(featurecounts) <- featurecounts$Geneid
11161116
counts <- featurecounts[,-c(1:6)]
11171117
colnames(counts) <- gsub("\\.bam$", "", colnames(counts))
11181118

@@ -1254,7 +1254,7 @@ res_lrt <- results(dds_lrt)
12541254

12551255
```R
12561256
### Initialize output table with normalized counts ###
1257-
output_table <- tibble::rownames_to_column(normCounts, var = "gene_id")
1257+
output_table <- tibble::rownames_to_column(normCounts, var = gene_id)
12581258

12591259
### Iterate through Wald Tests to generate pairwise comparisons of all groups ###
12601260
compute_contrast <- function(i) {
@@ -1284,23 +1284,23 @@ output_table <- cbind(output_table, res_df)
12841284

12851285
### Add summary statistics ###
12861286
output_table$All.mean <- rowMeans(normCounts, na.rm = TRUE)
1287-
output_table$All.stdev <- rowSds(as.matrix(normCounts), na.rm = TRUE)
1287+
output_table$All.stdev <- rowSds(as.matrix(normCounts), na.rm = TRUE, useNames = FALSE)
12881288
output_table$LRT.p.value <- res_lrt@listData$padj
12891289

12901290
### Add group-wise statistics ###
12911291
tcounts <- as.data.frame(t(normCounts))
12921292
tcounts$group <- names(group)
12931293

12941294
# Calculate group means and standard deviations
1295-
group_means <- as.data.frame(t(aggregate(. ~ group, data = tcounts, mean)))
1296-
group_stdev <- as.data.frame(t(aggregate(. ~ group, data = tcounts, sd)))
1297-
1298-
# Remove group name rows
1299-
group_means <- group_means[-1,]
1300-
group_stdev <- group_stdev[-1,]
1295+
group_means <- aggregate(. ~ group, data = tcounts, mean)
1296+
group_stdev <- aggregate(. ~ group, data = tcounts, sd)
1297+
group_means <- t(group_means[-1])
1298+
group_stdev <- t(group_stdev[-1])
1299+
colnames(group_means) <- names(group)
1300+
colnames(group_stdev) <- names(group)
13011301

13021302
# For each group, add mean and stdev columns
1303-
for (group_name in names(group)) {
1303+
for (group_name in unique(names(group))) {
13041304
mean_col <- paste0("Group.Mean_(", group_name, ")")
13051305
stdev_col <- paste0("Group.Stdev_(", group_name, ")")
13061306
output_table[[mean_col]] <- group_means[, paste0("Group.Mean_", group_means['group',])]
@@ -1320,10 +1320,24 @@ annot <- read.table(annotations_link,
13201320
output_table <- merge(annot, output_table, by='row.names', all.y=TRUE)
13211321
output_table <- annot %>%
13221322
merge(output_table,
1323-
by = params$gene_id_type,
1323+
by = gene_id,
13241324
all.y = TRUE
13251325
) %>%
1326-
select(all_of(params$gene_id_type), everything())
1326+
select(all_of(gene_id), everything())
1327+
1328+
if (!(gene_id %in% colnames(annot)) || !(gene_id %in% colnames(output_table))) {
1329+
# If gene ID column is missing from either table, just write the original DGE table
1330+
output_table2 <- output_table
1331+
warning(paste("Gene ID column", gene_id, "not found in one or both tables."))
1332+
} else {
1333+
### Combine annotations with data
1334+
output_table <- annot %>%
1335+
merge(output_table,
1336+
by = gene_id,
1337+
all.y = TRUE
1338+
) %>%
1339+
select(all_of(gene_id), everything()) # Make sure main gene ID is first column
1340+
}
13271341

13281342
```
13291343

@@ -1334,7 +1348,7 @@ output_table <- annot %>%
13341348
- `contrasts` (matrix defining pairwise comparisons, output from [Step 8c](#8c-configure-metadata-sample-grouping-and-group-comparisons))
13351349
- `dds` (DESeq2 data object containing normalized counts, experimental design, and differential expression results, output from [Step 8e](#8e-perform-dge-analysis))
13361350
- `annotations_link` (variable containing URL to GeneLab gene annotation table, output from [Step 8b](#8b-environment-set-up))
1337-
1351+
- `gene_id` (Gene id type, e.g. ENSEMBL, used to merge the annotations with the DGE results)
13381352
**Output Data:**
13391353

13401354
- `output_table` (data frame containing the following columns:

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/bin/dge_deseq2.Rmd

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ if (params$microbes) {
125125
# Load featureCounts data with tab separator
126126
featurecounts <- read.csv(params$input_counts, header = TRUE, sep = "\t", skip = 1)
127127
# Create counts matrix: remove metadata columns, remove possible .bam from column names
128-
row.names(featurecounts) <- gsub("-", ".", featurecounts$Geneid)
128+
row.names(featurecounts) <- featurecounts$Geneid
129129
counts <- featurecounts[,-c(1:6)]
130130
colnames(counts) <- gsub("\\.bam$", "", colnames(counts))
131131
# Reorder counts columns to match runsheet
@@ -355,17 +355,20 @@ output_table <- cbind(output_table, res_df)
355355
```
356356

357357
```{r calculate-group-statistics}
358+
### Add summary statistics ###
359+
output_table$All.mean <- rowMeans(normCounts, na.rm = TRUE)
360+
output_table$All.stdev <- matrixStats::rowSds(as.matrix(normCounts), na.rm = TRUE, useNames = FALSE)
361+
output_table$LRT.p.value <- res_lrt@listData$padj
358362
## Calculate group statistics
359363
tcounts <- as.data.frame(t(normCounts))
360-
tcounts$group <- unique(names(group))
361-
# Compute group stats
364+
tcounts$group <- names(group)
365+
# Calculate group means and standard deviations
362366
group_means <- aggregate(. ~ group, data = tcounts, mean)
363367
group_stdev <- aggregate(. ~ group, data = tcounts, sd)
364-
# Transform data
365368
group_means <- t(group_means[-1]) # Remove group column and transpose
366369
group_stdev <- t(group_stdev[-1]) # Remove group column and transpose
367-
colnames(group_means) <- unique(names(group))
368-
colnames(group_stdev) <- unique(names(group))
370+
colnames(group_means) <- names(group)
371+
colnames(group_stdev) <- names(group)
369372
# Add stats to output table
370373
for (group_name in unique(names(group))) {
371374
output_table[[paste0("Group.Mean_(", group_name, ")")]] <- group_means[, group_name]

0 commit comments

Comments
 (0)