@@ -1109,10 +1109,10 @@ rm(contrast.names)
1109
1109
input_counts <- " /path/to/FeatureCounts_GLbulkRNAseq.tsv"
1110
1110
1111
1111
# ## Load featureCounts data ###
1112
- featurecounts <- read.csv(params $ input_counts , header = TRUE , sep = " \t " , skip = 1 )
1112
+ featurecounts <- read.csv(input_counts , header = TRUE , sep = " \t " , skip = 1 )
1113
1113
1114
1114
# ## Create counts matrix: remove metadata columns from featurecounts table, remove bam file extension from column names ###
1115
- row.names(featurecounts ) <- gsub( " - " , " . " , featurecounts $ Geneid )
1115
+ row.names(featurecounts ) <- featurecounts $ Geneid
1116
1116
counts <- featurecounts [,- c(1 : 6 )]
1117
1117
colnames(counts ) <- gsub(" \\ .bam$" , " " , colnames(counts ))
1118
1118
@@ -1254,7 +1254,7 @@ res_lrt <- results(dds_lrt)
1254
1254
1255
1255
``` R
1256
1256
# ## Initialize output table with normalized counts ###
1257
- output_table <- tibble :: rownames_to_column(normCounts , var = " gene_id" )
1257
+ output_table <- tibble :: rownames_to_column(normCounts , var = gene_id )
1258
1258
1259
1259
# ## Iterate through Wald Tests to generate pairwise comparisons of all groups ###
1260
1260
compute_contrast <- function (i ) {
@@ -1284,23 +1284,23 @@ output_table <- cbind(output_table, res_df)
1284
1284
1285
1285
# ## Add summary statistics ###
1286
1286
output_table $ All.mean <- rowMeans(normCounts , na.rm = TRUE )
1287
- output_table $ All.stdev <- rowSds(as.matrix(normCounts ), na.rm = TRUE )
1287
+ output_table $ All.stdev <- rowSds(as.matrix(normCounts ), na.rm = TRUE , useNames = FALSE )
1288
1288
output_table $ LRT.p.value <- res_lrt @ listData $ padj
1289
1289
1290
1290
# ## Add group-wise statistics ###
1291
1291
tcounts <- as.data.frame(t(normCounts ))
1292
1292
tcounts $ group <- names(group )
1293
1293
1294
1294
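# Calculate per-group means and standard deviations (aggregate() returns one row per unique group)
# NOTE(review): after transposing, the column names should come from the aggregated `group` column (one per group), not from names(group) (one per sample) — confirm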
1295
- group_means <- as.data.frame(t( aggregate(. ~ group , data = tcounts , mean )) )
1296
- group_stdev <- as.data.frame(t( aggregate(. ~ group , data = tcounts , sd )) )
1297
-
1298
- # Remove group name rows
1299
- group_means <- group_means [ - 1 ,]
1300
- group_stdev <- group_stdev [ - 1 ,]
1295
+ group_means <- aggregate(. ~ group , data = tcounts , mean )
1296
+ group_stdev <- aggregate(. ~ group , data = tcounts , sd )
1297
+ group_means <- t( group_means [ - 1 ])
1298
+ group_stdev <- t( group_stdev [ - 1 ])
1299
+ colnames( group_means ) <- names( group )
1300
+ colnames( group_stdev ) <- names( group )
1301
1301
1302
1302
# For each group, add mean and stdev columns
1303
- for (group_name in names(group )) {
1303
+ for (group_name in unique( names(group ) )) {
1304
1304
mean_col <- paste0(" Group.Mean_(" , group_name , " )" )
1305
1305
stdev_col <- paste0(" Group.Stdev_(" , group_name , " )" )
1306
1306
output_table [[mean_col ]] <- group_means [, paste0(" Group.Mean_" , group_means [' group' ,])]
@@ -1320,10 +1320,24 @@ annot <- read.table(annotations_link,
1320
1320
output_table <- merge(annot , output_table , by = ' row.names' , all.y = TRUE )
1321
1321
output_table <- annot %> %
1322
1322
merge(output_table ,
1323
- by = params $ gene_id_type ,
1323
+ by = gene_id ,
1324
1324
all.y = TRUE
1325
1325
) %> %
1326
- select(all_of(params $ gene_id_type ), everything())
1326
+ select(all_of(gene_id ), everything())
1327
+
1328
+ if (! (gene_id %in% colnames(annot )) || ! (gene_id %in% colnames(output_table ))) {
1329
+ # If gene ID column is missing from either table, just write the original DGE table
1330
+ output_table2 <- output_table
1331
+ warning(paste(" Gene ID column" , gene_id , " not found in one or both tables." ))
1332
+ } else {
1333
+ # ## Combine annotations with data
1334
+ output_table <- annot %> %
1335
+ merge(output_table ,
1336
+ by = gene_id ,
1337
+ all.y = TRUE
1338
+ ) %> %
1339
+ select(all_of(gene_id ), everything()) # Make sure main gene ID is first column
1340
+ }
1327
1341
1328
1342
```
1329
1343
@@ -1334,7 +1348,7 @@ output_table <- annot %>%
1334
1348
- `contrasts` (matrix defining pairwise comparisons, output from [Step 8c](#8c-configure-metadata-sample-grouping-and-group-comparisons))
1335
1349
- `dds` (DESeq2 data object containing normalized counts, experimental design, and differential expression results, output from [Step 8e](#8e-perform-dge-analysis))
1336
1350
- `annotations_link` (variable containing the URL to the GeneLab gene annotation table, output from [Step 8b](#8b-environment-set-up))
1337
-
1351
+ - `gene_id` (variable containing the gene ID type, e.g. ENSEMBL, naming the column used to merge the annotations with the DGE results)
1338
1352
**Output Data:**
1339
1353
1340
1354
- `output_table` (data frame containing the following columns:
0 commit comments