@@ -115,16 +115,16 @@ rm(contrast.names)
115
115
```
116
116
117
117
### 4. Load Gene Counts
118
- ``` {r load-gene-counts }
119
- ##### Import Bowtie 2 or RSEM raw (gene) count data #####
118
+ ``` {r load-gene-counts}
119
+ ##### Import FeatureCounts or RSEM count data #####
120
120
if (params$microbes) {
121
121
# For microbes, look for FeatureCounts TSV file
122
122
if (!file.exists(params$input_counts)) {
123
123
stop(paste("FeatureCounts file not found at:", params$input_counts))
124
124
}
125
125
# Load featureCounts data with tab separator
126
126
featurecounts <- read.csv(params$input_counts, header = TRUE, sep = "\t", skip = 1)
127
- # Create counts matrix: remove metadata columns from featurecounts table , remove possible .bam from column names
127
+ # Create counts matrix: remove metadata columns, remove possible .bam from column names
128
128
row.names(featurecounts) <- gsub("-", ".", featurecounts$Geneid)
129
129
counts <- featurecounts[,-c(1:6)]
130
130
colnames(counts) <- gsub("\\.bam$", "", colnames(counts))
@@ -138,36 +138,45 @@ if (params$microbes) {
138
138
full.names = TRUE
139
139
)
140
140
samples = rownames(study)
141
- # Reorder files based on sample names without specifying "Rsem_gene_counts/" folder
141
+ # Reorder files based on sample names
142
142
reordering <- sapply(samples, function(x) grep(paste0(x, ".genes.results$"), files, value = FALSE))
143
143
files <- files[reordering]
144
144
names(files) <- samples
145
145
txi.rsem <- tximport(files, type = "rsem", txIn = FALSE, txOut = FALSE)
146
146
if (dim(txi.rsem$counts)[2] != nrow(study)) {
147
147
stop("Sample count mismatch between imported gene results and runsheet")
148
148
}
149
- ## Add 1 to genes with lengths of zero - needed to make DESeqDataSet object
150
- print(sprintf("DEBUG: %s: Converting %d zero length genes to 1-length of %d genes (%f %% total)", Sys.time(), length(txi.rsem$length[txi.rsem$length == 0]), length(txi.rsem$length), length(txi.rsem$length[txi.rsem$length == 0])/length(txi.rsem$length)))
151
- txi.rsem$length[txi.rsem$length == 0] <- 1
152
149
}
150
+ ##### Apply debug options if enabled #####
153
151
if (params$DEBUG_MODE_LIMIT_GENES) {
154
152
if (params$microbes) {
155
- counts <- counts[, 1:150]
156
- print(sprintf("DEBUG: %s: Limiting analysis to last 150 genes", Sys.time()))
153
+ counts <- counts[1:150,]
157
154
} else {
158
- txi.rsem$counts <- txi.rsem$counts[, 1:150]
159
- print(sprintf("DEBUG: %s: Limiting analysis to last 150 genes", Sys.time()))
155
+ txi.rsem$counts <- txi.rsem$counts[1:150,]
160
156
}
157
+ print(sprintf("DEBUG: %s: Limiting analysis to first 150 genes", Sys.time()))
161
158
}
162
159
if (params$DEBUG_MODE_ADD_DUMMY_COUNTS) {
163
160
set.seed(1)
164
161
if (params$microbes) {
165
- counts <- counts + matrix(sample( 0:5000, NROW(counts)*NCOL(counts), replace=TRUE),nrow=NROW(counts))
162
+ counts <- counts + matrix(
163
+ sample(0:5000, NROW(counts)*NCOL(counts), replace=TRUE),
164
+ nrow=NROW(counts)
165
+ )
166
166
} else {
167
- txi.rsem$counts <- txi.rsem$counts + matrix(sample( 0:5000, NROW(txi.rsem$counts)*NCOL(txi.rsem$counts), replace=TRUE),nrow=NROW(txi.rsem$counts))
167
+ txi.rsem$counts <- txi.rsem$counts + matrix(
168
+ sample(0:5000, NROW(txi.rsem$counts)*NCOL(txi.rsem$counts), replace=TRUE),
169
+ nrow=NROW(txi.rsem$counts)
170
+ )
168
171
}
169
172
print(sprintf("DEBUG: %s: Replacing original counts with random values from 0 to 5000", Sys.time()))
170
173
}
174
+ if (params$microbes) {
175
+ } else {
176
+ ## Add 1 to genes with lengths of zero - needed to make DESeqDataSet object
177
+ print(sprintf("DEBUG: %s: Converting %d zero length genes to 1-length of %d genes (%f %% total)", Sys.time(), length(txi.rsem$length[txi.rsem$length == 0]), length(txi.rsem$length), length(txi.rsem$length[txi.rsem$length == 0])/length(txi.rsem$length)))
178
+ txi.rsem$length[txi.rsem$length == 0] <- 1
179
+ }
171
180
```
172
181
173
182
``` {r create-sample-table}
@@ -281,12 +290,6 @@ dds <- DESeq(dds, parallel = TRUE, BPPARAM = BPPARAM)
281
290
``` {r output-counts-related-files}
282
291
normCounts <- as.data.frame(counts(dds, normalized = TRUE))
283
292
VSTCounts <- as.data.frame(assay(vst(dds)))
284
- unnormalized_counts_filename <- if (params$microbes) {
285
- "FeatureCounts_Unnormalized_Counts_GLbulkRNAseq.csv"
286
- } else {
287
- "RSEM_Unnormalized_Counts_GLbulkRNAseq.csv"
288
- }
289
- normalized_counts_filename <- paste0("Normalized_Counts", params$output_filename_suffix, ".csv")
290
293
write.csv(
291
294
if (params$microbes) {
292
295
counts
@@ -312,12 +315,7 @@ write.csv(
312
315
313
316
``` {r prep-counts-for-dge}
314
317
## Add 1 to all counts to avoid issues with log transformation
315
- print(sprintf("DEBUG: %s Printing head of: '%s' below", Sys.time(), 'normCounts'))
316
- print(head(normCounts), quote = TRUE)
317
- print(sprintf("DEBUG: %s: Adding 1 to all normalized counts to avoid issues with log transformation", Sys.time()))
318
318
normCounts <- normCounts + 1
319
- print(sprintf("DEBUG: %s Printing head of: '%s' below", Sys.time(), 'normCounts'))
320
- print(head(normCounts), quote = TRUE)
321
319
## output table 1 will be used to generate computer-readable DGE table,
322
320
## which is used to create GeneLab visualization plots
323
321
output_table <- tibble::rownames_to_column(normCounts, var = params$gene_id_type)
@@ -334,7 +332,7 @@ lrt_pvalues <- res_lrt@listData$padj
334
332
335
333
``` {r wald-test-iteration}
336
334
## Iterate through Wald Tests to generate pairwise comparisons of all groups
337
- compute_contrast <- function(i) {
335
+ compute_contrast <- function(dds, i) {
338
336
res <- results(
339
337
dds,
340
338
contrast = c("condition", contrasts[1, i], contrasts[2, i]),
@@ -349,8 +347,7 @@ compute_contrast <- function(i) {
349
347
)
350
348
return(res_df)
351
349
}
352
- # Use bplapply to compute results in parallel
353
- res_list <- bplapply(1:dim(contrasts)[2], compute_contrast, BPPARAM = BPPARAM)
350
+ res_list <- bplapply(1:dim(contrasts)[2], function(i) compute_contrast(dds, i), BPPARAM = BPPARAM)
354
351
# Combine the list of data frames into a single data frame
355
352
res_df <- do.call(cbind, res_list)
356
353
# Combine with the existing output_table
0 commit comments