Skip to content

Commit 1867a12

Browse files
committed
rnaseq doc updates
1 parent c4c895f commit 1867a12

File tree

3 files changed

+30
-24
lines changed

3 files changed

+30
-24
lines changed

RNAseq/Pipeline_GL-DPPD-7101_Versions/GL-DPPD-7101-G.md

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,14 @@ DESeq2 Analysis Workflow
169169
|read_distribution|5.0.4|[http://rseqc.sourceforge.net/#read-distribution-py](http://rseqc.sourceforge.net/#read-distribution-py)|
170170
|R|4.4.2|[https://www.r-project.org/](https://www.r-project.org/)|
171171
|Bioconductor|3.20|[https://bioconductor.org](https://bioconductor.org)|
172+
|BiocParallel|1.40.0|[https://bioconductor.org/packages/release/bioc/html/BiocParallel.html](https://bioconductor.org/packages/release/bioc/html/BiocParallel.html)|
172173
|DESeq2|1.46.0|[https://bioconductor.org/packages/release/bioc/html/DESeq2.html](https://bioconductor.org/packages/release/bioc/html/DESeq2.html)|
173174
|tximport|1.34.0|[https://github.com/mikelove/tximport](https://github.com/mikelove/tximport)|
174175
|tidyverse|2.0.0|[https://www.tidyverse.org](https://www.tidyverse.org)|
176+
|dplyr|1.1.4|[https://dplyr.tidyverse.org/](https://dplyr.tidyverse.org/)|
177+
|knitr|1.49|[https://yihui.org/knitr/](https://yihui.org/knitr/)|
175178
|stringr|1.5.1|[https://github.com/tidyverse/stringr](https://github.com/tidyverse/stringr)|
179+
|yaml|2.3.10|[https://github.com/yaml/yaml](https://github.com/yaml/yaml)|
176180
|dp_tools|1.3.5|[https://github.com/J-81/dp_tools](https://github.com/J-81/dp_tools)|
177181
|pandas|2.2.3|[https://github.com/pandas-dev/pandas](https://github.com/pandas-dev/pandas)|
178182
|seaborn|0.13.2|[https://seaborn.pydata.org/](https://seaborn.pydata.org/)|
@@ -1320,7 +1324,7 @@ txi.rsem$length[txi.rsem$length == 0] <- 1
13201324
```
13211325

13221326
**Input Data:**
1323-
1327+
* *genes.results (RSEM counts per gene, output from [Step 8a](#8a-count-aligned-reads-with-rsem) or from [Step 8dii](#8dii-filter-rrna-genes-from-rsem-genes-results) when using rRNA-removed count data)
13241328
* `study` (data frame containing sample condition values, output from [Step 9c](#9c-create-study-group-and-contrasts))
13251329

13261330
**Output Data:**
@@ -1440,7 +1444,7 @@ res_lrt <- results(dds_lrt)
14401444
**Output Data:**
14411445

14421446
* `sampleTable` (data frame mapping samples to groups)
1443-
* `dds` (DESeq2 data object containing normalized counts and experimental design)
1447+
* `dds` (DESeq2 data object containing normalized counts, experimental design, and differential expression results)
14441448
* `normCounts` (data frame of normalized count values + 1)
14451449
* `VSTCounts` (data frame of variance stabilized transformed counts)
14461450
* `dds_lrt` (DESeq2 data object from likelihood ratio test)
@@ -1455,24 +1459,21 @@ res_lrt <- results(dds_lrt)
14551459
### Initialize output table with normalized counts ###
14561460
output_table <- tibble::rownames_to_column(normCounts, var = "ENSEMBL")
14571461

1458-
### Add LRT p-values ###
1459-
output_table$LRT.p.value <- res_lrt@listData$padj
1460-
14611462
### Iterate through Wald Tests to generate pairwise comparisons of all groups ###
14621463
compute_contrast <- function(i) {
1463-
res_1 <- results(
1464-
dds_1,
1464+
res <- results(
1465+
dds,
14651466
contrast = c("condition", contrasts[1, i], contrasts[2, i]),
14661467
parallel = FALSE # Disable internal parallelization
14671468
)
1468-
res_1_df <- as.data.frame(res_1@listData)[, c(2, 4, 5, 6)]
1469-
colnames(res_1_df) <- c(
1469+
res_df <- as.data.frame(res@listData)[, c(2, 4, 5, 6)]
1470+
colnames(res_df) <- c(
14701471
paste0("Log2fc_", colnames(contrasts)[i]),
14711472
paste0("Stat_", colnames(contrasts)[i]),
14721473
paste0("P.value_", colnames(contrasts)[i]),
14731474
paste0("Adj.p.value_", colnames(contrasts)[i])
14741475
)
1475-
return(res_1_df)
1476+
return(res_df)
14761477
}
14771478

14781479
### Use bplapply to compute results in parallel ###
@@ -1487,7 +1488,7 @@ output_table <- cbind(output_table, res_df)
14871488
### Add summary statistics ###
14881489
output_table$All.mean <- rowMeans(normCounts, na.rm = TRUE)
14891490
output_table$All.stdev <- rowSds(as.matrix(normCounts), na.rm = TRUE)
1490-
output_table$LRT.p.value <- res_1_lrt@listData$padj
1491+
output_table$LRT.p.value <- res_lrt@listData$padj
14911492

14921493
### Add group-wise statistics ###
14931494
tcounts <- as.data.frame(t(normCounts))
@@ -1532,6 +1533,7 @@ output_table <- output_table %>%
15321533
* `normCounts` (data frame of normalized counts, output from [Step 9e](#9e-perform-dge-analysis))
15331534
* `res_lrt` (results object from likelihood ratio test, output from [Step 9e](#9e-perform-dge-analysis))
15341535
* `contrasts` (matrix defining pairwise comparisons, output from [Step 9c](#9c-configure-metadata-sample-grouping-and-group-comparisons))
1536+
* `dds` (DESeq2 data object containing normalized counts, experimental design, and differential expression results, output from [Step 9e](#9e-perform-dge-analysis))
15351537
* `annotations_link` (variable containing URL to GeneLab annotation table, output from [Step 9b](#9b-environment-set-up))
15361538

15371539
**Output Data:**
@@ -1604,6 +1606,7 @@ sessionInfo()
16041606
* **contrasts_GLbulkRNAseq.csv** (table listing all pairwise group comparisons)
16051607
* **differential_expression_GLbulkRNAseq.csv** (DGE results table containing the following columns:
16061608
- Gene identifier column (ENSEMBL or TAIR for plant studies)
1609+
- Additional organism-specific gene annotations columns
16071610
- Normalized counts
16081611
- For each pairwise group comparison:
16091612
- Log2 fold change
@@ -1615,8 +1618,7 @@ sessionInfo()
16151618
- LRT.p.value (likelihood ratio test adjusted p-value)
16161619
- For each group:
16171620
- Group.Mean_(group) (mean within group)
1618-
- Group.Stdev_(group) (standard deviation within group)
1619-
- Additional organism-specific gene annotations columns)
1621+
- Group.Stdev_(group) (standard deviation within group))
16201622

16211623
> Note: Datasets with technical replicates are handled by collapsing them such that the minimum number of equal technical replicates is retained across all samples. Before normalization, the counts of technical replicates are summed to combine them into a single sample representing the biological replicate.
16221624

RNAseq/Workflow_Documentation/NF_RCP/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3535
- RSeQC tools 5.0.4
3636
- R 4.4.2
3737
- Bioconductor 3.20
38+
- BiocParallel 1.40.0
3839
- DESeq2 1.46.0
3940
- tximport 1.34.0
4041
- tidyverse 2.0.0
42+
- dplyr 1.1.4
43+
- knitr 1.49
4144
- stringr 1.5.1
45+
- yaml 2.3.10
4246
- dp_tools 1.3.5
4347
- pandas 2.2.3
4448
- seaborn 0.13.2

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/bin/deseq2_dge.Rmd

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -302,12 +302,12 @@ dim(dds)
302302
if (length(grep("ERCC-", rownames(dds))) != 0) {
303303
dds <- dds[-c(grep("ERCC-", rownames(dds))), ]
304304
}
305-
dds_1 <- DESeq(dds, parallel = TRUE, BPPARAM = BPPARAM)
305+
dds <- DESeq(dds, parallel = TRUE, BPPARAM = BPPARAM)
306306
```
307307

308308
```{r output-counts-related-files}
309-
normCounts <- as.data.frame(counts(dds_1, normalized = TRUE))
310-
VSTCounts <- as.data.frame(assay(vst(dds_1)))
309+
normCounts <- as.data.frame(counts(dds, normalized = TRUE))
310+
VSTCounts <- as.data.frame(assay(vst(dds)))
311311
unnormalized_counts_filename <- if (params$microbes) {
312312
"FeatureCounts_Unnormalized_Counts_GLbulkRNAseq.csv"
313313
} else {
@@ -351,28 +351,28 @@ output_table <- tibble::rownames_to_column(normCounts, var = params$gene_id_type
351351
##### Generate F statistic p-value (similar to ANOVA p-value) using DESeq2 likelihood ratio test (LRT) design #####
352352
353353
print(sprintf("DEBUG: %s: Generating Likelihood Ratio Test Based Statistics", Sys.time()))
354-
dds_1_lrt <- DESeq(dds_1, test = "LRT", reduced = ~1)
355-
res_1_lrt <- results(dds_1_lrt)
354+
dds_lrt <- DESeq(dds, test = "LRT", reduced = ~1)
355+
res_lrt <- results(dds_lrt)
356356
# Store LRT p-value (add after means and stdevs)
357-
lrt_pvalues <- res_1_lrt@listData$padj
357+
lrt_pvalues <- res_lrt@listData$padj
358358
```
359359

360360
```{r wald-test-iteration}
361361
## Iterate through Wald Tests to generate pairwise comparisons of all groups
362362
compute_contrast <- function(i) {
363-
res_1 <- results(
364-
dds_1,
363+
res <- results(
364+
dds,
365365
contrast = c("condition", contrasts[1, i], contrasts[2, i]),
366366
parallel = FALSE # Disable internal parallelization
367367
)
368-
res_1_df <- as.data.frame(res_1@listData)[, c(2, 4, 5, 6)]
369-
colnames(res_1_df) <- c(
368+
res_df <- as.data.frame(res@listData)[, c(2, 4, 5, 6)]
369+
colnames(res_df) <- c(
370370
paste0("Log2fc_", colnames(contrasts)[i]),
371371
paste0("Stat_", colnames(contrasts)[i]),
372372
paste0("P.value_", colnames(contrasts)[i]),
373373
paste0("Adj.p.value_", colnames(contrasts)[i])
374374
)
375-
return(res_1_df)
375+
return(res_df)
376376
}
377377
378378
# Use bplapply to compute results in parallel

0 commit comments

Comments
 (0)