@@ -54,9 +54,9 @@ Software Updates:
54
54
| dp_tools | 1.18* , 1.3.4* | 1.3.5 |
55
55
| pandas | 1.5.0 | 2.2.3 |
56
56
| seaborn | 0.12.0 | 0.13.2 |
57
- | matplotlib | 3.6.0 | 3.8.3 - DOES NOT MATCH VERSION SPECIFIED IN SOFTWARE TABLE BELOW |
58
- | numpy | 1.23.3 | 1.26.4 - DOES NOT MATCH VERSION SPECIFIED IN SOFTWARE TABLE BELOW |
59
- | scipy | 1.9.1 | 1.14 .1 - DOES NOT MATCH VERSION SPECIFIED IN SOFTWARE TABLE BELOW |
57
+ | matplotlib | 3.6.0 | 3.10.0 |
58
+ | numpy | 1.23.3 | 2.2.1 |
59
+ | scipy | 1.9.1 | 1.15.1 |
60
60
61
61
STAR Alignment
62
62
- Added unaligned reads FASTQ output file(s) via STAR ` -outReadsUnmapped Fastq ` :
@@ -227,7 +227,7 @@ zip -r raw_multiqc_GLbulkRNAseq_report.zip raw_multiqc_GLbulkRNAseq_report
227
227
- ` --interactive ` – force reports to use interactive plots
228
228
- ` -n ` – prefix name for output files
229
229
- ` -o ` – the output directory to store results
230
- - ` /path/to/directory/containing/raw_fastqc/files ` – the directory holding the output data from the fastqc run, provided as a positional argument
230
+ - ` /path/to/directory/containing/raw_fastqc/files ` – the directory holding the output data from the FastQC run, provided as a positional argument
231
231
232
232
** Input Data:**
233
233
@@ -236,8 +236,8 @@ zip -r raw_multiqc_GLbulkRNAseq_report.zip raw_multiqc_GLbulkRNAseq_report
236
236
** Output Data:**
237
237
238
238
* ** raw_multiqc_GLbulkRNAseq_report.zip** (zip containing the following)
239
- * ** raw_multiqc_GLbulkRNAseq.html** (multiqc output html summary)
240
- * ** raw_multiqc_GLbulkRNAseq_data** (directory containing multiqc output data)
239
+ * ** raw_multiqc_GLbulkRNAseq.html** (MultiQC output html summary)
240
+ * ** raw_multiqc_GLbulkRNAseq_data** (directory containing MultiQC output data)
241
241
242
242
<br >
243
243
@@ -317,7 +317,7 @@ zip -r trimmed_multiqc_GLbulkRNAseq_report.zip /path/to/trimmed_multiqc/output/t
317
317
- ` --interactive ` – force reports to use interactive plots
318
318
- ` -n ` – prefix name for output files
319
319
- ` -o ` – the output directory to store results
320
- - ` /path/to/directory/containing/trimmed_fastqc/files ` – the directory holding the output data from the fastqc run, provided as a positional argument
320
+ - ` /path/to/directory/containing/trimmed_fastqc/files ` – the directory holding the output data from the FastQC run, provided as a positional argument
321
321
322
322
** Input Data:**
323
323
@@ -326,8 +326,8 @@ zip -r trimmed_multiqc_GLbulkRNAseq_report.zip /path/to/trimmed_multiqc/output/t
326
326
** Output Data:**
327
327
328
328
* ** trimmed_multiqc_GLbulkRNAseq_report.zip** (zip containing the following)
329
- * ** trimmed_multiqc_GLbulkRNAseq.html** (multiqc output html summary)
330
- * ** trimmed_multiqc_GLbulkRNAseq_data** (directory containing multiqc output data)
329
+ * ** trimmed_multiqc_GLbulkRNAseq.html** (MultiQC output html summary)
330
+ * ** trimmed_multiqc_GLbulkRNAseq_data** (directory containing MultiQC output data)
331
331
332
332
<br >
333
333
@@ -495,8 +495,8 @@ zip -r align_multiqc_GLbulkRNAseq_report.zip /path/to/align_multiqc/output/align
495
495
** Output Data:**
496
496
497
497
* ** align_multiqc_GLbulkRNAseq_report.zip** (zip containing the following)
498
- * ** align_multiqc_GLbulkRNAseq.html** (multiqc output html summary)
499
- * ** align_multiqc_GLbulkRNAseq_data** (directory containing multiqc output data)
498
+ * ** align_multiqc_GLbulkRNAseq.html** (MultiQC output html summary)
499
+ * ** align_multiqc_GLbulkRNAseq_data** (directory containing MultiQC output data)
500
500
501
501
<br >
502
502
@@ -708,8 +708,8 @@ zip -r infer_exp_multiqc_GLbulkRNAseq_report.zip /path/to/infer_exp_multiqc/outp
708
708
** Output Data:**
709
709
710
710
* ** infer_exp_multiqc_GLbulkRNAseq_report.zip** (zip containing the following)
711
- * ** infer_exp_multiqc_GLbulkRNAseq.html** (multiqc output html summary)
712
- * ** infer_exp_multiqc_GLbulkRNAseq_data** (directory containing multiqc output data)
711
+ * ** infer_exp_multiqc_GLbulkRNAseq.html** (MultiQC output html summary)
712
+ * ** infer_exp_multiqc_GLbulkRNAseq_data** (directory containing MultiQC output data)
713
713
714
714
<br >
715
715
@@ -764,8 +764,8 @@ zip -r genebody_cov_multiqc_GLbulkRNAseq_report.zip /path/to/genebody_cov_multiq
764
764
** Output Data:**
765
765
766
766
* ** genebody_cov_multiqc_GLbulkRNAseq_report.zip** (zip containing the following)
767
- * ** genebody_cov_multiqc_GLbulkRNAseq.html** (multiqc output html summary)
768
- * ** genebody_cov_multiqc_GLbulkRNAseq_data** (directory containing multiqc output data)
767
+ * ** genebody_cov_multiqc_GLbulkRNAseq.html** (MultiQC output html summary)
768
+ * ** genebody_cov_multiqc_GLbulkRNAseq_data** (directory containing MultiQC output data)
769
769
770
770
<br >
771
771
@@ -776,7 +776,7 @@ inner_distance.py -r /path/to/annotation/BED/file \
776
776
-i /path/to/* Aligned.sortedByCoord_sorted.out.bam \
777
777
-k 15000000 \
778
778
-l -(max read length) \
779
- -u 350 \ [SHOULD WE INCREASE THIS VALUE ? ]
779
+ -u 350 \
780
780
-o /path/to/inner_distance/output/directory
781
781
```
782
782
@@ -826,8 +826,8 @@ zip -r inner_dist_multiqc_GLbulkRNAseq_report.zip /path/to/align_multiqc/output/
826
826
** Output Data:**
827
827
828
828
* ** inner_dist_multiqc_GLbulkRNAseq_report.zip** (zip containing the following)
829
- * ** inner_dist_multiqc_GLbulkRNAseq.html** (multiqc output html summary)
830
- * ** inner_dist_multiqc_GLbulkRNAseq_data** (directory containing multiqc output data)
829
+ * ** inner_dist_multiqc_GLbulkRNAseq.html** (MultiQC output html summary)
830
+ * ** inner_dist_multiqc_GLbulkRNAseq_data** (directory containing MultiQC output data)
831
831
832
832
<br >
833
833
@@ -879,8 +879,8 @@ zip -r read_dist_multiqc_GLbulkRNAseq_report.zip /path/to/read_dist_multiqc/outp
879
879
** Output Data:**
880
880
881
881
* ** read_dist_multiqc_GLbulkRNAseq_report.zip** (zip containing the following)
882
- * ** read_dist_multiqc_GLbulkRNAseq.html** (multiqc output html summary)
883
- * ** read_dist_multiqc_GLbulkRNAseq_data** (directory containing multiqc output data)
882
+ * ** read_dist_multiqc_GLbulkRNAseq.html** (MultiQC output html summary)
883
+ * ** read_dist_multiqc_GLbulkRNAseq_data** (directory containing MultiQC output data)
884
884
885
885
<br >
886
886
@@ -996,8 +996,8 @@ zip -r RSEM_count_multiqc_GLbulkRNAseq_report.zip /path/to/raw_multiqc/output/RS
996
996
** Output Data:**
997
997
998
998
* ** RSEM_count_multiqc_GLbulkRNAseq_report.zip** (zip containing the following)
999
- * ** RSEM_count_multiqc_GLbulkRNAseq.html** (multiqc output html summary)
1000
- * ** RSEM_count_multiqc_GLbulkRNAseq_data** (directory containing multiqc output data)
999
+ * ** RSEM_count_multiqc_GLbulkRNAseq.html** (MultiQC output html summary)
1000
+ * ** RSEM_count_multiqc_GLbulkRNAseq_data** (directory containing MultiQC output data)
1001
1001
1002
1002
<br >
1003
1003
@@ -1101,7 +1101,7 @@ echo "*: ${rRNA_count} rRNA entries removed." > *_rRNA_counts.txt
1101
1101
1102
1102
## 9. Normalize Read Counts and Perform Differential Gene Expression Analysis
1103
1103
1104
- > ** Note:** DGE Analysis is performed twice with different sets of input files:
1104
+ > Note: DGE Analysis is performed twice with different sets of input files:
1105
1105
> 1 . Using RSEM genes.results files (* genes.results, output from [ Step 8a] ( #8a-count-aligned-reads-with-rsem ) )
1106
1106
> 2 . Using rRNA-removed RSEM genes.results files (* rRNA_removed.genes.results, output from [ Step 8dii] ( #8dii-filter-rrna-genes-from-rsem-genes-results ) )
1107
1107
@@ -1219,11 +1219,13 @@ setwd(file.path(work_dir))
1219
1219
# ## Pull all factors for each sample in the study from the runsheet created in Step 9a ###
1220
1220
1221
1221
compare_csv_from_runsheet <- function (runsheet_path ) {
1222
- df = read.csv(runsheet_path )
1223
- # get only Factor Value columns
1224
- factors = as.data.frame(df [,grep(" Factor.Value" , colnames(df ), ignore.case = TRUE )])
1225
- colnames(factors ) = paste(" factor" ,1 : dim(factors )[2 ], sep = " _" )
1226
- result = data.frame (sample_id = df [,c(" Sample.Name" )], factors )
1222
+ df <- read.csv(runsheet_path )
1223
+ factors <- df %>%
1224
+ select(matches(" Factor.Value" , ignore.case = TRUE )) %>%
1225
+ rename_with(~ paste0(" factor_" , seq_along(. )))
1226
+ result <- df %>%
1227
+ select(sample_id = Sample.Name ) %>%
1228
+ bind_cols(factors )
1227
1229
return (result )
1228
1230
}
1229
1231
@@ -1235,20 +1237,19 @@ compare_csv <- compare_csv_from_runsheet(runsheet_path)
1235
1237
1236
1238
# ## Create data frame containing all samples and respective factors ###
1237
1239
1238
- study <- as.data.frame(compare_csv [,2 : dim(compare_csv )[2 ]])
1239
- colnames(study ) <- colnames(compare_csv )[2 : dim(compare_csv )[2 ]]
1240
- rownames(study ) <- compare_csv [,1 ]
1240
+ study <- compare_csv [, - 1 , drop = FALSE ] # Exclude sample_id
1241
+ rownames(study ) <- compare_csv $ sample_id
1241
1242
1242
1243
1243
1244
# ## Format groups and indicate the group that each sample belongs to ###
1244
1245
1245
- if (dim (study ) > = 2 ){
1246
- group <- apply(study ,1 , paste ,collapse = " & " ) # # concatenate multiple factors into one condition per sample
1247
- } else {
1248
- group <- study [, 1 ]
1246
+ group <- if (ncol (study ) >= 2 ) {
1247
+ apply(study , 1 , paste , collapse = " & " )
1248
+ } else {
1249
+ study [[ 1 ] ]
1249
1250
}
1250
- group_names <- paste0(" (" ,group ," ) " , sep = " " ) # # human readable group names
1251
- group <- sub(" ^BLOCKER_" , " " , make.names(paste0(" BLOCKER_" , group ))) # group naming compatible with R models, this maintains the default behaviour of make.names with the exception that 'X' is never prepended to group names
1251
+ group_names <- paste0(" (" , group , " ) " ) # # human readable group names
1252
+ group <- sub(" ^BLOCKER_" , " " , make.names(paste0(" BLOCKER_" , group ))) # group naming compatible with R models, this maintains the default behaviour of make.names with the exception that 'X' is never prepended to group names
1252
1253
names(group ) <- group_names
1253
1254
rm(group_names )
1254
1255
@@ -1492,6 +1493,9 @@ write.csv(txi.rsem$counts,
1492
1493
write.csv(normCounts ,
1493
1494
file.path(norm_output , " Normalized_Counts_GLbulkRNAseq.csv" ))
1494
1495
1496
+ write.csv(VSTCounts ,
1497
+ file.path(norm_output , " VST_Counts_GLbulkRNAseq.csv" ))
1498
+
1495
1499
# ## Export sample grouping and contrasts tables ###
1496
1500
write.csv(sampleTable ,
1497
1501
file.path(DGE_output , " SampleTable_GLbulkRNAseq.csv" ))
0 commit comments