Skip to content

Commit 4852bec

Browse files
authored
Merge pull request #113 from greenelab/envest/41-ensure_numeric_gex
Envest/41 ensure numeric gex
2 parents 6be34a2 + e65eba7 commit 4852bec

15 files changed

+250
-74
lines changed

2A-small_n_differential_expression.R

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ ncores <- min(parallel::detectCores() - 1,
4242
# set seed
4343
initial.seed <- opt$seed
4444
set.seed(initial.seed)
45+
46+
# set additional random seeds for reproducibility within foreach dopar loops
47+
random_seeds <- sample(1:10000, size = 9)
48+
4549
message(paste("\nInitial seed set to:", initial.seed))
4650

4751
# define directories
@@ -92,7 +96,7 @@ seq.dt <- data.table(seq.data[,
9296
samples.to.keep))])
9397
sample.df <- sample.df[which(sample.df$sample %in% samples.to.keep), ]
9498

95-
smaller_subtype_size <- min(table(droplevels(sample.df$category)))
99+
smaller_subtype_size <- min(table(as.character(sample.df$category)))
96100

97101
# different sizes of n to test
98102
no.samples <- c(3, 4, 5, 6, 8, 10, 15, 25, 50)
@@ -112,6 +116,9 @@ doParallel::registerDoParallel(cl)
112116
# at each titration level (0-100% RNA-seq)
113117
stats.df.list[1:9] <- foreach(seq_prop = seq(0.1, .9, 0.1), .packages = c("tidyverse")) %dopar% {
114118

119+
# random_seeds indexed by 1 through 9, corresponding to seq_prop 0.1 through 0.9
120+
set.seed(random_seeds[seq_prop*10])
121+
115122
# we're going to repeat the small n experiment 10 times
116123
stats.df.iter_list <- list() # this is returned to stats.df.list each iteration
117124
for (trial.iter in 1:10) {

3-combine_category_kappa.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,8 @@ summary.df <- test.df %>%
157157
dplyr::group_by(Classifier, Normalization, Platform, Perc.Seq) %>%
158158
dplyr::summarise(Median = median(Kappa),
159159
Mean = mean(Kappa),
160-
SD = sd(Kappa)) %>%
161-
dplyr::ungroup()
160+
SD = sd(Kappa),
161+
.groups = "drop")
162162

163163
readr::write_tsv(summary.df,
164164
summary.df.filename) # delta or not delta in file name

6-save_recon_error_kappa_data.R

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@ kappa.summary.df <-
100100
dplyr::group_by(Classifier, Normalization, Platform, Perc.seq) %>%
101101
dplyr::summarise(Median = median(Kappa),
102102
Mean = mean(Kappa),
103-
SD = sd(Kappa)) %>%
104-
dplyr::ungroup()
103+
SD = sd(Kappa),
104+
.groups = "drop")
105105
readr::write_tsv(kappa.summary.df,
106106
file.path(rcn.res.dir,
107107
paste0(file_identifier,
@@ -138,10 +138,9 @@ error.master.df$comp.method <- as.factor(error.master.df$comp.method)
138138

139139
# take the average of each gene's error across replicates
140140
error.mean.df <- error.master.df %>%
141-
dplyr::group_by(gene, perc.seq, norm.method, comp.method,
142-
platform) %>%
143-
dplyr::summarise(mean_mase = mean(MASE)) %>%
144-
dplyr::ungroup()
141+
dplyr::group_by(gene, perc.seq, norm.method, comp.method, platform) %>%
142+
dplyr::summarise(mean_mase = mean(MASE),
143+
.groups = "drop")
145144
rm(error.master.df)
146145
colnames(error.mean.df) <- c("Gene", "Perc.seq", "Normalization",
147146
"Method", "Platform", "Mean_Value")

7-extract_plier_pathways.R

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -375,9 +375,8 @@ if (length(jaccard_list) > 0) {
375375
"seed_index" = "L1"
376376
)
377377

378-
readr::write_tsv(
379-
x = jaccard_df,
380-
path = plot_data_filename
378+
readr::write_tsv(jaccard_df,
379+
plot_data_filename
381380
)
382381

383382
}

check_sums.tsv

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,23 @@
1-
1a89ea769381e300e5a88ec61713ad9e data/BRCAarray.pcl
2-
8c76a476c5b6f4f8deec017c876db156 data/BRCAClin.tsv
3-
7f00ea6ef1f309773b02e6118046550f data/BRCARNASeq.pcl
4-
d4486dde14da14b4f8887a7415e2866f data/BRCARNASeqClin.tsv
5-
7fafc537807d5b3ddf0bb89665279a9d data/broad.mit.edu_PANCAN_Genome_Wide_SNP_6_whitelisted.seg
6-
639ad8f8386e98dacc22e439188aa8fa data/mc3.v0.2.8.PUBLIC.maf.gz
7-
a4591b2dcee39591f59e5e25a6ce75fa data/TCGA-CDR-SupplementalTableS1.xlsx
8-
02e72c33071307ff6570621480d3c90b data/EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.tsv
9-
76a0454f911aeb17276725abb760ce89 data/GSE83130/GSE83130/GSE83130.tsv
10-
1d8834a51282396e07e3ce9a5417d024 data/gbm_clinical_table_S7.xlsx
11-
e5df57691b44c47b8c916116b5ac7acf data/PanCan-General_Open_GDC-Manifest_2.txt
1+
2f4f2fcd97eff5385c0b1205b719b8dc data/BRCAClin.tsv
2+
7f00ea6ef1f309773b02e6118046550f data/BRCARNASeq.pcl
3+
d4486dde14da14b4f8887a7415e2866f data/BRCARNASeqClin.tsv
4+
1a89ea769381e300e5a88ec61713ad9e data/BRCAarray.pcl
5+
02e72c33071307ff6570621480d3c90b data/EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.tsv
6+
90feb5edb48d7619568d2de21697509e data/GBMClin.tsv
7+
949938abadd336eb9a2f698b3102e1bb data/GBMRNASeq.pcl
8+
3bbdad11c322ebf3b03ada07263c6444 data/GBMarray.pcl
9+
e5df57691b44c47b8c916116b5ac7acf data/PanCan-General_Open_GDC-Manifest_2.txt
10+
a4591b2dcee39591f59e5e25a6ce75fa data/TCGA-CDR-SupplementalTableS1.xlsx
11+
7fafc537807d5b3ddf0bb89665279a9d data/broad.mit.edu_PANCAN_Genome_Wide_SNP_6_whitelisted.seg
12+
d79d2399598ac2e6c11a1b44c5c603df data/combined_clinical_data.BRCA.tsv
13+
90f745d4eac485168cb2b50be29104b1 data/combined_clinical_data.GBM.tsv
14+
1d8834a51282396e07e3ce9a5417d024 data/gbm_clinical_table_S7.xlsx
15+
639ad8f8386e98dacc22e439188aa8fa data/mc3.v0.2.8.PUBLIC.maf.gz
16+
7583a5fb4d23d50b79813b26469f6385 data/mutations.BRCA.tsv
17+
15cae05325c1b0562be8029efba5534a data/mutations.GBM.tsv
18+
5484229fa691a721dd7fd08ade2233e7 data/mutations.maf
19+
e56585bd0c2e59658b1d54fc8b0c9df2 data/mutations.tsv
20+
b62634d9eccbb548499ce384605fe47a data/GSE83130/LICENSE.TXT
21+
9ed2fa92d31d51f17fc048b98158a5e1 data/GSE83130/README.md
22+
76a0454f911aeb17276725abb760ce89 data/GSE83130/GSE83130/GSE83130.tsv
23+
dca310d9643a18d35e694425c56b9d2b data/GSE83130/GSE83130/metadata_GSE83130.tsv

combine_clinical_data.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,4 @@ combined_df <- clinical_df %>%
6060
################################################################################
6161

6262
write_tsv(combined_df,
63-
path = combined_output_filepath)
63+
combined_output_filepath)

download_TCGA_data.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,6 @@ else
7272
wget -O $data/gbm_clinical_table_S7.xlsx $gbm_clinical_link
7373
fi
7474

75-
# check md5 sums of downloaded files
76-
echo Checking md5 sums of downloaded files ...
77-
md5sum --check check_sums.tsv
78-
echo All files downloaded match expected md5 sums!
79-
8075
# modify BRCA clinical file column PAM50 to be subtype
8176
sed -i 's/PAM50/subtype/' $data/BRCAClin.tsv
8277

@@ -110,6 +105,11 @@ Rscript combine_clinical_data.R \
110105
--mutation_input $data/mutations.GBM.tsv \
111106
--combined_output $data/combined_clinical_data.GBM.tsv
112107

108+
# check md5 sums of downloaded files
109+
echo Checking md5 sums of downloaded files ...
110+
md5sum --check --quiet check_sums.tsv
111+
echo All data files match expected md5 sums!
112+
113113
# get BRCA array expression data from TCGA Legacy Archive
114114
# data/gdc_legacy_archive_brca_manifest.txt obtained from https://portal.gdc.cancer.gov/legacy-archive
115115
# with search parameters

plots/scripts/2A-plot_small_n_differential_expression.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ plot_small_n <- function(subtypes){
133133
y = ifelse(using_single_measure,
134134
unique(stats_df$measure),
135135
"Measure of Similarity"),
136-
title = paste(cancer_type, subtypes_nice, "FDR < 10%")) +
136+
title = str_c("Small n Experiment: ", paste(cancer_type, subtypes_nice, "FDR < 10%"))) +
137137
scale_colour_manual(values = cbPalette[c(2, 3)])
138138

139139
if (using_single_measure) {

plots/scripts/3-plot_category_kappa.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,5 +98,5 @@ plot_obj <- ggplot(plot_df,
9898

9999
ggsave(output_filename,
100100
plot = plot_obj,
101-
height = 5,
102-
width = 7.5)
101+
height = 4,
102+
width = 7.25)

plots/scripts/6-plot_recon_error.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,5 +87,5 @@ plot_obj <- ggplot(plot_df,
8787

8888
ggsave(output_filename,
8989
plot = plot_obj,
90-
height = 5,
91-
width = 7.5)
90+
height = 4,
91+
width = 7.25)

0 commit comments

Comments
 (0)