Skip to content

Commit 96bed81

Browse files
authored
AggregatePRSResults updates (#122)
* checks for allowed condition combos * differentiates samples high risk in any condition in pca plot * control sample shift threshold is dynamic
1 parent 82c0466 commit 96bed81

File tree

4 files changed

+44
-5
lines changed

4 files changed

+44
-5
lines changed

ImputationPipeline/AggregatePRSResults.wdl

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ workflow AggregatePRSResults {
99
File population_pc_projections
1010
String population_name = "Reference Population"
1111
File expected_control_results
12+
File allowed_condition_groups
1213
String lab_batch
1314
Int group_n
15+
Float control_sample_diff_threshold
1416
}
1517

1618
call AggregateResults {
@@ -43,7 +45,9 @@ workflow AggregatePRSResults {
4345
batch_pcs = AggregateResults.batch_pcs,
4446
population_pc_projections = population_pc_projections,
4547
population_name = population_name,
46-
high_risk_thresholds = high_risk_thresholds
48+
high_risk_thresholds = high_risk_thresholds,
49+
allowed_condition_groups = allowed_condition_groups,
50+
control_sample_diff_threshold = control_sample_diff_threshold
4751
}
4852

4953
output {
@@ -204,9 +208,12 @@ task BuildHTMLReport {
204208
File high_risk_thresholds
205209
File batch_pcs
206210
File population_pc_projections
211+
File allowed_condition_groups
207212
String population_name
208213
String lab_batch
209214
Int group_n
215+
216+
Float control_sample_diff_threshold
210217
}
211218
212219
String output_prefix = lab_batch + if group_n > 1 then "_group_" + group_n else ""
@@ -242,6 +249,13 @@ task BuildHTMLReport {
242249
batch_summary <- read_tsv("~{batch_summarised_results}")
243250
batch_summary <- batch_summary %>% rename_with(.cols = -condition, ~ str_to_title(gsub("_"," ", .x)))
244251
condition_thresholds <- read_tsv("~{high_risk_thresholds}")
252+
allowed_condition_groups <- read_tsv("~{allowed_condition_groups}") %>% group_by(group) %>% summarise(conditions = paste0(sort(condition), collapse=","))
253+
254+
observed_condition_groups <- batch_pivoted_results %>% filter(risk == "HIGH" | risk == "NOT_HIGH") %>% group_by(sample_id) %>%
255+
summarise(conditions = paste0(sort(condition), collapse=",")) %>% left_join(allowed_condition_groups) %>% mutate(group=replace_na(group, "not allowed")) %>%
256+
group_by(conditions) %>% summarise(group=group[[1]], n=n(), samples = ifelse(group=="not allowed",
257+
paste0(sample_id, collapse=", "),""))
258+
245259
get_probs_n_high_per_sample_distribution <- function(thresholds_list) {
246260
probs_n_high <- tibble(n_high = seq(0,length(thresholds_list)), prob=c(1,rep(0,length(thresholds_list - 1))))
247261
for (threshold in thresholds_list) {
@@ -267,6 +281,8 @@ task BuildHTMLReport {
267281
summarise(\`high risk conditions\` = paste(condition, collapse = ","), n=n()) %>%
268282
filter(n>1) %>% inner_join(threshold_set_per_sample) %>% group_by(sample_id, \`high risk conditions\`, n, thresholds) %>% filter(n_high >= n) %>%
269283
summarise(significance=paste0(signif(qnorm(1-sum(prob)),2), "\\U03C3")) %>% select(-n,-thresholds)
284+
285+
samples_high_risk <- batch_pivoted_results %>% filter(risk == "HIGH") %>% pull(sample_id) %>% unique()
270286
\`\`\`
271287
272288
\`\`\`{css, echo=FALSE}
@@ -284,7 +300,7 @@ task BuildHTMLReport {
284300
## Control Sample
285301
\`\`\`{r control, echo = FALSE, results = "asis", warning = FALSE}
286302
control_and_expected <- bind_rows(list(batch_control_results, expected_control_results)) %>% select(ends_with('_adjusted'))
287-
delta_frame_colored <- (control_and_expected[-1,] - control_and_expected[-nrow(control_and_expected),]) %>% mutate(across(everything(), ~ round(.x, digits=2))) %>% mutate(across(everything(), ~ kableExtra::cell_spec(.x, color=ifelse(is.na(.x) || abs(.x) > 0.12, "red", "green"))))
303+
delta_frame_colored <- (control_and_expected[-1,] - control_and_expected[-nrow(control_and_expected),]) %>% mutate(across(everything(), ~ round(.x, digits=2))) %>% mutate(across(everything(), ~ kableExtra::cell_spec(.x, color=ifelse(is.na(.x) || abs(.x) > ~{control_sample_diff_threshold}, "red", "green"))))
288304
control_and_expected_char <- control_and_expected %>% mutate(across(everything(), ~ format(round(.x, digits=2), nsmall=2)))
289305
control_table <- bind_rows(list(control_and_expected_char, delta_frame_colored)) %>% select(order(colnames(.)))
290306
kable(control_table %>% add_column(sample=c('batch_control', 'expected_control', 'delta'), .before=1), escape = FALSE, digits = 2, format = "pandoc")
@@ -295,6 +311,12 @@ task BuildHTMLReport {
295311
kable(batch_summary, digits = 2, escape = FALSE, format = "pandoc")
296312
\`\`\`
297313
314+
# Conditions Scored per Sample
315+
\`\`\`{r conditions scored per sample, echo = FALSE, results = "asis", warning = FALSE}
316+
observed_condition_groups <- observed_condition_groups %>% mutate(across(everything(), ~kableExtra::cell_spec(.x, color=ifelse(group=="not allowed", "red", "black"))))
317+
kable(observed_condition_groups, escape = FALSE, format = "pandoc")
318+
\`\`\`
319+
298320
## Samples High Risk for Multiple Conditions
299321
\`r if (multi_high_samples %>% nrow() == 0) {"No Samples were high risk for multiple conditions."} else {"The following samples were high risk for multiple conditions. Significance represents the likelihood that a sample scored for the same conditions as this sample would be high for at least as many conditions, assuming all conditions are uncorrelated."}\`
300322
\`\`\`{r multi high samples table, echo = FALSE, results = "asis", warning = FALSE }
@@ -325,11 +347,13 @@ task BuildHTMLReport {
325347
#### Hover for sample ID
326348
\`\`\`{r pca plot, echo=FALSE, message=FALSE, warning=FALSE, results="asis", fig.align='center'}
327349
target_pcs <- read_tsv("~{batch_pcs}")
350+
target_pcs <- target_pcs %>% mutate(text = paste0("Sample ID: ", sample_id), color=factor(ifelse(sample_id %in% samples_high_risk, "~{lab_batch} High Risk", "~{lab_batch} Not High Risk"), levels=c("~{lab_batch} Not High Risk", "~{lab_batch} High Risk")))
328351
population_pcs <- read_tsv("~{population_pc_projections}")
329352
330353
p <- ggplot(population_pcs, aes(x=PC1, y=PC2, color="~{population_name}")) +
331354
geom_point() +
332-
geom_point(data=target_pcs, aes(color="~{lab_batch}", text=paste0("Sample ID: ", sample_id))) +
355+
geom_point(data=target_pcs, aes( color = color, text=text)) +
356+
scale_color_manual(values=c("~{population_name}"="grey", "~{lab_batch} Not High Risk"="#619CFF", "~{lab_batch} High Risk"="#F8766D")) +
333357
theme_bw()
334358
ggplotly(p, tooltip="text")
335359
\`\`\`
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
group condition
2+
group_1 condition_1
3+
group_1 condition_2
4+
group_1 condition_3
5+
group_2 condition_1
6+
group_2 condition_2
7+
group_2 condition_3
8+
group_2 condition_4
9+
group_3 condition_4

test/AggregatePRSResults/test_AggregatePRSResults.wdl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ workflow test_AggregatePRSResults {
1818
File expected_batch_control_results
1919
File expected_batch_summarised_results
2020
File expected_batch_pcs
21+
File allowed_condition_groups
22+
Float control_sample_diff_threshold
2123
}
2224

2325
call AggregatePRSResults.AggregatePRSResults {
@@ -30,7 +32,9 @@ workflow test_AggregatePRSResults {
3032
population_name = population_name,
3133
expected_control_results = expected_control_results,
3234
lab_batch = lab_batch,
33-
group_n = group_n
35+
group_n = group_n,
36+
allowed_condition_groups = allowed_condition_groups,
37+
control_sample_diff_threshold = control_sample_diff_threshold
3438
}
3539

3640
call CompareTextFiles {

test/AggregatePRSResults/test_AggregatePRSResults_json/test_plumbing_inputs.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,7 @@
1717
"test_AggregatePRSResults.expected_batch_all_results":"/home/circleci/project/test/AggregatePRSResults/plumbing_data/expected_batch_all_results.tsv",
1818
"test_AggregatePRSResults.expected_batch_control_results":"/home/circleci/project/test/AggregatePRSResults/plumbing_data/expected_batch_control_results.tsv",
1919
"test_AggregatePRSResults.expected_batch_summarised_results": "/home/circleci/project/test/AggregatePRSResults/plumbing_data/expected_batch_summarised_results.tsv",
20-
"test_AggregatePRSResults.expected_batch_pcs":"/home/circleci/project/test/AggregatePRSResults/plumbing_data/expected_batch_pcs.tsv"
20+
"test_AggregatePRSResults.expected_batch_pcs":"/home/circleci/project/test/AggregatePRSResults/plumbing_data/expected_batch_pcs.tsv",
21+
"test_AggregatePRSResults.allowed_condition_groups":"/home/circleci/project/test/AggregatePRSResults/plumbing_data/condition_groups.tsv",
22+
"test_AggregatePRSResults.control_sample_diff_threshold" : 0.12
2123
}

0 commit comments

Comments
 (0)