@@ -9,8 +9,10 @@ workflow AggregatePRSResults {
9
9
File population_pc_projections
10
10
String population_name = "Reference Population"
11
11
File expected_control_results
12
+ File allowed_condition_groups
12
13
String lab_batch
13
14
Int group_n
15
+ Float control_sample_diff_threshold
14
16
}
15
17
16
18
call AggregateResults {
@@ -43,7 +45,9 @@ workflow AggregatePRSResults {
43
45
batch_pcs = AggregateResults .batch_pcs ,
44
46
population_pc_projections = population_pc_projections ,
45
47
population_name = population_name ,
46
- high_risk_thresholds = high_risk_thresholds
48
+ high_risk_thresholds = high_risk_thresholds ,
49
+ allowed_condition_groups = allowed_condition_groups ,
50
+ control_sample_diff_threshold = control_sample_diff_threshold
47
51
}
48
52
49
53
output {
@@ -204,9 +208,12 @@ task BuildHTMLReport {
204
208
File high_risk_thresholds
205
209
File batch_pcs
206
210
File population_pc_projections
211
+ File allowed_condition_groups
207
212
String population_name
208
213
String lab_batch
209
214
Int group_n
215
+
216
+ Float control_sample_diff_threshold
210
217
}
211
218
212
219
String output_prefix = lab_batch + if group_n > 1 then "_group_" + group_n else ""
@@ -242,6 +249,13 @@ task BuildHTMLReport {
242
249
batch_summary <- read_tsv("~{batch_summarised_results}")
243
250
batch_summary <- batch_summary %>% rename_with(.cols = -condition, ~ str_to_title(gsub("_"," ", .x)))
244
251
condition_thresholds <- read_tsv("~{high_risk_thresholds}")
252
+ allowed_condition_groups <- read_tsv("~{allowed_condition_groups}") %>% group_by(group) %>% summarise(conditions = paste0(sort(condition), collapse=","))
253
+
254
+ observed_condition_groups <- batch_pivoted_results %>% filter(risk == "HIGH" | risk == "NOT_HIGH") %>% group_by(sample_id) %>%
255
+ summarise(conditions = paste0(sort(condition), collapse=",")) %>% left_join(allowed_condition_groups) %>% mutate(group=replace_na(group, "not allowed")) %>%
256
+ group_by(conditions) %>% summarise(group=group[[1]], n=n(), samples = ifelse(group=="not allowed",
257
+ paste0(sample_id, collapse=", "),""))
258
+
245
259
get_probs_n_high_per_sample_distribution <- function(thresholds_list) {
246
260
probs_n_high <- tibble(n_high = seq(0,length(thresholds_list)), prob=c(1,rep(0,length(thresholds_list - 1))))
247
261
for (threshold in thresholds_list) {
@@ -267,6 +281,8 @@ task BuildHTMLReport {
267
281
summarise(\`high risk conditions\` = paste(condition, collapse = ","), n=n()) %>%
268
282
filter(n>1) %>% inner_join(threshold_set_per_sample) %>% group_by(sample_id, \`high risk conditions\`, n, thresholds) %>% filter(n_high >= n) %>%
269
283
summarise(significance=paste0(signif(qnorm(1-sum(prob)),2), "\\U03C3")) %>% select(-n,-thresholds)
284
+
285
+ samples_high_risk <- batch_pivoted_results %>% filter(risk == "HIGH") %>% pull(sample_id) %>% unique()
270
286
\`\`\`
271
287
272
288
\`\`\`{css, echo=FALSE}
@@ -284,7 +300,7 @@ task BuildHTMLReport {
284
300
## Control Sample
285
301
\`\`\`{r control, echo = FALSE, results = "asis", warning = FALSE}
286
302
control_and_expected <- bind_rows(list(batch_control_results, expected_control_results)) %>% select(ends_with('_adjusted'))
287
- delta_frame_colored <- (control_and_expected[-1,] - control_and_expected[-nrow(control_and_expected),]) %>% mutate(across(everything(), ~ round(.x, digits=2))) %>% mutate(across(everything(), ~ kableExtra::cell_spec(.x, color=ifelse(is.na(.x) || abs(.x) > 0.12 , "red", "green"))))
303
+ # Colour each rounded delta red when it is missing or its magnitude exceeds the configured control threshold, green otherwise.
+ # The element-wise | is used rather than || because ifelse() tests a whole column at once; || errors on inputs longer than one element in R >= 4.3.
+ delta_frame_colored <- (control_and_expected[-1,] - control_and_expected[-nrow(control_and_expected),]) %>% mutate(across(everything(), ~ round(.x, digits=2))) %>% mutate(across(everything(), ~ kableExtra::cell_spec(.x, color=ifelse(is.na(.x) | abs(.x) > ~{control_sample_diff_threshold} , "red", "green"))))
288
304
control_and_expected_char <- control_and_expected %>% mutate(across(everything(), ~ format(round(.x, digits=2), nsmall=2)))
289
305
control_table <- bind_rows(list(control_and_expected_char, delta_frame_colored)) %>% select(order(colnames(.)))
290
306
kable(control_table %>% add_column(sample=c('batch_control', 'expected_control', 'delta'), .before=1), escape = FALSE, digits = 2, format = "pandoc")
@@ -295,6 +311,12 @@ task BuildHTMLReport {
295
311
kable(batch_summary, digits = 2, escape = FALSE, format = "pandoc")
296
312
\`\`\`
297
313
314
+ ## Conditions Scored per Sample
315
+ \`\`\`{r conditions scored per sample, echo = FALSE, results = "asis", warning = FALSE}
316
+ # Colour every cell of a row red when its condition set is not an allowed group, black otherwise.
+ # The colour is computed once up front: across() rewrites columns one after another, so testing group inside the lambda would compare against the already-formatted group column for the columns that follow it.
+ observed_condition_groups <- observed_condition_groups %>% mutate(cell_color = ifelse(group=="not allowed", "red", "black")) %>% mutate(across(-cell_color, ~kableExtra::cell_spec(.x, color=cell_color))) %>% select(-cell_color)
317
+ kable(observed_condition_groups, escape = FALSE, format = "pandoc")
318
+ \`\`\`
319
+
298
320
## Samples High Risk for Multiple Conditions
299
321
\`r if (multi_high_samples %>% nrow() == 0) {"No Samples were high risk for multiple conditions."} else {"The following samples were high risk for multiple conditions. Significance represents the likelihood that a sample scored for the same conditions as this sample would be high for at least as many conditions, assuming all conditions are uncorrelated."}\`
300
322
\`\`\`{r multi high samples table, echo = FALSE, results = "asis", warning = FALSE }
@@ -325,11 +347,13 @@ task BuildHTMLReport {
325
347
#### Hover for sample ID
326
348
\`\`\`{r pca plot, echo=FALSE, message=FALSE, warning=FALSE, results="asis", fig.align='center'}
327
349
target_pcs <- read_tsv("~{batch_pcs}")
350
+ target_pcs <- target_pcs %>% mutate(text = paste0("Sample ID: ", sample_id), color=factor(ifelse(sample_id %in% samples_high_risk, "~{lab_batch} High Risk", "~{lab_batch} Not High Risk"), levels=c("~{lab_batch} Not High Risk", "~{lab_batch} High Risk")))
328
351
population_pcs <- read_tsv("~{population_pc_projections}")
329
352
330
353
p <- ggplot(population_pcs, aes(x=PC1, y=PC2, color="~{population_name}")) +
331
354
geom_point() +
332
- geom_point(data=target_pcs, aes(color="~{lab_batch}", text=paste0("Sample ID: ", sample_id))) +
355
+ geom_point(data=target_pcs, aes( color = color, text=text)) +
356
+ scale_color_manual(values=c("~{population_name}"="grey", "~{lab_batch} Not High Risk"="#619CFF", "~{lab_batch} High Risk"="#F8766D")) +
333
357
theme_bw()
334
358
ggplotly(p, tooltip="text")
335
359
\`\`\`
0 commit comments