@@ -9,8 +9,10 @@ workflow AggregatePRSResults {
99 File population_pc_projections
1010 String population_name = "Reference Population"
1111 File expected_control_results
12+ File allowed_condition_groups
1213 String lab_batch
1314 Int group_n
15+ Float control_sample_diff_threshold
1416 }
1517
1618 call AggregateResults {
@@ -43,7 +45,9 @@ workflow AggregatePRSResults {
4345 batch_pcs = AggregateResults .batch_pcs ,
4446 population_pc_projections = population_pc_projections ,
4547 population_name = population_name ,
46- high_risk_thresholds = high_risk_thresholds
48+ high_risk_thresholds = high_risk_thresholds ,
49+ allowed_condition_groups = allowed_condition_groups ,
50+ control_sample_diff_threshold = control_sample_diff_threshold
4751 }
4852
4953 output {
@@ -204,9 +208,12 @@ task BuildHTMLReport {
204208 File high_risk_thresholds
205209 File batch_pcs
206210 File population_pc_projections
211+ File allowed_condition_groups
207212 String population_name
208213 String lab_batch
209214 Int group_n
215+
216+ Float control_sample_diff_threshold
210217 }
211218
212219 String output_prefix = lab_batch + if group_n > 1 then "_group_" + group_n else ""
@@ -242,6 +249,13 @@ task BuildHTMLReport {
242249 batch_summary <- read_tsv("~{batch_summarised_results }")
243250 batch_summary <- batch_summary %>% rename_with(.cols = -condition, ~ str_to_title(gsub("_"," ", .x)))
244251 condition_thresholds <- read_tsv("~{high_risk_thresholds }")
252+ allowed_condition_groups <- read_tsv("~{allowed_condition_groups }") %>% group_by(group) %>% summarise(conditions = paste0(sort(condition), collapse=","))
253+
254+ observed_condition_groups <- batch_pivoted_results %>% filter(risk == "HIGH" | risk == "NOT_HIGH") %>% group_by(sample_id) %>%
255+ summarise(conditions = paste0(sort(condition), collapse=",")) %>% left_join(allowed_condition_groups) %>% mutate(group=replace_na(group, "not allowed")) %>%
256+ group_by(conditions) %>% summarise(group=group[[1]], n=n(), samples = ifelse(group=="not allowed",
257+ paste0(sample_id, collapse=", "),""))
258+
245259 get_probs_n_high_per_sample_distribution <- function(thresholds_list) {
246260 probs_n_high <- tibble(n_high = seq(0,length(thresholds_list)), prob=c(1,rep(0,length(thresholds_list - 1))))
247261 for (threshold in thresholds_list) {
@@ -267,6 +281,8 @@ task BuildHTMLReport {
267281 summarise(\`high risk conditions\` = paste(condition, collapse = ","), n=n()) %>%
268282 filter(n>1) %>% inner_join(threshold_set_per_sample) %>% group_by(sample_id, \`high risk conditions\`, n, thresholds) %>% filter(n_high >= n) %>%
269283 summarise(significance=paste0(signif(qnorm(1-sum(prob)),2), "\\U03C3")) %>% select(-n,-thresholds)
284+
285+ samples_high_risk <- batch_pivoted_results %>% filter(risk == "HIGH") %>% pull(sample_id) %>% unique()
270286 \`\`\`
271287
272288 \`\`\`{css, echo=FALSE}
@@ -284,7 +300,7 @@ task BuildHTMLReport {
284300 ## Control Sample
285301 \`\`\`{r control, echo = FALSE, results = "asis", warning = FALSE}
286302 control_and_expected <- bind_rows(list(batch_control_results, expected_control_results)) %>% select(ends_with('_adjusted'))
287- delta_frame_colored <- (control_and_expected[-1,] - control_and_expected[-nrow(control_and_expected),]) %>% mutate(across(everything(), ~ round(.x, digits=2))) %>% mutate(across(everything(), ~ kableExtra::cell_spec(.x, color=ifelse(is.na(.x) || abs(.x) > 0.12 , "red", "green"))))
303+ delta_frame_colored <- (control_and_expected[-1,] - control_and_expected[-nrow(control_and_expected),]) %>% mutate(across(everything(), ~ round(.x, digits=2))) %>% mutate(across(everything(), ~ kableExtra::cell_spec(.x, color=ifelse(is.na(.x) || abs(.x) > ~{ control_sample_diff_threshold } , "red", "green"))))
288304 control_and_expected_char <- control_and_expected %>% mutate(across(everything(), ~ format(round(.x, digits=2), nsmall=2)))
289305 control_table <- bind_rows(list(control_and_expected_char, delta_frame_colored)) %>% select(order(colnames(.)))
290306 kable(control_table %>% add_column(sample=c('batch_control', 'expected_control', 'delta'), .before=1), escape = FALSE, digits = 2, format = "pandoc")
@@ -295,6 +311,12 @@ task BuildHTMLReport {
295311 kable(batch_summary, digits = 2, escape = FALSE, format = "pandoc")
296312 \`\`\`
297313
314+ # Conditions Scored per Sample
315+ \`\`\`{r conditions scored per sample, echo = FALSE, results = "asis", warning = FALSE}
316+ observed_condition_groups <- observed_condition_groups %>% mutate(across(everything(), ~kableExtra::cell_spec(.x, color=ifelse(group=="not allowed", "red", "black"))))
317+ kable(observed_condition_groups, escape = FALSE, format = "pandoc")
318+ \`\`\`
319+
298320 ## Samples High Risk for Multiple Conditions
299321 \`r if (multi_high_samples %>% nrow() == 0) {"No Samples were high risk for multiple conditions."} else {"The following samples were high risk for multiple conditions. Significance represents the likelihood that a sample scored for the same conditions as this sample would be high for at least as many conditions, assuming all conditions are uncorrelated."}\`
300322 \`\`\`{r multi high samples table, echo = FALSE, results = "asis", warning = FALSE }
@@ -325,11 +347,13 @@ task BuildHTMLReport {
325347 #### Hover for sample ID
326348 \`\`\`{r pca plot, echo=FALSE, message=FALSE, warning=FALSE, results="asis", fig.align='center'}
327349 target_pcs <- read_tsv("~{batch_pcs }")
350+ target_pcs <- target_pcs %>% mutate(text = paste0("Sample ID: ", sample_id), color=factor(ifelse(sample_id %in% samples_high_risk, "~{lab_batch } High Risk", "~{lab_batch } Not High Risk"), levels=c("~{lab_batch } Not High Risk", "~{lab_batch } High Risk")))
328351 population_pcs <- read_tsv("~{population_pc_projections }")
329352
330353 p <- ggplot(population_pcs, aes(x=PC1, y=PC2, color="~{population_name }")) +
331354 geom_point() +
332- geom_point(data=target_pcs, aes(color="~{lab_batch }", text=paste0("Sample ID: ", sample_id))) +
355+ geom_point(data=target_pcs, aes( color = color, text=text)) +
356+ scale_color_manual(values=c("~{population_name }"="grey", "~{lab_batch } Not High Risk"="#619CFF", "~{lab_batch } High Risk"="#F8766D")) +
333357 theme_bw()
334358 ggplotly(p, tooltip="text")
335359 \`\`\`
0 commit comments