Skip to content

Commit 8a3d78f

Browse files
authored
PRS check z-score before APOL1 correction (#131)
1 parent da3e318 commit 8a3d78f

File tree

2 files changed

+52
-20
lines changed

2 files changed

+52
-20
lines changed

ImputationPipeline/AggregatePRSResults.wdl

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,12 +108,9 @@ task AggregateResults {
108108
write_tsv(results %>% filter(is_control_sample), "~{output_prefix}_control_results.tsv")
109109
110110
results_pivoted <- results %>% select(-starts_with("pc")) %>% filter(!is_control_sample) %>% pivot_longer(!c(sample_id, lab_batch, is_control_sample), names_to=c("condition",".value"), names_pattern="(.+)(?<!not)(?<!reason)_(.+)$")
111-
results_pivoted <- results_pivoted %T>% {options(warn=-1)} %>% mutate(adjusted = as.numeric(adjusted),
112-
raw = as.numeric(raw),
113-
percentile = as.numeric(percentile)) %T>% {options(warn=0)}
114111
115112
results_summarised <- results_pivoted %>% group_by(condition) %>%
116-
summarise(across(c(adjusted,percentile), ~mean(.x, na.rm=TRUE), .names = "mean_{.col}"),
113+
summarise(across(c(adjusted,percentile), ~mean(as.numeric(.x), na.rm=TRUE), .names = "mean_{.col}"),
117114
num_samples=n(),
118115
num_scored = sum(!is.na(risk)),
119116
num_high = sum(risk=="HIGH", na.rm=TRUE),
@@ -122,7 +119,7 @@ task AggregateResults {
122119
123120
write_tsv(results_summarised, "~{output_prefix}_summarised_results.tsv")
124121
125-
ggplot(results_pivoted, aes(x=adjusted)) +
122+
ggplot(results_pivoted, aes(x=as.numeric(adjusted))) +
126123
geom_density(aes(color=condition), fill=NA, position = "identity") +
127124
xlim(-5,5) + theme_bw() + xlab("z-score") + geom_function(fun=dnorm) +
128125
ylab("density")
@@ -333,10 +330,10 @@ task BuildHTMLReport {
333330
p_dist <- ggplot()
334331
if (n_density > 0) {
335332
p_dist <- p_dist + stat_density(data = batch_pivoted_results %>% filter(condition %in% conditions_with_more_than_4_samples),
336-
aes(color=condition, text=condition, x = adjusted), geom="line", position = "identity")
333+
aes(color=condition, text=condition, x = as.numeric(adjusted)), geom="line", position = "identity")
337334
}
338335
if (n_point > 0) {
339-
p_dist <- p_dist + geom_point(data = batch_pivoted_results %>% filter(!(condition %in% conditions_with_more_than_4_samples)), aes(color=condition, x = adjusted, text=condition), y=0)
336+
p_dist <- p_dist + geom_point(data = batch_pivoted_results %>% filter(!(condition %in% conditions_with_more_than_4_samples)), aes(color=condition, x = as.numeric(adjusted), text=condition), y=0)
340337
}
341338
p_dist <- p_dist + xlim(-5,5) + theme_bw() + geom_line(data=normal_dist, aes(x=x, y=y), color="black") + xlab("z-score") + ylab("density")
342339

ImputationPipeline/PRSWrapper.wdl

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
version 1.0
22
import "ScoringPart.wdl" as Score
3-
import "CKDRiskAdjustment.wdl" as CKDRiskAdjustment
3+
import "CKDRiskAdjustment.wdl" as CKDRiskAdjustmentWF
44
import "Structs.wdl"
55

66
workflow PRSWrapper {
@@ -37,22 +37,28 @@ workflow PRSWrapper {
3737
}
3838

3939
if (condition_resource.named_weight_set.condition_name == "ckd") {
40-
call CKDRiskAdjustment.CKDRiskAdjustment {
40+
call CKDRiskAdjustmentWF.CKDRiskAdjustment {
4141
input:
4242
adjustedScores = select_first([ScoringImputedDataset.adjusted_array_scores]),
4343
vcf = vcf,
4444
risk_alleles = ckd_risk_alleles
4545
}
4646
}
4747

48+
call CheckZScoreAgainstReportableRange {
49+
input:
50+
score_result = select_first([ScoringImputedDataset.adjusted_array_scores]),
51+
z_score_reportable_range = z_score_reportable_range
52+
}
4853

4954
call SelectValuesOfInterest {
5055
input:
5156
score_result = select_first([CKDRiskAdjustment.adjusted_scores_with_apol1, ScoringImputedDataset.adjusted_array_scores]),
5257
sample_id = sample_id,
5358
condition_name = condition_resource.named_weight_set.condition_name,
5459
threshold = condition_resource.percentile_threshold,
55-
z_score_reportable_range = z_score_reportable_range
60+
z_score_reportable_range = z_score_reportable_range,
61+
out_of_reportable_range = CheckZScoreAgainstReportableRange.out_of_reportable_range
5662
}
5763
}
5864

@@ -94,12 +100,44 @@ workflow PRSWrapper {
94100
}
95101

96102

103+
task CheckZScoreAgainstReportableRange {
104+
input {
105+
File score_result
106+
Float z_score_reportable_range
107+
}
108+
109+
command <<<
110+
Rscript - <<- "EOF"
111+
library(dplyr)
112+
library(readr)
113+
score <- read_tsv("~{score_result}")
114+
if (nrow(score) != 1) {
115+
quit(status=1)
116+
}
117+
118+
adjusted_score <- (score %>% pull(adjusted_score))[[1]]
119+
write(abs(adjusted_score) > ~{z_score_reportable_range}, "out_of_reportable_range.bool")
120+
EOF
121+
>>>
122+
123+
runtime {
124+
docker: "rocker/tidyverse@sha256:aaace6c41a258e13da76881f0b282932377680618fcd5d121583f9455305e727"
125+
disks: "local-disk 100 HDD"
126+
memory: "4 GB"
127+
}
128+
129+
output {
130+
Boolean out_of_reportable_range = read_boolean("out_of_reportable_range.bool")
131+
}
132+
}
133+
97134
task SelectValuesOfInterest {
98135
input {
99136
File score_result
100137
String sample_id
101138
String condition_name
102139
Float threshold
140+
Boolean out_of_reportable_range
103141
Float z_score_reportable_range
104142
}
105143
@@ -120,24 +158,21 @@ task SelectValuesOfInterest {
120158
percentile <- (score %>% pull(percentile))[[1]]
121159
risk <- ifelse(percentile > ~{threshold}, "HIGH", "NOT_HIGH")
122160
123-
raw_score_output <- ifelse(abs(adjusted_score) > ~{z_score_reportable_range}, "NOT_RESULTED", raw_score)
124-
adjusted_score_output <- ifelse(abs(adjusted_score) > ~{z_score_reportable_range}, "NOT_RESULTED", adjusted_score)
125-
percentile_output <- ifelse(abs(adjusted_score) > ~{z_score_reportable_range}, "NOT_RESULTED", percentile)
126-
risk_output <- ifelse(abs(adjusted_score) > ~{z_score_reportable_range}, "NOT_RESULTED", risk)
127-
reason_not_resulted <- ifelse(abs(adjusted_score) > ~{z_score_reportable_range},
128-
ifelse(adjusted_score > 0, paste("Z-SCORE ABOVE + ", ~{z_score_reportable_range}),
129-
paste("Z-SCORE BELOW - ", ~{z_score_reportable_range})
130-
),
161+
raw_score_output <- ~{if out_of_reportable_range then '"NOT_RESULTED"' else 'raw_score'}
162+
adjusted_score_output <- ~{if out_of_reportable_range then '"NOT_RESULTED"' else 'adjusted_score'}
163+
percentile_output <- ~{if out_of_reportable_range then '"NOT_RESULTED"' else 'percentile'}
164+
risk_output <- ~{if out_of_reportable_range then '"NOT_RESULTED"' else 'risk'}
165+
reason_not_resulted <- ~{if out_of_reportable_range then
166+
'ifelse(adjusted_score > 0, "Z-SCORE ABOVE + ' + z_score_reportable_range +'", "Z-SCORE BELOW - ' + z_score_reportable_range +'")' else
131167
"NA"
132-
)
168+
}
133169
134170
result <- tibble(sample_id = "~{sample_id}", ~{condition_name}_raw = raw_score_output,
135171
~{condition_name}_adjusted = adjusted_score_output,
136172
~{condition_name}_percentile = percentile_output,
137173
~{condition_name}_risk = risk_output,
138174
~{condition_name}_reason_not_resulted = reason_not_resulted)
139175
write_csv(result, "results.csv")
140-
141176
EOF
142177
>>>
143178

0 commit comments

Comments
 (0)