@@ -15,28 +15,32 @@ workflow AggregatePRSResults {
15
15
16
16
call AggregateResults {
17
17
input :
18
+ group_n = group_n ,
18
19
results = results ,
19
20
missing_sites_shifts = missing_sites_shifts ,
20
- lab_batch = lab_batch
21
+ lab_batch = lab_batch ,
22
+ target_pc_projections = target_pc_projections
21
23
}
22
24
23
25
call PlotPCA {
24
26
input :
27
+ group_n = group_n ,
25
28
lab_batch = lab_batch ,
26
29
population_name = population_name ,
27
- target_pc_projections = target_pc_projections ,
30
+ batch_pcs = AggregateResults . batch_pcs ,
28
31
population_pc_projections = population_pc_projections
29
32
}
30
33
31
34
call BuildHTMLReport {
32
35
input :
36
+ group_n = group_n ,
33
37
lab_batch = lab_batch ,
34
38
batch_control_results = AggregateResults .batch_control_results ,
35
39
batch_missing_sites_shifts = AggregateResults .batch_missing_sites_shifts ,
36
40
expected_control_results = expected_control_results ,
37
41
batch_summarised_results = AggregateResults .batch_summarised_results ,
38
42
batch_pivoted_results = AggregateResults .batch_pivoted_results ,
39
- target_pc_projections = target_pc_projections ,
43
+ batch_pcs = AggregateResults . batch_pcs ,
40
44
population_pc_projections = population_pc_projections ,
41
45
population_name = population_name ,
42
46
high_risk_thresholds = high_risk_thresholds
@@ -50,13 +54,15 @@ workflow AggregatePRSResults {
50
54
File score_distribution = AggregateResults .batch_score_distribution
51
55
File pc_plot = PlotPCA .pc_plot
52
56
File report = BuildHTMLReport .report
57
+ File batch_pcs = AggregateResults .batch_pcs
53
58
}
54
59
}
55
60
56
61
task AggregateResults {
57
62
input {
58
63
Array [File ] results
59
64
Array [File ] missing_sites_shifts
65
+ Array [File ] target_pc_projections
60
66
String lab_batch
61
67
Int group_n
62
68
}
@@ -72,6 +78,9 @@ task AggregateResults {
72
78
library(ggplot2)
73
79
74
80
results <- c("~{sep='","' results}") %>% map(read_csv, col_types=cols(is_control_sample='l', .default='c')) %>% reduce(bind_rows)
81
+ target_pcs <- c("~{sep='","' target_pc_projections}") %>% map(read_tsv) %>% reduce(bind_rows) %>% select(-FID) %>% rename(sample_id = IID)
82
+
83
+ results <- inner_join(results, target_pcs)
75
84
76
85
lab_batch <- results %>% pull(lab_batch) %>% unique()
77
86
@@ -94,7 +103,7 @@ task AggregateResults {
94
103
95
104
write_tsv(results %>% filter(is_control_sample), "~{output_prefix}_control_results.tsv")
96
105
97
- results_pivoted <- results %>% filter(!is_control_sample) %>% pivot_longer(!c(sample_id, lab_batch, is_control_sample), names_to=c("condition",".value"), names_pattern="([^_]+) _(.+)")
106
+ results_pivoted <- results %>% select(-starts_with("PC")) %>% filter(!is_control_sample) %>% pivot_longer(!c(sample_id, lab_batch, is_control_sample), names_to=c("condition",".value"), names_pattern="(.+)(?<!not)(?<!reason) _(.+)$ ")
98
107
results_pivoted <- results_pivoted %T>% {options(warn=-1)} %>% mutate(adjusted = as.numeric(adjusted),
99
108
raw = as.numeric(raw),
100
109
percentile = as.numeric(percentile)) %T>% {options(warn=0)}
@@ -121,6 +130,7 @@ task AggregateResults {
121
130
122
131
missing_sites_shifts <- c("~{sep='","' missing_sites_shifts}") %>% map(read_tsv) %>% reduce(bind_rows)
123
132
write_tsv(missing_sites_shifts, "~{output_prefix}_missing_sites_shifts.tsv")
133
+ write_tsv(target_pcs, "~{output_prefix}_pcs.tsv")
124
134
125
135
EOF
126
136
>>>
@@ -138,12 +148,13 @@ task AggregateResults {
138
148
File batch_pivoted_results = "~{output_prefix}_pivoted_results.tsv"
139
149
File batch_score_distribution = "~{output_prefix}_score_distribution.png"
140
150
File batch_missing_sites_shifts = "~{output_prefix}_missing_sites_shifts.tsv"
151
+ File batch_pcs = "~{output_prefix}_pcs.tsv"
141
152
}
142
153
}
143
154
144
155
task PlotPCA {
145
156
input {
146
- Array[ File] target_pc_projections
157
+ File batch_pcs
147
158
File population_pc_projections
148
159
String lab_batch
149
160
Int group_n
@@ -158,7 +169,7 @@ task PlotPCA {
158
169
library(purrr)
159
170
library(ggplot2)
160
171
161
- target_pcs <- c ("~{sep='","' target_pc_projections}") %>% map(read_tsv) %>% reduce(bind_rows )
172
+ target_pcs <- read_tsv ("~{batch_pcs}" )
162
173
population_pcs <- read_tsv("~{population_pc_projections}")
163
174
164
175
ggplot(population_pcs, aes(x=PC1, y=PC2, color="~{population_name}")) +
@@ -191,7 +202,7 @@ task BuildHTMLReport {
191
202
File batch_summarised_results
192
203
File batch_pivoted_results
193
204
File high_risk_thresholds
194
- Array[ File] target_pc_projections
205
+ File batch_pcs
195
206
File population_pc_projections
196
207
String population_name
197
208
String lab_batch
@@ -295,23 +306,30 @@ task BuildHTMLReport {
295
306
\`\`\`{r score distributions, echo=FALSE, message=FALSE, warning=FALSE, results="asis", fig.align='center'}
296
307
normal_dist <- tibble(x=seq(-5,5,0.01)) %>% mutate(y=dnorm(x)) # needed because plotly doesn't work with geom_function
297
308
conditions_with_more_than_4_samples <- batch_pivoted_results %>% group_by(condition) %>% filter(!is.na(adjusted)) %>% count() %>% filter(n>4) %>% pull(condition)
298
- p_dist <- ggplot(batch_pivoted_results %>% filter(condition %in% conditions_with_more_than_4_samples), aes(x=adjusted)) +
299
- stat_density(aes(color=condition, text=condition), geom="line", position = "identity") +
300
- xlim(-5,5) + theme_bw() + xlab("z-score") + geom_line(data=normal_dist, aes(x=x, y=y), color="black") +
301
- geom_point(data = batch_pivoted_results %>% filter(!(condition %in% conditions_with_more_than_4_samples)), aes(color=condition, x = adjusted, text=condition), y=0) +
302
- ylab("density")
309
+ n_density <- batch_pivoted_results %>% filter(condition %in% conditions_with_more_than_4_samples) %>% nrow()
310
+ n_point <- batch_pivoted_results %>% filter(!(condition %in% conditions_with_more_than_4_samples)) %>% nrow()
311
+ p_dist <- ggplot()
312
+ if (n_density > 0) {
313
+ p_dist <- p_dist + stat_density(data = batch_pivoted_results %>% filter(condition %in% conditions_with_more_than_4_samples),
314
+ aes(color=condition, text=condition, x = adjusted), geom="line", position = "identity")
315
+ }
316
+ if (n_point > 0) {
317
+ p_dist <- p_dist + geom_point(data = batch_pivoted_results %>% filter(!(condition %in% conditions_with_more_than_4_samples)), aes(color=condition, x = adjusted, text=condition), y=0)
318
+ }
319
+ p_dist <- p_dist + xlim(-5,5) + theme_bw() + geom_line(data=normal_dist, aes(x=x, y=y), color="black") + xlab("z-score") + ylab("density")
320
+
303
321
ggplotly(p_dist, tooltip="text")
304
322
\`\`\`
305
323
306
324
## PCA
307
325
#### Hover for sample ID
308
326
\`\`\`{r pca plot, echo=FALSE, message=FALSE, warning=FALSE, results="asis", fig.align='center'}
309
- target_pcs <- c ("~{sep='","' target_pc_projections}") %>% map(read_tsv) %>% reduce(bind_rows )
327
+ target_pcs <- read_tsv ("~{batch_pcs}" )
310
328
population_pcs <- read_tsv("~{population_pc_projections}")
311
329
312
330
p <- ggplot(population_pcs, aes(x=PC1, y=PC2, color="~{population_name}")) +
313
331
geom_point() +
314
- geom_point(data=target_pcs, aes(color="~{lab_batch}", text=paste0("Sample ID: ", IID ))) +
332
+ geom_point(data=target_pcs, aes(color="~{lab_batch}", text=paste0("Sample ID: ", sample_id ))) +
315
333
theme_bw()
316
334
ggplotly(p, tooltip="text")
317
335
\`\`\`
0 commit comments