Skip to content

Commit f13b2dc

Browse files
authored
Merge pull request #87 from epiforecasts/model-flow
Add model eligibility flowchart to supplement
2 parents 4f3b6c7 + 3b497fc commit f13b2dc

File tree

4 files changed

+140
-12
lines changed

4 files changed

+140
-12
lines changed

R/model-flow.R

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Display model inclusion criteria in the style of participant flow diagram
2+
# Examples
3+
# flow <- create_model_flow()
4+
# flow |> fc_merge() |> fc_draw()
5+
6+
library(here)
7+
library(dplyr)
8+
library(tidyr)
9+
library(lubridate)
10+
library(stringr)
11+
library(purrr)
12+
library(flowchart)
13+
14+
create_model_flow <- function() {
15+
forecasts <- arrow::read_parquet(here(
16+
"data",
17+
"covid19-forecast-hub-europe.parquet"
18+
))
19+
fc_clean <- forecasts
20+
# Data cleaning
21+
fc_clean$horizon = as.numeric(substr(fc_clean$target, 1,2))
22+
fc_clean$target_variable = str_extract(fc_clean$target, "case|death|hosp")
23+
# Set forecast date to corresponding submission date
24+
fc_clean$forecast_date = fc_clean$target_end_date - weeks(fc_clean$horizon) + days(1)
25+
26+
fc_clean <- fc_clean[c("model", "target_variable", "location", "forecast_date",
27+
"horizon", "target_end_date", "quantile", "value")]
28+
# Study period: between start of hub and until end of JHU data
29+
fc_clean <- fc_clean[fc_clean$forecast_date >= as.Date("2021-03-07") &
30+
fc_clean$target_end_date <= as.Date("2023-03-10"),]
31+
models0 <- distinct(fc_clean, target_variable, model)
32+
33+
# Exclusions -----
34+
# (1) Only include predictions from models with all quantiles
35+
rm_quantiles <- fc_clean |>
36+
group_by(model, target_variable, forecast_date, location) |>
37+
summarise(q = length(unique(quantile))) |>
38+
filter(q < 23)
39+
fc_clean <- anti_join(fc_clean, rm_quantiles,
40+
by = c("model", "target_variable",
41+
"forecast_date", "location")
42+
)
43+
models1 <- distinct(fc_clean, target_variable, model) |>
44+
mutate(inc_quantile = TRUE)
45+
46+
# (3) Only forecasts up to 4 weeks ahead
47+
fc_clean <- filter(fc_clean, horizon <= 4)
48+
models2 <- distinct(fc_clean, target_variable, model) |>
49+
mutate(inc_horizon = TRUE)
50+
51+
# (2) Only forecasts for cases and deaths
52+
fc_clean <- filter(fc_clean, target_variable %in% c("case", "death"))
53+
models3 <- distinct(fc_clean, target_variable, model) |>
54+
mutate(inc_target = TRUE)
55+
56+
# (4) Exclude Hub-created models
57+
fc_clean <- filter(fc_clean, !grepl("EuroCOVIDhub-", model))
58+
models4 <- distinct(fc_clean, target_variable, model) |>
59+
mutate(inc_xhub = TRUE)
60+
61+
# Count models at each processing step
62+
models <- left_join(models0, models1) |>
63+
left_join(models2) |>
64+
left_join(models3) |>
65+
left_join(models4) |>
66+
mutate(across(starts_with("inc_"), ~ if_else(is.na(.), FALSE, .))) |>
67+
filter(target_variable != "hosp")
68+
69+
flow <- imap(c("case", "death"),
70+
~ models |>
71+
filter(target_variable == .x) |>
72+
as_fc(label = paste0("Models forecasting ",
73+
.x, "s"),
74+
text_pattern = "{label}\n") |>
75+
fc_filter(inc_quantile,
76+
label = "Provided 23 quantiles",
77+
show_exc = TRUE) |>
78+
fc_filter(inc_horizon,
79+
label = "Provided 1:4 week predictions",
80+
show_exc = TRUE) |>
81+
fc_filter(inc_xhub,
82+
label = "Not created by Hub",
83+
show_exc = TRUE) |>
84+
fc_draw()
85+
)
86+
87+
flow_chart <- flow |>
88+
fc_merge() |>
89+
fc_draw() |>
90+
fc_export(filename = "flowchart.png", path = here("plots"),
91+
width = 3000, height = 3000, res = 500)
92+
}

plots/flowchart.png

156 KB
Loading

report/supplement/Supplement.Rmd

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
2-
title: "Supplement"
2+
title: "The influence of model structure and geographic specificity on predictive accuracy among European COVID-19 forecasts"
3+
subtitle: "Supplementary information"
34
output:
45
bookdown::pdf_document2
56
---
@@ -19,6 +20,45 @@ knitr::opts_chunk$set(
1920
)
2021
```
2122

23+
24+
# Code and data availability
25+
26+
## Code
27+
28+
The codebase for this paper is publicly available at:
29+
30+
- Github: <https://github.com/epiforecasts/eval-by-method>
31+
- Zenodo with DOI: <https://doi.org/10.5281/zenodo.14903162>
32+
33+
Comments and code contributions are welcome - please use Github [Issues](https://github.com/epiforecasts/eval-by-method/issues).
34+
35+
Please cite code using:
36+
37+
- Katharine Sherratt & Sebastian Funk. (2025). epiforecasts/eval-by-method: Zenodo. <https://doi.org/10.5281/zenodo.14903162>
38+
39+
## Source data
40+
41+
Forecast and and observed data were sourced from the European COVID-19 Forecast Hub, available to view at <https://covid19forecasthub.eu/> . All Hub data are now archived at:
42+
43+
- Github: <https://github.com/european-modelling-hubs/covid19-forecast-hub-europe_archive>
44+
- Zenodo with DOI: <https://doi.org/10.5281/zenodo.13986751>
45+
46+
Data for this work were downloaded on 30th May 2023. These data are available in the Github repository for this paper at: <https://github.com/epiforecasts/eval-by-method/tree/main/data>
47+
48+
\newpage
49+
50+
# Model characteristics
51+
52+
## Eligibility criteria
53+
54+
```{r model-flow, fig.cap="Eligibility criteria for models contributing case (left) and death (right) forecasts to the European COVID-19 Forecast Hub, March 2021 - March 2023"}
55+
# source(here("R", "model-flow.R"))
56+
# flow_chart <- create_model_flow()
57+
knitr::include_graphics(here("plots", "flowchart.png"))
58+
```
59+
60+
## Model characteristics
61+
2262
```{r load-data}
2363
# Load data
2464
source(here("R", "prep-data.R"))
@@ -31,12 +71,6 @@ scores <- scores |>
3171
n_forecasts <- nrow(scores)
3272
```
3373

34-
Code is available at: <https://github.com/epiforecasts/model-structure-evaluation>.
35-
36-
\newpage
37-
38-
# Model characteristics
39-
4074
```{r metadata}
4175
table_metadata(scores) |>
4276
select(-Description) |>
@@ -45,7 +79,11 @@ table_metadata(scores) |>
4579

4680
\newpage
4781

48-
# Trend identification
82+
# Statistical methods
83+
84+
## Epidemic trend identification
85+
86+
We retrospectively categorised each week as “Stable”, “Decreasing”, or “Increasing”, based on the difference over a three-week moving average of incidence (with a change of +/-5% as “Stable”).
4987

5088
```{r trends,fig.cap="Trends (cases)", fig.height = 8, fig.width = 10}
5189
scores |>
@@ -63,14 +101,12 @@ scores |>
63101

64102
\newpage
65103

66-
67-
# Model fitting
104+
## Model fitting
68105

69106
```{r model-wis}
70107
results <- readRDS(here("output", "results.rds"))
71108
```
72109

73-
74110
## Model formula
75111

76112
`r results$formula`
@@ -79,7 +115,7 @@ results <- readRDS(here("output", "results.rds"))
79115

80116
### Cases
81117

82-
```{r gamm-diagnostics-cases, echo = FALSE}
118+
```{r gamm-diagnostics-cases}
83119
# QQ plot, residuals
84120
knitr::include_graphics(here("plots", "check_Cases.pdf"))
85121
```

report/supplement/Supplement.pdf

174 KB
Binary file not shown.

0 commit comments

Comments
 (0)