epiforecasts
diff --git a/‎R/model-flow.R
Lines changed: 92 additions & 0 deletions b/‎R/model-flow.R
Lines changed: 92 additions & 0 deletions
diff --git a/‎plots/flowchart.png
156 KB b/‎plots/flowchart.png
156 KB
diff --git a/‎report/supplement/Supplement.Rmd
Lines changed: 48 additions & 12 deletions b/‎report/supplement/Supplement.Rmd
Lines changed: 48 additions & 12 deletions
diff --git a/‎report/supplement/Supplement.pdf
174 KB b/‎report/supplement/Supplement.pdf
174 KB
@@ -0,0 +1,92 @@
+# Display model inclusion criteria in the style of participant flow diagram
+# Examples
+#   flow <- create_model_flow()
+#   flow |> fc_merge() |> fc_draw()
+
+library(here)
+library(dplyr)
+library(tidyr)
+library(lubridate)
+library(stringr)
+library(purrr)
+library(flowchart)
+
+# Build and export the model-eligibility flow chart (participant-flow style).
+#
+# Reads the Hub forecasts from data/covid19-forecast-hub-europe.parquet,
+# applies the inclusion criteria one after another, records which
+# (target_variable, model) pairs survive each step, and draws one flow chart
+# per target ("case", "death"). The merged chart is written to
+# plots/flowchart.png.
+#
+# Takes no arguments. Returns, invisibly, the value of the final fc_export()
+# call.
+create_model_flow <- function() {
+  # Study period: between start of hub and until end of JHU data
+  study_start <- as.Date("2021-03-07")
+  study_end <- as.Date("2023-03-10")
+  # Number of distinct quantiles a forecast must provide to be included
+  n_quantiles <- 23
+  model_keys <- c("target_variable", "model")
+  forecast_keys <- c("model", "target_variable", "forecast_date", "location")
+
+  forecasts <- arrow::read_parquet(
+    here("data", "covid19-forecast-hub-europe.parquet")
+  )
+
+  # Data cleaning -----
+  fc_clean <- forecasts |>
+    mutate(
+      # The first two characters of `target` are read as the horizon
+      # (in weeks, given the weeks() arithmetic below)
+      horizon = as.numeric(substr(target, 1, 2)),
+      target_variable = str_extract(target, "case|death|hosp"),
+      # Set forecast date to corresponding submission date
+      forecast_date = target_end_date - weeks(horizon) + days(1)
+    ) |>
+    select(
+      model, target_variable, location, forecast_date,
+      horizon, target_end_date, quantile, value
+    ) |>
+    # filter() drops rows whose dates are NA, whereas base `[cond, ]`
+    # indexing would have turned them into all-NA rows
+    filter(
+      forecast_date >= study_start,
+      target_end_date <= study_end
+    )
+  models0 <- distinct(fc_clean, target_variable, model)
+
+  # Exclusions (applied cumulatively, in this order) -----
+  # (1) Only include predictions from models with all quantiles
+  rm_quantiles <- fc_clean |>
+    group_by(across(all_of(forecast_keys))) |>
+    summarise(q = n_distinct(quantile), .groups = "drop") |>
+    filter(q < n_quantiles)
+  fc_clean <- anti_join(fc_clean, rm_quantiles, by = forecast_keys)
+  models1 <- distinct(fc_clean, target_variable, model) |>
+    mutate(inc_quantile = TRUE)
+
+  # (2) Only forecasts up to 4 weeks ahead
+  fc_clean <- filter(fc_clean, horizon <= 4)
+  models2 <- distinct(fc_clean, target_variable, model) |>
+    mutate(inc_horizon = TRUE)
+
+  # (3) Only forecasts for cases and deaths
+  fc_clean <- filter(fc_clean, target_variable %in% c("case", "death"))
+  models3 <- distinct(fc_clean, target_variable, model) |>
+    mutate(inc_target = TRUE)
+
+  # (4) Exclude Hub-created models
+  fc_clean <- filter(fc_clean, !grepl("EuroCOVIDhub-", model))
+  models4 <- distinct(fc_clean, target_variable, model) |>
+    mutate(inc_xhub = TRUE)
+
+  # Count models at each processing step: a model missing from a step's
+  # table did not survive that step, so its NA flag becomes FALSE
+  models <- models0 |>
+    left_join(models1, by = model_keys) |>
+    left_join(models2, by = model_keys) |>
+    left_join(models3, by = model_keys) |>
+    left_join(models4, by = model_keys) |>
+    mutate(across(starts_with("inc_"), \(x) coalesce(x, FALSE))) |>
+    filter(target_variable != "hosp")
+
+  # One flow chart per target; `inc_target` is recorded above but is not
+  # drawn as a step in the chart
+  flow <- map(
+    c("case", "death"),
+    \(target_name) {
+      models |>
+        filter(target_variable == target_name) |>
+        as_fc(label = paste0("Models forecasting ", target_name, "s"),
+              text_pattern = "{label}\n") |>
+        fc_filter(inc_quantile,
+                  label = "Provided 23 quantiles",
+                  show_exc = TRUE) |>
+        fc_filter(inc_horizon,
+                  label = "Provided 1:4 week predictions",
+                  show_exc = TRUE) |>
+        fc_filter(inc_xhub,
+                  label = "Not created by Hub",
+                  show_exc = TRUE) |>
+        fc_draw()
+    }
+  )
+
+  # Merge the per-target charts side by side, draw, and write to plots/
+  flow_chart <- flow |>
+    fc_merge() |>
+    fc_draw() |>
+    fc_export(filename = "flowchart.png", path = here("plots"),
+              width = 3000, height = 3000, res = 500)
+  invisible(flow_chart)
+}
@@ -1,5 +1,6 @@
 ---
-title: "Supplement"
+title: "The influence of model structure and geographic specificity on predictive accuracy among European COVID-19 forecasts"
+subtitle: "Supplementary information"
 output: 
   bookdown::pdf_document2
 ---
@@ -19,6 +20,45 @@ knitr::opts_chunk$set(
 )
 ```
 
+
+# Code and data availability
+
+## Code
+
+The codebase for this paper is publicly available at:
+
+- Github: <https://github.com/epiforecasts/eval-by-method>
+- Zenodo with DOI: <https://doi.org/10.5281/zenodo.14903162>
+
+Comments and code contributions are welcome - please use Github [Issues](https://github.com/epiforecasts/eval-by-method/issues).
+
+Please cite code using:
+
+- Katharine Sherratt & Sebastian Funk. (2025). epiforecasts/eval-by-method: Zenodo. <https://doi.org/10.5281/zenodo.14903162>
+
+## Source data
+
+Forecast and observed data were sourced from the European COVID-19 Forecast Hub, available to view at <https://covid19forecasthub.eu/>. All Hub data are now archived at:
+
+   - Github: <https://github.com/european-modelling-hubs/covid19-forecast-hub-europe_archive>
+   - Zenodo with DOI: <https://doi.org/10.5281/zenodo.13986751>
+   
+Data for this work were downloaded on 30th May 2023. These data are available in the Github repository for this paper at: <https://github.com/epiforecasts/eval-by-method/tree/main/data>
+
+\newpage
+
+# Model characteristics
+
+## Eligibility criteria
+
+```{r model-flow, fig.cap="Eligibility criteria for models contributing case (left) and death (right) forecasts to the European COVID-19 Forecast Hub, March 2021 - March 2023"}
+# source(here("R", "model-flow.R"))
+# flow_chart <- create_model_flow()
+knitr::include_graphics(here("plots", "flowchart.png"))
+```
+
+## Model characteristics
+
 ```{r load-data}
 # Load data
 source(here("R", "prep-data.R"))
@@ -31,12 +71,6 @@ scores <- scores |>
 n_forecasts <- nrow(scores)
 ```
 
-Code is available at: <https://github.com/epiforecasts/model-structure-evaluation>. 
-
-\newpage
-
-# Model characteristics
-
 ```{r metadata}
 table_metadata(scores) |>
   select(-Description) |>
@@ -45,7 +79,11 @@ table_metadata(scores) |>
 
 \newpage
 
-# Trend identification
+# Statistical methods
+
+## Epidemic trend identification
+
+We retrospectively categorised each week as “Stable”, “Decreasing”, or “Increasing”, based on the difference over a three-week moving average of incidence (with a change within +/-5% categorised as “Stable”).
 
 ```{r trends,fig.cap="Trends (cases)", fig.height = 8, fig.width = 10}
 scores |>
@@ -63,14 +101,12 @@ scores |>
 
 \newpage
 
-
-# Model fitting
+## Model fitting 
 
 ```{r model-wis}
 results <- readRDS(here("output", "results.rds"))
 ```
 
-
 ## Model formula
 
 `r results$formula`
@@ -79,7 +115,7 @@ results <- readRDS(here("output", "results.rds"))
 
 ### Cases
 
-```{r gamm-diagnostics-cases, echo = FALSE}
+```{r gamm-diagnostics-cases}
 # QQ plot, residuals
 knitr::include_graphics(here("plots", "check_Cases.pdf"))
 ```