|
| 1 | +# Display model inclusion criteria in the style of participant flow diagram |
| 2 | +# Examples |
| 3 | +# flow <- create_model_flow() |
| 4 | +# flow |> fc_merge() |> fc_draw() |
| 5 | + |
| 6 | +library(here) |
| 7 | +library(dplyr) |
| 8 | +library(tidyr) |
| 9 | +library(lubridate) |
| 10 | +library(stringr) |
| 11 | +library(purrr) |
| 12 | +library(flowchart) |
| 13 | + |
| 14 | +create_model_flow <- function() { |
| 15 | + forecasts <- arrow::read_parquet(here( |
| 16 | + "data", |
| 17 | + "covid19-forecast-hub-europe.parquet" |
| 18 | + )) |
| 19 | + fc_clean <- forecasts |
| 20 | + # Data cleaning |
| 21 | + fc_clean$horizon = as.numeric(substr(fc_clean$target, 1,2)) |
| 22 | + fc_clean$target_variable = str_extract(fc_clean$target, "case|death|hosp") |
| 23 | + # Set forecast date to corresponding submission date |
| 24 | + fc_clean$forecast_date = fc_clean$target_end_date - weeks(fc_clean$horizon) + days(1) |
| 25 | + |
| 26 | + fc_clean <- fc_clean[c("model", "target_variable", "location", "forecast_date", |
| 27 | + "horizon", "target_end_date", "quantile", "value")] |
| 28 | + # Study period: between start of hub and until end of JHU data |
| 29 | + fc_clean <- fc_clean[fc_clean$forecast_date >= as.Date("2021-03-07") & |
| 30 | + fc_clean$target_end_date <= as.Date("2023-03-10"),] |
| 31 | + models0 <- distinct(fc_clean, target_variable, model) |
| 32 | + |
| 33 | + # Exclusions ----- |
| 34 | + # (1) Only include predictions from models with all quantiles |
| 35 | + rm_quantiles <- fc_clean |> |
| 36 | + group_by(model, target_variable, forecast_date, location) |> |
| 37 | + summarise(q = length(unique(quantile))) |> |
| 38 | + filter(q < 23) |
| 39 | + fc_clean <- anti_join(fc_clean, rm_quantiles, |
| 40 | + by = c("model", "target_variable", |
| 41 | + "forecast_date", "location") |
| 42 | + ) |
| 43 | + models1 <- distinct(fc_clean, target_variable, model) |> |
| 44 | + mutate(inc_quantile = TRUE) |
| 45 | + |
| 46 | + # (3) Only forecasts up to 4 weeks ahead |
| 47 | + fc_clean <- filter(fc_clean, horizon <= 4) |
| 48 | + models2 <- distinct(fc_clean, target_variable, model) |> |
| 49 | + mutate(inc_horizon = TRUE) |
| 50 | + |
| 51 | + # (2) Only forecasts for cases and deaths |
| 52 | + fc_clean <- filter(fc_clean, target_variable %in% c("case", "death")) |
| 53 | + models3 <- distinct(fc_clean, target_variable, model) |> |
| 54 | + mutate(inc_target = TRUE) |
| 55 | + |
| 56 | + # (4) Exclude Hub-created models |
| 57 | + fc_clean <- filter(fc_clean, !grepl("EuroCOVIDhub-", model)) |
| 58 | + models4 <- distinct(fc_clean, target_variable, model) |> |
| 59 | + mutate(inc_xhub = TRUE) |
| 60 | + |
| 61 | + # Count models at each processing step |
| 62 | + models <- left_join(models0, models1) |> |
| 63 | + left_join(models2) |> |
| 64 | + left_join(models3) |> |
| 65 | + left_join(models4) |> |
| 66 | + mutate(across(starts_with("inc_"), ~ if_else(is.na(.), FALSE, .))) |> |
| 67 | + filter(target_variable != "hosp") |
| 68 | + |
| 69 | + flow <- imap(c("case", "death"), |
| 70 | + ~ models |> |
| 71 | + filter(target_variable == .x) |> |
| 72 | + as_fc(label = paste0("Models forecasting ", |
| 73 | + .x, "s"), |
| 74 | + text_pattern = "{label}\n") |> |
| 75 | + fc_filter(inc_quantile, |
| 76 | + label = "Provided 23 quantiles", |
| 77 | + show_exc = TRUE) |> |
| 78 | + fc_filter(inc_horizon, |
| 79 | + label = "Provided 1:4 week predictions", |
| 80 | + show_exc = TRUE) |> |
| 81 | + fc_filter(inc_xhub, |
| 82 | + label = "Not created by Hub", |
| 83 | + show_exc = TRUE) |> |
| 84 | + fc_draw() |
| 85 | + ) |
| 86 | + |
| 87 | + flow_chart <- flow |> |
| 88 | + fc_merge() |> |
| 89 | + fc_draw() |> |
| 90 | + fc_export(filename = "flowchart.png", path = here("plots"), |
| 91 | + width = 3000, height = 3000, res = 500) |
| 92 | +} |
0 commit comments