From 0fca7fec10f23c8c0891674d749c103cc2bbcf11 Mon Sep 17 00:00:00 2001
From: Nat DeFries <42820733+nmdefries@users.noreply.github.com>
Date: Thu, 3 Jun 2021 18:36:00 -0400
Subject: [PATCH 1/2] support hospitalizations in data pipeline

fetch hosp data with all aheads
---
Review notes (this section sits below the three-dash line, so it is not part
of the commit message):
 * Line breaks and indentation were rebuilt from a whitespace-collapsed copy
   of this series; every hunk was re-counted against its @@ header. The
   `index` blob hashes are carried over unchanged from the submission.
 * create_reports.R: the NA filter now names the `target_end_date` column
   directly instead of `predictions_cards$target_end_date`. The piped data is
   that same object, so the rows kept are unchanged.
 * score.R, evaluate_chu(): the looked-up truth target and truth source are
   stored in `truth_target` / `truth_source`, so the locals no longer reuse
   the name of the `signal` column filtered on in the same loop or of base
   R's `source()`.

 Report/create_reports.R | 28 ++++++++++++++++++++++------
 Report/score.R          | 32 +++++++++++++++++++-------------
 2 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/Report/create_reports.R b/Report/create_reports.R
index 6f3f822..c9bd4fd 100644
--- a/Report/create_reports.R
+++ b/Report/create_reports.R
@@ -32,20 +32,30 @@ state_geos = locations %>%
   filter(nchar(.data$geo_value) == 2) %>%
   pull(.data$geo_value)
 signals = c("confirmed_incidence_num",
-            "deaths_incidence_num")
+            "deaths_incidence_num",
+            "confirmed_admissions_covid_1d")
 
 predictions_cards = get_covidhub_predictions(forecasters,
                                              signal = signals,
+                                             ahead = 1:28,
                                              geo_values = state_geos,
                                              verbose = TRUE,
-                                             use_disk = TRUE)
+                                             use_disk = TRUE) %>%
+  filter(!(incidence_period == "epiweek" & ahead > 4))
+
 predictions_cards = predictions_cards %>%
-  filter(!is.na(predictions_cards$target_end_date))
-predictions_cards = predictions_cards %>% filter(target_end_date < today())
+  filter(!is.na(target_end_date)) %>%
+  filter(target_end_date < today())
 
-# Only accept forecasts made Monday or earlier
+# For epiweek predictions, only accept forecasts made Monday or earlier.
+# target_end_date is the date of the last day (Saturday) in the epiweek
+# For daily predictions, accept any forecast where the target_end_date is later
+# than the forecast_date.
 predictions_cards = predictions_cards %>%
-  filter(target_end_date - (forecast_date + 7 * ahead) >= -2)
+  filter(
+    (incidence_period == "epiweek" & target_end_date - (forecast_date + 7 * ahead) >= -2) |
+    (incidence_period == "day" & target_end_date > forecast_date)
+  )
 
 # And only a forecaster's last forecast if multiple were made
 predictions_cards = predictions_cards %>%
@@ -97,6 +107,9 @@ save_score_cards(state_scores, "state", signal_name = "confirmed_incidence_num",
 print("Saving state deaths incidence...")
 save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num",
                  output_dir = opt$dir)
+print("Saving state hospitalizations...")
+save_score_cards(state_scores, "state", signal_name = "confirmed_admissions_covid_1d",
+                 output_dir = opt$dir)
 print("Evaluating national forecasts")
 # COVIDcast does not return national level data, using CovidHubUtils instead
 nation_scores = evaluate_chu(nation_predictions, signals, err_measures)
@@ -108,5 +121,8 @@ save_score_cards(nation_scores, "nation",
 print("Saving nation deaths incidence...")
 save_score_cards(nation_scores, "nation", signal_name = "deaths_incidence_num",
                  output_dir = opt$dir)
+print("Saving nation hospitalizations...")
+save_score_cards(nation_scores, "nation", signal_name = "confirmed_admissions_covid_1d",
+                 output_dir = opt$dir)
 
 print("Done")
diff --git a/Report/score.R b/Report/score.R
index 701b115..ac61312 100644
--- a/Report/score.R
+++ b/Report/score.R
@@ -3,7 +3,8 @@ library("assertthat")
 
 save_score_cards = function(score_card, geo_type = c("state", "nation"),
                             signal_name = c("confirmed_incidence_num",
-                                            "deaths_incidence_num"),
+                                            "deaths_incidence_num",
+                                            "confirmed_admissions_covid_1d"),
                             output_dir = ".") {
   signal_name = match.arg(signal_name)
   geo_type = match.arg(geo_type)
@@ -13,11 +14,11 @@ save_score_cards = function(score_card, geo_type = c("state", "nation"),
   assert_that(signal_name %in% signals,
               msg = "signal is not in score_card")
   score_card = score_card %>% filter(signal == signal_name)
-  if (signal_name == "confirmed_incidence_num") {
-    sig_suffix = "cases"
-  } else {
-    sig_suffix = "deaths"
-  }
+
+  type_map <- list("confirmed_incidence_num" = "cases",
+                   "deaths_incidence_num" = "deaths",
+                   "confirmed_admissions_covid_1d" = "hospitalizations")
+  sig_suffix <- type_map[[signal_name]]
   output_file_name = file.path(output_dir,
                                paste0("score_cards_", geo_type, "_",
                                       sig_suffix, ".rds"))
@@ -37,20 +38,25 @@ save_score_cards = function(score_card, geo_type = c("state", "nation"),
 
 evaluate_chu = function(predictions, signals, err_measures) {
   allowed_signals = c("confirmed_incidence_num",
-                      "deaths_incidence_num")
+                      "deaths_incidence_num",
+                      "confirmed_admissions_covid_1d")
   assert_that(all(signals %in% allowed_signals),
               msg = paste("Signal not allowed:",
                           setdiff(signals, allowed_signals)))
+
+  target_map <- list("confirmed_incidence_num" = "inc case",
+                     "deaths_incidence_num" = "inc death",
+                     "confirmed_admissions_covid_1d" = "inc hosp")
+  source_map <- list("confirmed_incidence_num" = "JHU",
+                     "deaths_incidence_num" = "JHU",
+                     "confirmed_admissions_covid_1d" = "HealthData")
 
   scores = c()
   for (signal_name in signals) {
     preds_signal = predictions %>% filter(signal == signal_name)
-    if (signal_name == "confirmed_incidence_num") {
-      jhu_signal = "inc case"
-    } else {
-      jhu_signal = "inc death"
-    }
-    chu_truth = covidHubUtils::load_truth("JHU", jhu_signal)
+    truth_target <- target_map[[signal_name]]
+    truth_source <- source_map[[signal_name]]
+    chu_truth = covidHubUtils::load_truth(truth_source, truth_target)
     chu_truth = chu_truth %>%
       rename(actual = value) %>%
       select(-c(model,
From 62d868b588f8f88c2360a582fedb22e568d01845 Mon Sep 17 00:00:00 2001
From: Nat DeFries <42820733+nmdefries@users.noreply.github.com>
Date: Mon, 14 Jun 2021 17:42:49 -0400
Subject: [PATCH 2/2] add warnings if cases/deaths not generated; prevent hosp
 from failing if not

---
Review notes (this section sits below the three-dash line, so it is not part
of the commit message):
 * The three nation-level guards now test `nation_scores$signal`. As
   submitted they tested `state_scores$signal`, so a signal scored for states
   but missing from `nation_scores` would still be handed to
   save_score_cards(), which asserts `signal_name %in% signals` -- presumably
   the failure this commit is meant to prevent.
 * Each warning() message is passed as two string pieces. warning() pastes
   its arguments together, so the emitted text no longer carries the line
   break and source indentation that sat inside the original literal.
 * `unique()` around the right-hand side of `%in%` was redundant and is
   dropped; the stray space in `if ( ` is removed.

 Report/create_reports.R | 60 ++++++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/Report/create_reports.R b/Report/create_reports.R
index c9bd4fd..3e53201 100644
--- a/Report/create_reports.R
+++ b/Report/create_reports.R
@@ -101,28 +101,52 @@ state_scores = evaluate_covid_predictions(state_predictions, geo_type = "state")
 
 source("score.R")
 
-print("Saving state confirmed incidence...")
-save_score_cards(state_scores, "state", signal_name = "confirmed_incidence_num",
-                 output_dir = opt$dir)
-print("Saving state deaths incidence...")
-save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num",
-                 output_dir = opt$dir)
-print("Saving state hospitalizations...")
-save_score_cards(state_scores, "state", signal_name = "confirmed_admissions_covid_1d",
-                 output_dir = opt$dir)
+if ("confirmed_incidence_num" %in% state_scores$signal) {
+  print("Saving state confirmed incidence...")
+  save_score_cards(state_scores, "state", signal_name = "confirmed_incidence_num",
+                   output_dir = opt$dir)
+} else {
+  warning("State confirmed incidence should generally be available. Please ",
+          "verify that you expect not to have any cases incidence forecasts")
+}
+if ("deaths_incidence_num" %in% state_scores$signal) {
+  print("Saving state deaths incidence...")
+  save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num",
+                   output_dir = opt$dir)
+} else {
+  warning("State deaths incidence should generally be available. Please ",
+          "verify that you expect not to have any deaths incidence forecasts")
+}
+if ("confirmed_admissions_covid_1d" %in% state_scores$signal) {
+  print("Saving state hospitalizations...")
+  save_score_cards(state_scores, "state", signal_name = "confirmed_admissions_covid_1d",
+                   output_dir = opt$dir)
+}
 print("Evaluating national forecasts")
 # COVIDcast does not return national level data, using CovidHubUtils instead
 nation_scores = evaluate_chu(nation_predictions, signals, err_measures)
 
 
-print("Saving nation confirmed incidence...")
-save_score_cards(nation_scores, "nation",
-                 signal_name = "confirmed_incidence_num", output_dir = opt$dir)
-print("Saving nation deaths incidence...")
-save_score_cards(nation_scores, "nation", signal_name = "deaths_incidence_num",
-                 output_dir = opt$dir)
-print("Saving nation hospitalizations...")
-save_score_cards(nation_scores, "nation", signal_name = "confirmed_admissions_covid_1d",
-                 output_dir = opt$dir)
+if ("confirmed_incidence_num" %in% nation_scores$signal) {
+  print("Saving nation confirmed incidence...")
+  save_score_cards(nation_scores, "nation",
+                   signal_name = "confirmed_incidence_num", output_dir = opt$dir)
+} else {
+  warning("Nation confirmed incidence should generally be available. Please ",
+          "verify that you expect not to have any cases incidence forecasts")
+}
+if ("deaths_incidence_num" %in% nation_scores$signal) {
+  print("Saving nation deaths incidence...")
+  save_score_cards(nation_scores, "nation", signal_name = "deaths_incidence_num",
+                   output_dir = opt$dir)
+} else {
+  warning("Nation deaths incidence should generally be available. Please ",
+          "verify that you expect not to have any deaths incidence forecasts")
+}
+if ("confirmed_admissions_covid_1d" %in% nation_scores$signal) {
+  print("Saving nation hospitalizations...")
+  save_score_cards(nation_scores, "nation", signal_name = "confirmed_admissions_covid_1d",
+                   output_dir = opt$dir)
+}
 
 print("Done")