From 0fca7fec10f23c8c0891674d749c103cc2bbcf11 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 3 Jun 2021 18:36:00 -0400 Subject: [PATCH 01/26] support hospitalizations in data pipeline fetch hosp data with all aheads --- Report/create_reports.R | 28 ++++++++++++++++++++++------ Report/score.R | 32 +++++++++++++++++++------------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 6f3f822..c9bd4fd 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -32,20 +32,30 @@ state_geos = locations %>% filter(nchar(.data$geo_value) == 2) %>% pull(.data$geo_value) signals = c("confirmed_incidence_num", - "deaths_incidence_num") + "deaths_incidence_num", + "confirmed_admissions_covid_1d") predictions_cards = get_covidhub_predictions(forecasters, signal = signals, + ahead = 1:28, geo_values = state_geos, verbose = TRUE, - use_disk = TRUE) + use_disk = TRUE) %>% + filter(!(incidence_period == "epiweek" & ahead > 4)) + predictions_cards = predictions_cards %>% - filter(!is.na(predictions_cards$target_end_date)) -predictions_cards = predictions_cards %>% filter(target_end_date < today()) + filter(!is.na(predictions_cards$target_end_date)) %>% + filter(target_end_date < today()) -# Only accept forecasts made Monday or earlier +# For epiweek predictions, only accept forecasts made Monday or earlier. +# target_end_date is the date of the last day (Saturday) in the epiweek +# For daily predictions, accept any forecast where the target_end_date is later +# than the forecast_date. predictions_cards = predictions_cards %>% - filter(target_end_date - (forecast_date + 7 * ahead) >= -2) + filter( + (incidence_period == "epiweek" & target_end_date - (forecast_date + 7 * ahead) >= -2) | + (incidence_period == "day" & target_end_date > forecast_date) + ) # And only a forecaster's last forecast if multiple were made predictions_cards = predictions_cards %>% @@ -97,6 +107,9 @@ save_score_cards(state_scores, "state", signal_name = "confirmed_incidence_num", print("Saving state deaths incidence...") save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num", output_dir = opt$dir) +print("Saving state hospitalizations...") +save_score_cards(state_scores, "state", signal_name = "confirmed_admissions_covid_1d", + output_dir = opt$dir) print("Evaluating national forecasts") # COVIDcast does not return national level data, using CovidHubUtils instead @@ -108,5 +121,8 @@ save_score_cards(nation_scores, "nation", print("Saving nation deaths incidence...") save_score_cards(nation_scores, "nation", signal_name = "deaths_incidence_num", output_dir = opt$dir) +print("Saving nation hospitalizations...") +save_score_cards(nation_scores, "nation", signal_name = "confirmed_admissions_covid_1d", + output_dir = opt$dir) print("Done") diff --git a/Report/score.R b/Report/score.R index 701b115..ac61312 100644 --- a/Report/score.R +++ b/Report/score.R @@ -3,7 +3,8 @@ library("assertthat") save_score_cards = function(score_card, geo_type = c("state", "nation"), signal_name = c("confirmed_incidence_num", - "deaths_incidence_num"), + "deaths_incidence_num", + "confirmed_admissions_covid_1d"), output_dir = ".") { signal_name = match.arg(signal_name) geo_type = match.arg(geo_type) @@ -13,11 +14,11 @@ save_score_cards = function(score_card, geo_type = c("state", "nation"), assert_that(signal_name %in% signals, msg = "signal is not in score_card") score_card = score_card %>% filter(signal == signal_name) - if (signal_name == "confirmed_incidence_num") { - sig_suffix = "cases" - } else { - sig_suffix = "deaths" - } + + type_map <- list("confirmed_incidence_num" = "cases", + "deaths_incidence_num" = "deaths", + "confirmed_admissions_covid_1d" = "hospitalizations") + sig_suffix <- type_map[[signal_name]] output_file_name = file.path(output_dir, paste0("score_cards_", geo_type, "_", sig_suffix, ".rds")) @@ -37,20 +38,25 @@ save_score_cards = function(score_card, geo_type = c("state", "nation"), evaluate_chu = function(predictions, signals, err_measures) { allowed_signals = c("confirmed_incidence_num", - "deaths_incidence_num") + "deaths_incidence_num", + "confirmed_admissions_covid_1d") assert_that(all(signals %in% allowed_signals), msg = paste("Signal not allowed:", setdiff(signals, allowed_signals))) + + target_map <- list("confirmed_incidence_num" = "inc case", + "deaths_incidence_num" = "inc death", + "confirmed_admissions_covid_1d" = "inc hosp") + source_map <- list("confirmed_incidence_num" = "JHU", + "deaths_incidence_num" = "JHU", + "confirmed_admissions_covid_1d" = "HealthData") scores = c() for (signal_name in signals) { preds_signal = predictions %>% filter(signal == signal_name) - if (signal_name == "confirmed_incidence_num") { - jhu_signal = "inc case" - } else { - jhu_signal = "inc death" - } - chu_truth = covidHubUtils::load_truth("JHU", jhu_signal) + signal <- target_map[[signal_name]] + source <- source_map[[signal_name]] + chu_truth = covidHubUtils::load_truth(source, signal) chu_truth = chu_truth %>% rename(actual = value) %>% select(-c(model, From 62d868b588f8f88c2360a582fedb22e568d01845 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 14 Jun 2021 17:42:49 -0400 Subject: [PATCH 02/26] add warnings if cases/deaths not generated; prevent hosp from failing if not --- Report/create_reports.R | 60 ++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index c9bd4fd..3e53201 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -101,28 +101,52 @@ state_scores = evaluate_covid_predictions(state_predictions, geo_type = "state") source("score.R") -print("Saving state confirmed incidence...") -save_score_cards(state_scores, "state", signal_name = "confirmed_incidence_num", - output_dir = opt$dir) -print("Saving state deaths incidence...") -save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num", - output_dir = opt$dir) -print("Saving state hospitalizations...") -save_score_cards(state_scores, "state", signal_name = "confirmed_admissions_covid_1d", - output_dir = opt$dir) +if ( "confirmed_incidence_num" %in% unique(state_scores$signal)) { + print("Saving state confirmed incidence...") + save_score_cards(state_scores, "state", signal_name = "confirmed_incidence_num", + output_dir = opt$dir) +} else { + warning("State confirmed incidence should generally be available. Please + verify that you expect not to have any cases incidence forecasts") +} +if ( "deaths_incidence_num" %in% unique(state_scores$signal)) { + print("Saving state deaths incidence...") + save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num", + output_dir = opt$dir) +} else { + warning("State deaths incidence should generally be available. Please + verify that you expect not to have any deaths incidence forecasts") +} +if ( "confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) { + print("Saving state hospitalizations...") + save_score_cards(state_scores, "state", signal_name = "confirmed_admissions_covid_1d", + output_dir = opt$dir) +} print("Evaluating national forecasts") # COVIDcast does not return national level data, using CovidHubUtils instead nation_scores = evaluate_chu(nation_predictions, signals, err_measures) -print("Saving nation confirmed incidence...") -save_score_cards(nation_scores, "nation", - signal_name = "confirmed_incidence_num", output_dir = opt$dir) -print("Saving nation deaths incidence...") -save_score_cards(nation_scores, "nation", signal_name = "deaths_incidence_num", - output_dir = opt$dir) -print("Saving nation hospitalizations...") -save_score_cards(nation_scores, "nation", signal_name = "confirmed_admissions_covid_1d", - output_dir = opt$dir) +if ( "confirmed_incidence_num" %in% unique(state_scores$signal)) { + print("Saving nation confirmed incidence...") + save_score_cards(nation_scores, "nation", + signal_name = "confirmed_incidence_num", output_dir = opt$dir) +} else { + warning("Nation confirmed incidence should generally be available. Please + verify that you expect not to have any cases incidence forecasts") +} +if ( "deaths_incidence_num" %in% unique(state_scores$signal)) { + print("Saving nation deaths incidence...") + save_score_cards(nation_scores, "nation", signal_name = "deaths_incidence_num", + output_dir = opt$dir) +} else { + warning("Nation deaths incidence should generally be available. Please + verify that you expect not to have any deaths incidence forecasts") +} +if ( "confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) { + print("Saving nation hospitalizations...") + save_score_cards(nation_scores, "nation", signal_name = "confirmed_admissions_covid_1d", + output_dir = opt$dir) +} print("Done") From 87c947aae2f8365b53782dda296e476d08d99518 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 18 Jun 2021 19:20:29 -0400 Subject: [PATCH 03/26] force container build to always pull newest covidcast image --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 64beeec..ace2021 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ S3_BUCKET=s3://forecast-eval build: build_dashboard r_build: - docker build -t forecast-eval-build docker_build + docker build --no-cache --pull -t forecast-eval-build docker_build predictions_cards.rds score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds: dist test -f dist/$@ || curl -o dist/$@ $(S3_URL)/$@ From eddf2cd443700f3ba34f06af2ad885db7f7b29e8 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 21 Jun 2021 09:59:26 -0400 Subject: [PATCH 04/26] generalize to all docker run/build targets --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index ace2021..6df94a4 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ deploy: score_forecast # Starts a docker image with a full preconfigured R environment start_dev: r_build - docker run -ti --rm \ + docker run --pull=always -ti --rm \ -v ${PWD}/Report:/var/forecast-eval \ -v ${PWD}/dashboard:/var/forecast-eval-dashboard \ -v ${PWD}/dist:/var/dist \ @@ -40,13 +40,13 @@ start_dev: r_build ghcr.io/cmu-delphi/forecast-eval:latest bash build_dashboard_dev: pull_data - docker build -t ghcr.io/cmu-delphi/forecast-eval:latest -f docker_dashboard/Dockerfile . + docker build --no-cache --pull -t ghcr.io/cmu-delphi/forecast-eval:latest -f docker_dashboard/Dockerfile . build_dashboard: pull_data - docker build --no-cache=true -t ghcr.io/cmu-delphi/forecast-eval:$(imageTag) -f docker_dashboard/Dockerfile . + docker build --no-cache=true --pull -t ghcr.io/cmu-delphi/forecast-eval:$(imageTag) -f docker_dashboard/Dockerfile . deploy_dashboard: build_dashboard docker push ghcr.io/cmu-delphi/forecast-eval:$(imageTag) start_dashboard: build_dashboard_dev - docker run --rm -p 3838:3838 ghcr.io/cmu-delphi/forecast-eval:latest + docker run --pull=always --rm -p 3838:3838 ghcr.io/cmu-delphi/forecast-eval:latest From 7cd9036877b8f622bc4f3c42300fc0478e802a05 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Jun 2021 15:13:07 -0400 Subject: [PATCH 05/26] report 20, 50, 80th %iles with scores --- Report/create_reports.R | 7 ++++++- Report/error_measures.R | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 6f3f822..c49bccc 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -76,7 +76,10 @@ err_measures = c(wis = weighted_interval_score, underprediction = underprediction, sharpness = sharpness, ae = absolute_error, - coverage_functions) + coverage_functions, + value_20 = get_quantile_prediction_factory(0.2), + value_50 = get_quantile_prediction_factory(0.5), + value_80 = get_quantile_prediction_factory(0.8)) nation_predictions = predictions_cards %>% filter(geo_value == "us") state_predictions = predictions_cards %>% filter(geo_value != "us") @@ -98,8 +101,10 @@ print("Saving state deaths incidence...") save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num", output_dir = opt$dir) + print("Evaluating national forecasts") # COVIDcast does not return national level data, using CovidHubUtils instead + nation_scores = evaluate_chu(nation_predictions, signals, err_measures) print("Saving nation confirmed incidence...") diff --git a/Report/error_measures.R b/Report/error_measures.R index 46c21c1..522fba0 100644 --- a/Report/error_measures.R +++ b/Report/error_measures.R @@ -72,6 +72,23 @@ find_quantile_match <- function(quantiles, val_to_match, tol=1e-8){ return(abs(quantiles - val_to_match) < tol & !is.na(quantiles)) } +get_quantile_prediction_factory <- function(val_to_match, tol=1e-8) { + get_quantile_prediction <- function(quantile, value, actual_value) { + if (all(is.na(quantile))) return(NA) + + value <- value[!is.na(quantile)] + quantile <- quantile[!is.na(quantile)] + + val <- value[find_quantile_match(quantile, val_to_match, tol)] + + if (length(val) != 1L) return(NA) + + return(val) + } + + return(get_quantile_prediction) +} + score_func_param_checker <- function(quantiles, values, actual_value, id = ""){ id_str = paste0(id, ": ") if (length(actual_value) > 1) { From ecb0998eaa582fc00888e040f9440a41af206fde Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Wed, 7 Jul 2021 11:20:26 -0400 Subject: [PATCH 06/26] include secondary forecasters --- Report/create_reports.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 3e53201..02a9e65 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -23,7 +23,7 @@ prediction_cards_filepath = case_when( TRUE~prediction_cards_filename ) -forecasters = c(get_covidhub_forecaster_names(designations = "primary"), +forecasters = c(get_covidhub_forecaster_names(designations = c("primary", "secondary")), "COVIDhub-baseline") locations = covidHubUtils::hub_locations From b7c296c387f408577b3ad163a7d10e2e874deccd Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Wed, 7 Jul 2021 12:45:34 -0400 Subject: [PATCH 07/26] adding COVIDhub-trained_ensemble to included forecasters --- Report/create_reports.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 02a9e65..e27dd04 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -24,7 +24,7 @@ prediction_cards_filepath = case_when( ) forecasters = c(get_covidhub_forecaster_names(designations = c("primary", "secondary")), - "COVIDhub-baseline") + "COVIDhub-baseline", "COVIDhub-trained_ensemble") locations = covidHubUtils::hub_locations # also includes "us", which is national level data From 58ca740c392a039ed33768687141d952deb6954a Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Wed, 7 Jul 2021 13:04:30 -0400 Subject: [PATCH 08/26] Update README.md Updating release notes with cleanup instructions --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 38753fa..eaeafc6 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,8 @@ git tag -a v1.0 -m "Version 1.0" git push origin release_v1.0 git push origin v1.0 ``` +Create a PR into `main` +After code is merged to `main`, perform cleanup by merging `main` into `dev` so that `dev` stays up to date. ## Note on Scoring Script From 615ac11e2bd7627c6bda570282025991ced734b5 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Wed, 7 Jul 2021 13:05:34 -0400 Subject: [PATCH 09/26] Update README.md Updating release notes with cleanup instructions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eaeafc6..5b06fd9 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ git tag -a v1.0 -m "Version 1.0" git push origin release_v1.0 git push origin v1.0 ``` -Create a PR into `main` +Create a PR into `main`. After code is merged to `main`, perform cleanup by merging `main` into `dev` so that `dev` stays up to date. ## Note on Scoring Script From e2a323cc9b046fe2569a8c6f1c67fb60066f843b Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Thu, 8 Jul 2021 11:09:28 -0400 Subject: [PATCH 10/26] add nation hospitalizations --- dashboard/app.R | 72 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/dashboard/app.R b/dashboard/app.R index 6f1bf49..2f2ef1d 100644 --- a/dashboard/app.R +++ b/dashboard/app.R @@ -12,7 +12,10 @@ library(aws.s3) COVERAGE_INTERVALS = c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98") DEATH_FILTER = "deaths_incidence_num" CASE_FILTER = "confirmed_incidence_num" +HOSPITALIZATIONS_FILTER = "confirmed_admissions_covid_1d" +HOSPITALIZATIONS_AHEAD_DAY = "Saturday" TOTAL_LOCATIONS = "Totaled Over States*" +MAX_AHEAD_CHOICES = c(1,2,3,4) # Score explanations wisExplanation = includeMarkdown("wis.md") @@ -64,9 +67,8 @@ ui <- fluidPage(padding=0, conditionalPanel(condition = "input.tabset == 'evaluations'", radioButtons("targetVariable", "Target Variable", choices = list("Incident Deaths" = "Deaths", - "Incident Cases" = "Cases")), - - + "Incident Cases" = "Cases", + "Incident Hospitalizations" = "Hospitalizations")), radioButtons("scoreType", "Scoring Metric", choices = list("Weighted Interval Score" = "wis", "Spread" = "sharpness", @@ -78,13 +80,13 @@ ui <- fluidPage(padding=0, "logScale", "Log Scale", value = FALSE, - ), + )), + conditionalPanel(condition = "input.scoreType != 'coverage' && input.targetVariable != 'Hospitalizations'", checkboxInput( "scaleByBaseline", "Scale by Baseline Forecaster", value = FALSE, - ) - ), + )), selectInput( "forecasters", p("Forecasters", tags$br(), tags$span(id="forecaster-input", "Type a name or select from dropdown")), @@ -96,7 +98,7 @@ ui <- fluidPage(padding=0, checkboxGroupInput( "aheads", "Forecast Horizon (Weeks)", - choices = c(1,2,3,4), + choices = MAX_AHEAD_CHOICES, selected = 1, inline = TRUE ), @@ -225,7 +227,9 @@ server <- function(input, output, session) { dfStateDeaths <- getData("score_cards_state_deaths.rds") dfNationCases = getData("score_cards_nation_cases.rds") dfNationDeaths = getData("score_cards_nation_deaths.rds") - + dfStateHospitalizations = getData("score_cards_state_hospitalizations.rds") + dfNationHospitalizations = getData("score_cards_nation_hospitalizations.rds") + # Pick out expected columns only covCols = paste0("cov_", COVERAGE_INTERVALS) expectedCols = c("ahead", "geo_value", "forecaster", "forecast_date", @@ -237,8 +241,10 @@ server <- function(input, output, session) { dfStateDeaths = dfStateDeaths %>% select(all_of(expectedCols)) dfNationCases = dfNationCases %>% select(all_of(expectedCols)) dfNationDeaths = dfNationDeaths %>% select(all_of(expectedCols)) + dfStateHospitalizations = dfStateHospitalizations %>% select(all_of(expectedCols)) + dfNationHospitalizations = dfNationHospitalizations %>% select(all_of(expectedCols)) - df <- rbind(dfStateCases, dfStateDeaths, dfNationCases, dfNationDeaths) + df <- rbind(dfStateCases, dfStateDeaths, dfNationCases, dfNationDeaths, dfStateHospitalizations, dfNationHospitalizations) df <- df %>% rename("10" = cov_10, "20" = cov_20, "30" = cov_30, "40" = cov_40, "50" = cov_50, "60" = cov_60, "70" = cov_70, "80" = cov_80, "90" = cov_90, "95" = cov_95, "98" = cov_98) # Prepare color palette @@ -262,11 +268,16 @@ server <- function(input, output, session) { if (targetVariable == "Deaths") { signalFilter = DEATH_FILTER } + if (targetVariable == "Hospitalizations") { + signalFilter = HOSPITALIZATIONS_FILTER + } scoreDf = scoreDf %>% filter(signal == signalFilter) %>% - filter(ahead %in% horizon) %>% filter(forecaster %in% forecasters) - + if (signalFilter == HOSPITALIZATIONS_FILTER) { + scoreDf = filterHospitalizationsAheads(scoreDf) + } + scoreDf = scoreDf %>% filter(ahead %in% horizon) filteredScoreDf <- scoreDf %>% rename(Forecaster = forecaster, Week_End_Date = target_end_date) if (scoreType == "wis" || scoreType == "sharpness") { @@ -475,8 +486,10 @@ server <- function(input, output, session) { observeEvent(input$targetVariable, { if (input$targetVariable == 'Deaths') { df = df %>% filter(signal == DEATH_FILTER) - } else { + } else if (input$targetVariable == 'Cases') { df = df %>% filter(signal == CASE_FILTER) + } else { + df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) } updateForecasterChoices(session, df, input$forecasters, input$scoreType) updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location) @@ -486,8 +499,10 @@ server <- function(input, output, session) { observeEvent(input$scoreType, { if (input$targetVariable == 'Deaths') { df = df %>% filter(signal == DEATH_FILTER) - } else { + } else if (input$targetVariable == 'Cases') { df = df %>% filter(signal == CASE_FILTER) + } else { + df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) } # Only show forecasters that have data for the score chosen updateForecasterChoices(session, df, input$forecasters, input$scoreType) @@ -522,11 +537,14 @@ server <- function(input, output, session) { observeEvent(input$forecasters, { if (input$targetVariable == 'Deaths') { df = df %>% filter(signal == DEATH_FILTER) - } else { + } else if (input$targetVariable == 'Cases') { df = df %>% filter(signal == CASE_FILTER) + } else { + df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) } df = df %>% filter(forecaster %in% input$forecasters) - aheadChoices = unique(df$ahead) + aheadChoices = Filter(function(x) any(unique(df$ahead) %in% x), MAX_AHEAD_CHOICES) + aheadChoices = aheadChoices # Ensure previsouly selected options are still allowed if (input$aheads %in% aheadChoices) { selectedAheads = input$aheads @@ -551,7 +569,7 @@ server <- function(input, output, session) { # Ensure there is always one forecaster selected if(length(input$forecasters) < 1) { updateSelectInput(session, "forecasters", - selected = c("COVIDhub-baseline")) + selected = c("COVIDhub-ensemble")) # Use ensemble rather than baseline bc it has hospitalization scores } # Ensure COVIDhub-baseline is selected when scaling by baseline if(input$scaleByBaseline && !("COVIDhub-baseline" %in% input$forecasters)) { @@ -611,4 +629,26 @@ updateLocationChoices = function(session, df, targetVariable, forecasterChoices, selected = selectedLocation) } +filterHospitalizationsAheads = function(scoreDf) { + scoreDf['weekday'] = weekdays(as.Date(scoreDf$target_end_date)) + scoreDf = scoreDf %>% filter(weekday == HOSPITALIZATIONS_AHEAD_DAY) + + # Only use weekly aheads for hospitalizations + # May change in the future + oneAheadDf = scoreDf %>% filter(ahead >= 5) %>% filter(ahead < 12) %>% + group_by(target_end_date) %>% group_by(forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = 1) + twoAheadDf = scoreDf %>% filter(ahead >= 12) %>% filter(ahead < 19) %>% + group_by(target_end_date) %>% group_by(forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = 2) + threeAheadDf = scoreDf %>% filter(ahead >= 19) %>% filter(ahead < 26) %>% + group_by(target_end_date) %>% group_by(forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = 3) + fourAheadDf = scoreDf %>% filter(ahead >= 26) %>% + group_by(target_end_date) %>% group_by(forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = 4) + + return(rbind(oneAheadDf, twoAheadDf, threeAheadDf, fourAheadDf)) +} + shinyApp(ui = ui, server = server) From dd69a83814e182ba2e5145f893ed93d981f8dab4 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Fri, 9 Jul 2021 12:14:43 -0400 Subject: [PATCH 11/26] updating about and aheads for hosp --- dashboard/about.md | 17 +++++-- dashboard/app.R | 121 ++++++++++++++++++++++++++++++--------------- 2 files changed, 95 insertions(+), 43 deletions(-) diff --git a/dashboard/about.md b/dashboard/about.md index 1cf743b..eb9966b 100644 --- a/dashboard/about.md +++ b/dashboard/about.md @@ -37,7 +37,9 @@ The Forecaster Evaluation Dashboard is a collaborative project, which has been m #### **Sources** -**Observed values** are from the [COVID-19 Data Repository](https://github.com/CSSEGISandData/COVID-19) by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. +**Observed case and death** are from the [COVID-19 Data Repository](https://github.com/CSSEGISandData/COVID-19) by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. + +**Observed hospitalizations** are from the U.S. Department of Health & Human Services and is the sum of all adult and pediatric COVID-19 hospital admissions. **Forecaster predictions** are drawn from the [COVID-19 Forecast Hub GitHub repository](https://github.com/reichlab/covid19-forecast-hub/) @@ -58,16 +60,23 @@ Data for the dashboard is pulled from these sources on Mondays and Tuesdays. #### **Dashboard Inclusion Criteria** A forecast is only included if all the following criteria are met: -* The target variable is the weekly incidence of either cases or deaths +* The target variable is the weekly incidence of either cases or deaths, or incidence of hospitalizations * The horizon is no more than 4 weeks ahead * The location is a U.S. state, territory, or the nation as a whole * All dates are parsable. If a date is not in yyyy/mm/dd format, the forecast may be dropped. * The forecast was made on or before the Monday of the relevant week. If multiple versions of a forecast are submitted then only the last forecast that meets the date restriction is included. +#### **How Hospitalization Forecasts are Processed** +Though hospitalizations are forecasted on a daily basis, in keeping with the cases and death scoring and plotting, we show the hospitalization scores on a weekly basis in the dashboard. We only look at forecasts for one target day a week (currently Wednesdays), and calculate the weekly horizons accordingly. Hospitalization horizons are calculated in the following manner: +* 2 days ahead: Forecast date is on or before the Monday preceeding the target date (Wednesday) +* 9 days ahead: Forecast date equal to or before 7 days before the Monday preceeding the target date +* 16 days ahead: Forecast date is equal to or before 14 days before the Monday preceeding the target date +* 23 days ahead: Forecast date equal to or before 21 days before the Monday preceeding the target date + #### **Notes on the Data** * If a forecast does not include an explicit point estimate, the 0.5 quantile is taken as the point estimate for calculating absolute error. -* WIS is only shown for forecasts that have predictions for all quantiles (23 quantiles for deaths and 15 for cases) +* WIS is only shown for forecasts that have predictions for all quantiles (23 quantiles for deaths and 7 for cases) * Totaling over all states and territories does not include nationwide forecasts. To ensure that values are comparable, these totals also exclude any locations that are absent from any file that was submitted by one of the selected forecasters. * We include revisions of observed values, which means that the scores for forecasts made in the past can change as our understanding of the ground truth changes. @@ -83,5 +92,7 @@ The available files are: * score_cards_nation_deaths.rds * score_cards_state_cases.rds * score_cards_state_deaths.rds +* score_cards_state_hospitalizations.rds +* score_cards_nation_hospitalizations.rds diff --git a/dashboard/app.R b/dashboard/app.R index 2f2ef1d..39c3c33 100644 --- a/dashboard/app.R +++ b/dashboard/app.R @@ -13,9 +13,16 @@ COVERAGE_INTERVALS = c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95 DEATH_FILTER = "deaths_incidence_num" CASE_FILTER = "confirmed_incidence_num" HOSPITALIZATIONS_FILTER = "confirmed_admissions_covid_1d" -HOSPITALIZATIONS_AHEAD_DAY = "Saturday" +HOSPITALIZATIONS_TARGET_DAY = "Wednesday" TOTAL_LOCATIONS = "Totaled Over States*" -MAX_AHEAD_CHOICES = c(1,2,3,4) +AHEAD_OPTIONS = c(1,2,3,4) + +# Num days to offset the forecast week by +# Example: if HOSPITALIZATIONS_TARGET_DAY is Wednesday and HOSPITALIZATIONS_OFFSET is 2, +# ahead 1 has to have forecast date of Monday or earlier, +# ahead 2 has to have forecast date of Monday + 7 days or earlier (offset + 7 days or more), etc +HOSPITALIZATIONS_OFFSET = 2 +HOSPITALIZATIONS_AHEAD_OPTIONS = c(HOSPITALIZATIONS_OFFSET, HOSPITALIZATIONS_OFFSET + 7, HOSPITALIZATIONS_OFFSET + 14, HOSPITALIZATIONS_OFFSET + 21) # Score explanations wisExplanation = includeMarkdown("wis.md") @@ -68,7 +75,7 @@ ui <- fluidPage(padding=0, radioButtons("targetVariable", "Target Variable", choices = list("Incident Deaths" = "Deaths", "Incident Cases" = "Cases", - "Incident Hospitalizations" = "Hospitalizations")), + "Hospital Admissions" = "Hospitalizations")), radioButtons("scoreType", "Scoring Metric", choices = list("Weighted Interval Score" = "wis", "Spread" = "sharpness", @@ -98,8 +105,8 @@ ui <- fluidPage(padding=0, checkboxGroupInput( "aheads", "Forecast Horizon (Weeks)", - choices = MAX_AHEAD_CHOICES, - selected = 1, + choices = AHEAD_OPTIONS, + selected = AHEAD_OPTIONS[1], inline = TRUE ), conditionalPanel(condition = "input.scoreType == 'coverage'", @@ -393,9 +400,16 @@ server <- function(input, output, session) { as_tsibble(key = c(Forecaster, ahead), index = Week_End_Date) %>% group_by(Forecaster, ahead) %>% fill_gaps(.full = TRUE) - - filteredScoreDf$ahead = factor(filteredScoreDf$ahead, levels = c(1, 2, 3, 4), - labels = c("Horizon: 1 Week", "Horizon: 2 Weeks", "Horizon: 3 Weeks", "Horizon: 4 Weeks")) + # Set labels for faceted horizon plots + horizonOptions = AHEAD_OPTIONS + horizonLabels = lapply(AHEAD_OPTIONS, function (x) paste0("Horizon: ", x, " Week(s)")) + if (targetVariable == 'Hospitalizations') { + horizonOptions = HOSPITALIZATIONS_AHEAD_OPTIONS + horizonLabels = lapply(HOSPITALIZATIONS_AHEAD_OPTIONS, function (x) paste0("Horizon: ", x, " Days")) + } + filteredScoreDf$ahead = factor(filteredScoreDf$ahead, levels = horizonOptions, + labels = horizonLabels) + # Set forecaster colors for plot set.seed(colorSeed) forecasterRand <- sample(unique(df$forecaster)) colorPalette = setNames(object = viridis(length(unique(df$forecaster))), nm = forecasterRand) @@ -445,10 +459,14 @@ server <- function(input, output, session) { ################### # Create the plot for target variable ground truth truthPlot = function(scoreDf = NULL, targetVariable = NULL, locationsIntersect = NULL, allLocations = FALSE) { - titleText = paste0('Observed Incident ', targetVariable, '') + observation = paste0('Incident ', targetVariable) + if (targetVariable == "Hospitalizations") { + observation = paste0('Hospital Admissions') + } + titleText = paste0('Observed ', observation, '') if (allLocations) { - titleText = paste0('Observed Incident ', targetVariable, '', '
Totaled over all states and territories common to selected forecasters*') - } + titleText = paste0('Observed ', observation, '', '
Totaled over all states and territories common to selected forecasters*') + } scoreDf <- scoreDf %>% group_by(Week_End_Date) %>% summarize(Reported_Incidence = actual) @@ -491,6 +509,8 @@ server <- function(input, output, session) { } else { df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) } + + updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, TRUE) updateForecasterChoices(session, df, input$forecasters, input$scoreType) updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location) updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output) @@ -543,18 +563,8 @@ server <- function(input, output, session) { df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) } df = df %>% filter(forecaster %in% input$forecasters) - aheadChoices = Filter(function(x) any(unique(df$ahead) %in% x), MAX_AHEAD_CHOICES) - aheadChoices = aheadChoices - # Ensure previsouly selected options are still allowed - if (input$aheads %in% aheadChoices) { - selectedAheads = input$aheads - } else { - selectedAheads = 1 - } - updateCheckboxGroupInput(session, "aheads", - choices = aheadChoices, - selected = selectedAheads, - inline = TRUE) + + updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, FALSE) updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location) updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output) }) @@ -563,8 +573,13 @@ server <- function(input, output, session) { observe({ # Ensure there is always one ahead selected if(length(input$aheads) < 1) { - updateCheckboxGroupInput(session, "aheads", - selected = 1) + if (input$targetVariable == 'Hospitalizations') { + updateCheckboxGroupInput(session, "aheads", + selected = HOSPITALIZATIONS_AHEAD_OPTIONS[1]) + } else { + updateCheckboxGroupInput(session, "aheads", + selected = AHEAD_OPTIONS[1]) + } } # Ensure there is always one forecaster selected if(length(input$forecasters) < 1) { @@ -629,24 +644,50 @@ updateLocationChoices = function(session, df, targetVariable, forecasterChoices, selected = selectedLocation) } +updateAheadChoices = function(session, df, targetVariable, forecasterChoices, aheads, targetVariableChange) { + df = df %>% filter(forecaster %in% forecasterChoices) + aheadOptions = AHEAD_OPTIONS + title = "Forecast Horizon (Weeks)" + if (targetVariable == 'Hospitalizations') { + aheadOptions = HOSPITALIZATIONS_AHEAD_OPTIONS + title = "Forecast Horizon (Days)" + } + aheadChoices = Filter(function(x) any(unique(df$ahead) %in% x), aheadOptions) + # Ensure previsouly selected options are still allowed + if (!is.null(aheads) && aheads %in% aheadChoices) { + selectedAheads = aheads + } else { + selectedAheads = aheadOptions[1] + } + # If we are changing target variable, always reset ahead selection to first option + if (targetVariableChange) { + selectedAheads = aheadOptions[1] + } + updateCheckboxGroupInput(session, "aheads", + title, + choices = aheadChoices, + selected = selectedAheads, + inline = TRUE) +} + +# Only use weekly aheads for hospitalizations +# May change in the future filterHospitalizationsAheads = function(scoreDf) { scoreDf['weekday'] = weekdays(as.Date(scoreDf$target_end_date)) - scoreDf = scoreDf %>% filter(weekday == HOSPITALIZATIONS_AHEAD_DAY) + scoreDf = scoreDf %>% filter(weekday == HOSPITALIZATIONS_TARGET_DAY) - # Only use weekly aheads for hospitalizations - # May change in the future - oneAheadDf = scoreDf %>% filter(ahead >= 5) %>% filter(ahead < 12) %>% - group_by(target_end_date) %>% group_by(forecaster) %>% filter(ahead == min(ahead)) %>% - mutate(ahead = 1) - twoAheadDf = scoreDf %>% filter(ahead >= 12) %>% filter(ahead < 19) %>% - group_by(target_end_date) %>% group_by(forecaster) %>% filter(ahead == min(ahead)) %>% - mutate(ahead = 2) - threeAheadDf = scoreDf %>% filter(ahead >= 19) %>% filter(ahead < 26) %>% - group_by(target_end_date) %>% group_by(forecaster) %>% filter(ahead == min(ahead)) %>% - mutate(ahead = 3) - fourAheadDf = scoreDf %>% filter(ahead >= 26) %>% - group_by(target_end_date) %>% group_by(forecaster) %>% filter(ahead == min(ahead)) %>% - mutate(ahead = 4) + oneAheadDf = scoreDf %>% filter(ahead >= HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1]) + twoAheadDf = scoreDf %>% filter(ahead >= 7 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 14 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[2]) + threeAheadDf = scoreDf %>% filter(ahead >= 14 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 21 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[3]) + fourAheadDf = scoreDf %>% filter(ahead >= 21 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 28 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[4]) return(rbind(oneAheadDf, twoAheadDf, threeAheadDf, fourAheadDf)) } From 771b05132bdc7edbeffdb7d2a2e0893f36032e69 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Fri, 9 Jul 2021 12:16:24 -0400 Subject: [PATCH 12/26] fix typo --- dashboard/about.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dashboard/about.md b/dashboard/about.md index eb9966b..7db0690 100644 --- a/dashboard/about.md +++ b/dashboard/about.md @@ -37,7 +37,7 @@ The Forecaster Evaluation Dashboard is a collaborative project, which has been m #### **Sources** -**Observed case and death** are from the [COVID-19 Data Repository](https://github.com/CSSEGISandData/COVID-19) by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. +**Observed cases and deaths** are from the [COVID-19 Data Repository](https://github.com/CSSEGISandData/COVID-19) by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. **Observed hospitalizations** are from the U.S. Department of Health & Human Services and is the sum of all adult and pediatric COVID-19 hospital admissions. From 0e96c6747f0e487d7b9f2b9bf6843745b4d62e20 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Fri, 9 Jul 2021 12:45:15 -0400 Subject: [PATCH 13/26] about text updates --- dashboard/about.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/dashboard/about.md b/dashboard/about.md index 1cf743b..4461521 100644 --- a/dashboard/about.md +++ b/dashboard/about.md @@ -2,17 +2,20 @@ This dashboard was developed by: -* Jed Grabman (Delphi Group, Google Fellow) -* Kate Harwood (Delphi Group, Google Fellow) * Chris Scott (Delphi Group, Google Fellow) +* Kate Harwood (Delphi Group, Google Fellow) +* Jed Grabman (Delphi Group, Google Fellow) with the Forecast Evaluation Research Collaborative: +* Ryan Tibshirani (Delphi Group) * Nicholas Reich (Reich Lab) -* Jacob Bien (Delphi Group) -* Logan Brooks (Delphi Group) -* Estee Cramer (Reich Lab) +* Evan Ray (Reich Lab) * Daniel McDonald (Delphi Group) +* Estee Cramer (Reich Lab) +* Logan Brooks (Delphi Group) +* Johannes Bracher (Karlsruhe Institute) +* Jacob Bien (Delphi Group) Forecast data in all states and U.S. territories are supplied by: @@ -29,7 +32,7 @@ Both groups are funded by the CDC as Centers of Excellence for Influenza and COV The collaborative’s mission is to help epidemiological researchers gain insights into the performance of their forecasts and lead to more accurate forecasting of epidemics. -Both groups lead initiatives related to COVID-19 data and forecast curation. The Reich Lab created and maintains the [COVID-19 Forecast Hub](https://covid19forecasthub.org/), a collaborative effort with over 80 groups submitting forecasts to be part of the official [CDC COVID-19 ensemble forecast](https://www.cdc.gov/coronavirus/2019-ncov/covid-data/mathematical-modeling.html). The Delphi Group created and maintains COVIDcast, a platform for [epidemiological surveillance data](https://delphi.cmu.edu/covidcast/), and runs the [Delphi Pandemic Survey via Facebook](https://delphi.cmu.edu/covidcast/surveys/). +Both groups lead initiatives related to COVID-19 data and forecast curation. The Reich Lab created and maintains the [COVID-19 Forecast Hub](https://covid19forecasthub.org/), a collaborative effort with over 80 groups submitting forecasts to be part of the official [CDC COVID-19 ensemble forecast](https://www.cdc.gov/coronavirus/2019-ncov/covid-data/mathematical-modeling.html). The Delphi Group created and maintains COVIDcast, a platform for [epidemiological surveillance data](https://delphi.cmu.edu/covidcast/), and runs the [U.S. COVID-19 Trends and Impact Survey in partnership with Facebook](https://delphi.cmu.edu/covidcast/surveys/). The Forecaster Evaluation Dashboard is a collaborative project, which has been made possible by the 13 pro bono Google.org Fellows who have spent 6 months working full-time with the Delphi Group. Google.org is [committed](https://www.google.org/covid-19/) to the recovery of lives and communities that have been impacted by COVID-19 and investing in developing the science to mitigate the damage of future pandemics. From d80b0971b3a11b856738e79a15f93c56569ecb22 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Fri, 9 Jul 2021 14:08:52 -0400 Subject: [PATCH 14/26] fixing corner case where plot is blank with no warning message --- dashboard/app.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dashboard/app.R b/dashboard/app.R index 6f1bf49..213a82b 100644 --- a/dashboard/app.R +++ b/dashboard/app.R @@ -276,6 +276,10 @@ server <- function(input, output, session) { filteredScoreDf = filteredScoreDf %>% filter(!is.na(`10`)) %>% filter(!is.na(`20`)) %>% filter(!is.na(`30`)) %>% filter(!is.na(`40`)) %>% filter(!is.na(`60`)) %>% filter(!is.na(`70`)) %>% filter(!is.na(`90`)) %>% filter(!is.na(`98`)) } + if (dim(filteredScoreDf)[1] == 0) { + output$renderWarningText <- renderText("The selected forecasters do not have enough data to display the selected scoring metric.") + return() + } if (scoreType == "wis") { filteredScoreDf <- filteredScoreDf %>% rename(Score = wis) title = "Weighted Interval Score" From 8b5cd64c01611661422ea46bf2cb88de4f9af6cc Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Fri, 9 Jul 2021 15:42:00 -0400 Subject: [PATCH 15/26] adding forecast_date into tooltip on hover --- dashboard/app.R | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/dashboard/app.R b/dashboard/app.R index 213a82b..72516f4 100644 --- a/dashboard/app.R +++ b/dashboard/app.R @@ -267,7 +267,8 @@ server <- function(input, output, session) { filter(ahead %in% horizon) %>% filter(forecaster %in% forecasters) - filteredScoreDf <- scoreDf %>% rename(Forecaster = forecaster, Week_End_Date = target_end_date) + filteredScoreDf <- scoreDf %>% rename(Forecaster = forecaster, Forecast_Date = forecast_date, + Week_End_Date = target_end_date) if (scoreType == "wis" || scoreType == "sharpness") { # Only show WIS or Sharpness for forecasts that have all intervals @@ -315,14 +316,14 @@ server <- function(input, output, session) { if (scoreType == "coverage") { aggregate = "Averaged" filteredScoreDf = filteredScoreDf %>% - group_by(Forecaster, Week_End_Date, ahead) %>% + group_by(Forecaster, Forecast_Date, Week_End_Date, ahead) %>% summarize(Score = sum(Score)/length(locationsIntersect), actual = sum(actual)) output$renderAggregateText = renderText(paste(aggregateText," Some forecasters may not have any data for the coverage interval chosen. Locations inlcuded: ")) } else { aggregate = "Totaled" filteredScoreDf = filteredScoreDf %>% - group_by(Forecaster, Week_End_Date, ahead) %>% + group_by(Forecaster, Forecast_Date, Week_End_Date, ahead) %>% summarize(Score = sum(Score), actual = sum(actual)) output$renderAggregateText = renderText(paste(aggregateText, " Locations included: ")) } @@ -342,7 +343,7 @@ server <- function(input, output, session) { # Not totaling over all locations } else { filteredScoreDf <- filteredScoreDf %>% filter(geo_value == tolower(loc)) %>% - group_by(Forecaster, Week_End_Date, ahead) %>% + group_by(Forecaster, Forecast_Date, Week_End_Date, ahead) %>% summarize(Score = Score, actual = actual) locationSubtitleText = paste0(', Location: ', input$location) output$renderAggregateText = renderText("") @@ -359,7 +360,7 @@ server <- function(input, output, session) { }) # Format and transform data - filteredScoreDf = filteredScoreDf[c("Forecaster", "Week_End_Date", "Score", "ahead")] + filteredScoreDf = filteredScoreDf[c("Forecaster", "Forecast_Date", "Week_End_Date", "Score", "ahead")] filteredScoreDf = filteredScoreDf %>% mutate(across(where(is.numeric), ~ round(., 2))) if (scoreType != 'coverage') { if (scaleByBaseline) { @@ -368,8 +369,8 @@ server <- function(input, output, session) { # Scaling score by baseline forecaster filteredScoreDfMerged$Score.x = filteredScoreDfMerged$Score.x / filteredScoreDfMerged$Score.y filteredScoreDf = filteredScoreDfMerged %>% - rename(Forecaster = Forecaster.x, Score = Score.x) %>% - select(Forecaster, Week_End_Date, ahead, Score) + rename(Forecaster = Forecaster.x, Score = Score.x, Forecast_Date = Forecast_Date.x) %>% + select(Forecaster, Forecast_Date, Week_End_Date, ahead, Score) } if (logScale) { filteredScoreDf$Score = log10(filteredScoreDf$Score) @@ -384,7 +385,7 @@ server <- function(input, output, session) { # Fill gaps so there are line breaks on weeks without data filteredScoreDf = filteredScoreDf %>% as_tsibble(key = c(Forecaster, ahead), index = Week_End_Date) %>% - group_by(Forecaster, ahead) %>% + group_by(Forecaster, Forecast_Date, ahead) %>% fill_gaps(.full = TRUE) filteredScoreDf$ahead = factor(filteredScoreDf$ahead, levels = c(1, 2, 3, 4), @@ -395,7 +396,7 @@ server <- function(input, output, session) { p = ggplot( filteredScoreDf, - aes(x = Week_End_Date, y = Score, color = Forecaster, shape = Forecaster) + aes(x = Week_End_Date, y = Score, color = Forecaster, shape = Forecaster, label = Forecast_Date) ) + geom_line() + geom_point(size=2) + @@ -417,7 +418,7 @@ server <- function(input, output, session) { } plotHeight = 550 + (length(horizon)-1)*100 finalPlot <- - ggplotly(p,tooltip = c("x", "y", "shape")) %>% + ggplotly(p, tooltip = c("x", "y", "shape", "label")) %>% layout( height = plotHeight, legend = list(orientation = "h", y = -0.1), From b216d303061342bd4948506f3741aec524c5884e Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 9 Jul 2021 16:05:21 -0400 Subject: [PATCH 16/26] have start_dashboard use local docker build --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6df94a4..cf10a80 100644 --- a/Makefile +++ b/Makefile @@ -49,4 +49,4 @@ deploy_dashboard: build_dashboard docker push ghcr.io/cmu-delphi/forecast-eval:$(imageTag) start_dashboard: build_dashboard_dev - docker run --pull=always --rm -p 3838:3838 ghcr.io/cmu-delphi/forecast-eval:latest + docker run --rm -p 3838:3838 ghcr.io/cmu-delphi/forecast-eval:latest From a034d8950a807e72c5b797014c42228796a05335 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Tue, 13 Jul 2021 11:31:10 -0400 Subject: [PATCH 17/26] adding unique wrapper to forecaster list --- Report/create_reports.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index e27dd04..6e5208d 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -23,8 +23,8 @@ prediction_cards_filepath = case_when( TRUE~prediction_cards_filename ) -forecasters = c(get_covidhub_forecaster_names(designations = c("primary", "secondary")), - "COVIDhub-baseline", "COVIDhub-trained_ensemble") +forecasters = c(get_covidhub_forecaster_names(unique(designations = c("primary", "secondary")), + "COVIDhub-baseline", "COVIDhub-trained_ensemble")) locations = covidHubUtils::hub_locations # also includes "us", which is national level data From 1719f45aa60d3c6e40fc6a6d82a608609e9f3f64 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Tue, 13 Jul 2021 13:55:22 -0400 Subject: [PATCH 18/26] fixing unique addition --- Report/create_reports.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 6e5208d..9e926df 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -23,7 +23,7 @@ prediction_cards_filepath = case_when( TRUE~prediction_cards_filename ) -forecasters = c(get_covidhub_forecaster_names(unique(designations = c("primary", "secondary")), +forecasters = unique(c(get_covidhub_forecaster_names(designations = c("primary", "secondary")), "COVIDhub-baseline", "COVIDhub-trained_ensemble")) locations = covidHubUtils::hub_locations From 56ffc38acae905f7474fe867b0eee1969740535b Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Wed, 14 Jul 2021 11:04:13 -0400 Subject: [PATCH 19/26] small fixes for totaling over all states --- dashboard/app.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dashboard/app.R b/dashboard/app.R index 39c3c33..ee77246 100644 --- a/dashboard/app.R +++ b/dashboard/app.R @@ -319,6 +319,7 @@ server <- function(input, output, session) { # Create df with col for all locations across each unique date, ahead and forecaster combo locationDf = filteredScoreDf %>% group_by(Forecaster, Week_End_Date, ahead) %>% summarize(location_list = paste(sort(unique(geo_value)),collapse=",")) + locationDf = locationDf %>% filter(location_list != c('us')) # Create a list containing each row's location list locationList = sapply(locationDf$location_list, function(x) strsplit(x, ",")) locationList = lapply(locationList, function(x) x[x != 'us']) @@ -341,7 +342,7 @@ server <- function(input, output, session) { output$renderAggregateText = renderText(paste(aggregateText, " Locations included: ")) } if (length(locationsIntersect) == 0) { - output$renderWarningText <- renderText("The selected forecasters do not have data for any locations in common.") + output$renderWarningText <- renderText("The selected forecasters do not have data for any locations in common on all dates.") output$renderLocations <- renderText("") output$renderAggregateText = renderText("") hideElement("truthPlot") From cfa8cc15addcbac87f02b21479a17639a0758631 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Wed, 14 Jul 2021 11:24:37 -0400 Subject: [PATCH 20/26] delete old github action --- .github/workflows/s3_upload.yml | 49 --------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 .github/workflows/s3_upload.yml diff --git a/.github/workflows/s3_upload.yml b/.github/workflows/s3_upload.yml deleted file mode 100644 index 8c5cdff..0000000 --- a/.github/workflows/s3_upload.yml +++ /dev/null @@ -1,49 +0,0 @@ -# This workflow is no longer in use in favor of the self-hosted version (s3_upload_ec2.yml) - -name: Github Actions S3 Deploy - -env: - AWS_REGION_NAME : "us-east-2" - -# Controls when the action will run. -on: - schedule: - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - deploy: - # The type of runner that the job will run on - runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 - - - name: Test AWS cli installation - run: aws --version - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_KEY }} - aws-region: ${{ env.AWS_REGION_NAME }} - - - name: Login to GitHub Container Registry - uses: docker/login-action@v1 - with: - registry: ghcr.io - username: cmu-delphi-deploy-machine - password: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_PAT }} - - - name: Deploy score files to S3 bucket - run: | - make deploy - - - - From 368f72e1f5e153585ffb020fa67a43a096d8691a Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Thu, 15 Jul 2021 13:44:20 -0400 Subject: [PATCH 21/26] adding hosp to quantile info --- dashboard/about.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dashboard/about.md b/dashboard/about.md index b922453..954f761 100644 --- a/dashboard/about.md +++ b/dashboard/about.md @@ -79,7 +79,7 @@ Though hospitalizations are forecasted on a daily basis, in keeping with the cas #### **Notes on the Data** * If a forecast does not include an explicit point estimate, the 0.5 quantile is taken as the point estimate for calculating absolute error. -* WIS is only shown for forecasts that have predictions for all quantiles (23 quantiles for deaths and 7 for cases) +* WIS is only shown for forecasts that have predictions for all quantiles (23 quantiles for deaths and hospitalizations and 7 for cases) * Totaling over all states and territories does not include nationwide forecasts. To ensure that values are comparable, these totals also exclude any locations that are absent from any file that was submitted by one of the selected forecasters. * We include revisions of observed values, which means that the scores for forecasts made in the past can change as our understanding of the ground truth changes. From aa5dc99eb1fdbbf047b00f487e833469d4b9dd80 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Thu, 15 Jul 2021 13:46:54 -0400 Subject: [PATCH 22/26] Update dashboard/about.md Co-authored-by: nmdefries <42820733+nmdefries@users.noreply.github.com> --- dashboard/about.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dashboard/about.md b/dashboard/about.md index 954f761..636edf6 100644 --- a/dashboard/about.md +++ b/dashboard/about.md @@ -63,7 +63,7 @@ Data for the dashboard is pulled from these sources on Mondays and Tuesdays. #### **Dashboard Inclusion Criteria** A forecast is only included if all the following criteria are met: -* The target variable is the weekly incidence of either cases or deaths, or incidence of hospitalizations +* The target variable is the weekly incidence of either cases or deaths, or the daily incidence of hospitalizations * The horizon is no more than 4 weeks ahead * The location is a U.S. state, territory, or the nation as a whole * All dates are parsable. If a date is not in yyyy/mm/dd format, the forecast may be dropped. From 940d8b813b8078603faa17844f93f0ab17366cce Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 15 Jul 2021 16:47:13 -0400 Subject: [PATCH 23/26] drop hosp forecasts for most US territories --- Report/create_reports.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 8502bc8..e431767 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -44,9 +44,15 @@ predictions_cards = get_covidhub_predictions(forecasters, filter(!(incidence_period == "epiweek" & ahead > 4)) predictions_cards = predictions_cards %>% - filter(!is.na(predictions_cards$target_end_date)) %>% + filter(!is.na(target_end_date)) %>% filter(target_end_date < today()) +# For hospitalizations, drop all US territories except Puerto Rico and the +# Virgin Islands; HHS does not report data for these. +territories <- c("as", "gu", "mp", "fm", "mh", "pw", "um") +predictions_cards = predictions_cards %>% + filter(!(geo_value %in% territories), data_source != "hhs") + # For epiweek predictions, only accept forecasts made Monday or earlier. # target_end_date is the date of the last day (Saturday) in the epiweek # For daily predictions, accept any forecast where the target_end_date is later From 971c030056c34e668005109084a43026625a05e7 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 15 Jul 2021 18:41:55 -0400 Subject: [PATCH 24/26] save predictions cards object to dir pass as command line arg --- Report/create_reports.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 8502bc8..6a7333c 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -66,7 +66,7 @@ class(predictions_cards) = c("predictions_cards", class(predictions_cards)) print("Saving predictions...") saveRDS(predictions_cards, - file = "predictions_cards.rds", + file = prediction_cards_filepath, compress = "xz") print("Predictions saved") From 2684b3242bb0c42802d1902054013d332be91f8c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 16 Jul 2021 14:51:57 -0400 Subject: [PATCH 25/26] fix hhs-territory filter --- Report/create_reports.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index e431767..9b3b58c 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -48,10 +48,10 @@ predictions_cards = predictions_cards %>% filter(target_end_date < today()) # For hospitalizations, drop all US territories except Puerto Rico and the -# Virgin Islands; HHS does not report data for these. +# Virgin Islands; HHS does not report data for any territories except PR and VI. territories <- c("as", "gu", "mp", "fm", "mh", "pw", "um") predictions_cards = predictions_cards %>% - filter(!(geo_value %in% territories), data_source != "hhs") + filter(!(geo_value %in% territories & data_source == "hhs")) # For epiweek predictions, only accept forecasts made Monday or earlier. # target_end_date is the date of the last day (Saturday) in the epiweek From 27f14a1d5b117843767a09658d095aa60eb22d69 Mon Sep 17 00:00:00 2001 From: Kate Harwood Date: Wed, 21 Jul 2021 14:29:13 -0400 Subject: [PATCH 26/26] Version 3.0 updates --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9e9ff38..e5cc969 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: forecast-eval Title: Forecast Evaluation Dashboard -Version: 2.1 +Version: 3 Authors@R: person("Kate", "Harwood", role = c("cre")), person("Chris", "Scott",