diff --git a/.github/workflows/s3_upload.yml b/.github/workflows/s3_upload.yml
deleted file mode 100644
index 8c5cdff..0000000
--- a/.github/workflows/s3_upload.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-# This workflow is no longer in use in favor of the self-hosted version (s3_upload_ec2.yml)
-
-name: Github Actions S3 Deploy
-
-env:
- AWS_REGION_NAME : "us-east-2"
-
-# Controls when the action will run.
-on:
- schedule:
-
- # Allows you to run this workflow manually from the Actions tab
- workflow_dispatch:
-
-# A workflow run is made up of one or more jobs that can run sequentially or in parallel
-jobs:
- deploy:
- # The type of runner that the job will run on
- runs-on: ubuntu-latest
-
- # Steps represent a sequence of tasks that will be executed as part of the job
- steps:
- # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- - uses: actions/checkout@v2
-
- - name: Test AWS cli installation
- run: aws --version
-
- - name: Configure AWS credentials
- uses: aws-actions/configure-aws-credentials@v1
- with:
- aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY }}
- aws-secret-access-key: ${{ secrets.AWS_SECRET_KEY }}
- aws-region: ${{ env.AWS_REGION_NAME }}
-
- - name: Login to GitHub Container Registry
- uses: docker/login-action@v1
- with:
- registry: ghcr.io
- username: cmu-delphi-deploy-machine
- password: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_PAT }}
-
- - name: Deploy score files to S3 bucket
- run: |
- make deploy
-
-
-
-
diff --git a/DESCRIPTION b/DESCRIPTION
index 9e9ff38..e5cc969 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: forecast-eval
Title: Forecast Evaluation Dashboard
-Version: 2.1
+Version: 3
Authors@R: person("Kate", "Harwood",
role = c("cre")),
person("Chris", "Scott",
diff --git a/Makefile b/Makefile
index 64beeec..cf10a80 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ S3_BUCKET=s3://forecast-eval
build: build_dashboard
r_build:
- docker build -t forecast-eval-build docker_build
+ docker build --no-cache --pull -t forecast-eval-build docker_build
predictions_cards.rds score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds: dist
test -f dist/$@ || curl -o dist/$@ $(S3_URL)/$@
@@ -32,7 +32,7 @@ deploy: score_forecast
# Starts a docker image with a full preconfigured R environment
start_dev: r_build
- docker run -ti --rm \
+ docker run --pull=always -ti --rm \
-v ${PWD}/Report:/var/forecast-eval \
-v ${PWD}/dashboard:/var/forecast-eval-dashboard \
-v ${PWD}/dist:/var/dist \
@@ -40,10 +40,10 @@ start_dev: r_build
ghcr.io/cmu-delphi/forecast-eval:latest bash
build_dashboard_dev: pull_data
- docker build -t ghcr.io/cmu-delphi/forecast-eval:latest -f docker_dashboard/Dockerfile .
+ docker build --no-cache --pull -t ghcr.io/cmu-delphi/forecast-eval:latest -f docker_dashboard/Dockerfile .
build_dashboard: pull_data
- docker build --no-cache=true -t ghcr.io/cmu-delphi/forecast-eval:$(imageTag) -f docker_dashboard/Dockerfile .
+ docker build --no-cache=true --pull -t ghcr.io/cmu-delphi/forecast-eval:$(imageTag) -f docker_dashboard/Dockerfile .
deploy_dashboard: build_dashboard
docker push ghcr.io/cmu-delphi/forecast-eval:$(imageTag)
diff --git a/README.md b/README.md
index 38753fa..5b06fd9 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,8 @@ git tag -a v1.0 -m "Version 1.0"
git push origin release_v1.0
git push origin v1.0
```
+Create a PR into `main`.
+After code is merged to `main`, perform cleanup by merging `main` into `dev` so that `dev` stays up to date.
## Note on Scoring Script
diff --git a/Report/create_reports.R b/Report/create_reports.R
index 6f3f822..fac3761 100644
--- a/Report/create_reports.R
+++ b/Report/create_reports.R
@@ -23,8 +23,8 @@ prediction_cards_filepath = case_when(
TRUE~prediction_cards_filename
)
-forecasters = c(get_covidhub_forecaster_names(designations = "primary"),
- "COVIDhub-baseline")
+forecasters = unique(c(get_covidhub_forecaster_names(designations = c("primary", "secondary")),
+ "COVIDhub-baseline", "COVIDhub-trained_ensemble"))
locations = covidHubUtils::hub_locations
# also includes "us", which is national level data
@@ -32,20 +32,36 @@ state_geos = locations %>%
filter(nchar(.data$geo_value) == 2) %>%
pull(.data$geo_value)
signals = c("confirmed_incidence_num",
- "deaths_incidence_num")
+ "deaths_incidence_num",
+ "confirmed_admissions_covid_1d")
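+
+# Request horizons 1 through 28; epiweek (cases and deaths) forecasts are capped
+# at 4 weeks ahead by the filter below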
predictions_cards = get_covidhub_predictions(forecasters,
signal = signals,
+ ahead = 1:28,
geo_values = state_geos,
verbose = TRUE,
- use_disk = TRUE)
+ use_disk = TRUE) %>%
+ filter(!(incidence_period == "epiweek" & ahead > 4))
+
+predictions_cards = predictions_cards %>%
+ filter(!is.na(target_end_date)) %>%
+ filter(target_end_date < today())
+
+# For hospitalizations, drop all US territories except Puerto Rico and the
+# Virgin Islands; HHS does not report data for any territories except PR and VI.
+territories <- c("as", "gu", "mp", "fm", "mh", "pw", "um")
predictions_cards = predictions_cards %>%
- filter(!is.na(predictions_cards$target_end_date))
-predictions_cards = predictions_cards %>% filter(target_end_date < today())
+ filter(!(geo_value %in% territories & data_source == "hhs"))
-# Only accept forecasts made Monday or earlier
+# For epiweek predictions, only accept forecasts made Monday or earlier.
+# target_end_date is the date of the last day (Saturday) in the epiweek
+# For daily predictions, accept any forecast where the target_end_date is later
+# than the forecast_date.
predictions_cards = predictions_cards %>%
- filter(target_end_date - (forecast_date + 7 * ahead) >= -2)
+ filter(
+ (incidence_period == "epiweek" & target_end_date - (forecast_date + 7 * ahead) >= -2) |
+ (incidence_period == "day" & target_end_date > forecast_date)
+ )
# And only a forecaster's last forecast if multiple were made
predictions_cards = predictions_cards %>%
@@ -56,7 +72,7 @@ class(predictions_cards) = c("predictions_cards", class(predictions_cards))
print("Saving predictions...")
saveRDS(predictions_cards,
- file = "predictions_cards.rds",
+ file = prediction_cards_filepath,
compress = "xz")
print("Predictions saved")
@@ -76,7 +92,10 @@ err_measures = c(wis = weighted_interval_score,
underprediction = underprediction,
sharpness = sharpness,
ae = absolute_error,
- coverage_functions)
+ coverage_functions,
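+                 # value_* record the raw forecast value at the 20%, 50%, and 80%
+                 # quantile levels (via get_quantile_prediction_factory)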
+ value_20 = get_quantile_prediction_factory(0.2),
+ value_50 = get_quantile_prediction_factory(0.5),
+ value_80 = get_quantile_prediction_factory(0.8))
nation_predictions = predictions_cards %>% filter(geo_value == "us")
state_predictions = predictions_cards %>% filter(geo_value != "us")
@@ -91,22 +110,54 @@ state_scores = evaluate_covid_predictions(state_predictions,
geo_type = "state")
source("score.R")
-print("Saving state confirmed incidence...")
-save_score_cards(state_scores, "state", signal_name = "confirmed_incidence_num",
- output_dir = opt$dir)
-print("Saving state deaths incidence...")
-save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num",
- output_dir = opt$dir)
+if ( "confirmed_incidence_num" %in% unique(state_scores$signal)) {
+ print("Saving state confirmed incidence...")
+ save_score_cards(state_scores, "state", signal_name = "confirmed_incidence_num",
+ output_dir = opt$dir)
+} else {
+ warning("State confirmed incidence should generally be available. Please
+    verify that you expect not to have any case incidence forecasts")
+}
+if ( "deaths_incidence_num" %in% unique(state_scores$signal)) {
+ print("Saving state deaths incidence...")
+ save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num",
+ output_dir = opt$dir)
+} else {
+ warning("State deaths incidence should generally be available. Please
+    verify that you expect not to have any death incidence forecasts")
+}
+if ( "confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) {
+ print("Saving state hospitalizations...")
+ save_score_cards(state_scores, "state", signal_name = "confirmed_admissions_covid_1d",
+ output_dir = opt$dir)
+}
+
print("Evaluating national forecasts")
# COVIDcast does not return national level data, using CovidHubUtils instead
+
nation_scores = evaluate_chu(nation_predictions, signals, err_measures)
-print("Saving nation confirmed incidence...")
-save_score_cards(nation_scores, "nation",
- signal_name = "confirmed_incidence_num", output_dir = opt$dir)
-print("Saving nation deaths incidence...")
-save_score_cards(nation_scores, "nation", signal_name = "deaths_incidence_num",
- output_dir = opt$dir)
+if ( "confirmed_incidence_num" %in% unique(state_scores$signal)) {
+ print("Saving nation confirmed incidence...")
+ save_score_cards(nation_scores, "nation",
+ signal_name = "confirmed_incidence_num", output_dir = opt$dir)
+} else {
+ warning("Nation confirmed incidence should generally be available. Please
+    verify that you expect not to have any case incidence forecasts")
+}
+if ( "deaths_incidence_num" %in% unique(state_scores$signal)) {
+ print("Saving nation deaths incidence...")
+ save_score_cards(nation_scores, "nation", signal_name = "deaths_incidence_num",
+ output_dir = opt$dir)
+} else {
+ warning("Nation deaths incidence should generally be available. Please
+    verify that you expect not to have any death incidence forecasts")
+}
+if ( "confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) {
+ print("Saving nation hospitalizations...")
+ save_score_cards(nation_scores, "nation", signal_name = "confirmed_admissions_covid_1d",
+ output_dir = opt$dir)
+}
print("Done")
diff --git a/Report/error_measures.R b/Report/error_measures.R
index 46c21c1..522fba0 100644
--- a/Report/error_measures.R
+++ b/Report/error_measures.R
@@ -72,6 +72,23 @@ find_quantile_match <- function(quantiles, val_to_match, tol=1e-8){
return(abs(quantiles - val_to_match) < tol & !is.na(quantiles))
}
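+
+# get_quantile_prediction_factory returns an error-measure function that extracts
+# the forecast value at quantile level `val_to_match` (e.g. 0.5 gives the predicted
+# median), returning NA when a unique matching quantile is not found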
+get_quantile_prediction_factory <- function(val_to_match, tol=1e-8) {
+ get_quantile_prediction <- function(quantile, value, actual_value) {
+ if (all(is.na(quantile))) return(NA)
+
+ value <- value[!is.na(quantile)]
+ quantile <- quantile[!is.na(quantile)]
+
+ val <- value[find_quantile_match(quantile, val_to_match, tol)]
+
+ if (length(val) != 1L) return(NA)
+
+ return(val)
+ }
+
+ return(get_quantile_prediction)
+}
+
score_func_param_checker <- function(quantiles, values, actual_value, id = ""){
id_str = paste0(id, ": ")
if (length(actual_value) > 1) {
diff --git a/Report/score.R b/Report/score.R
index 701b115..ac61312 100644
--- a/Report/score.R
+++ b/Report/score.R
@@ -3,7 +3,8 @@ library("assertthat")
save_score_cards = function(score_card, geo_type = c("state", "nation"),
signal_name = c("confirmed_incidence_num",
- "deaths_incidence_num"),
+ "deaths_incidence_num",
+ "confirmed_admissions_covid_1d"),
output_dir = ".") {
signal_name = match.arg(signal_name)
geo_type = match.arg(geo_type)
@@ -13,11 +14,11 @@ save_score_cards = function(score_card, geo_type = c("state", "nation"),
assert_that(signal_name %in% signals,
msg = "signal is not in score_card")
score_card = score_card %>% filter(signal == signal_name)
- if (signal_name == "confirmed_incidence_num") {
- sig_suffix = "cases"
- } else {
- sig_suffix = "deaths"
- }
+
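+  # Map signal name to the suffix used in the score card file name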
+ type_map <- list("confirmed_incidence_num" = "cases",
+ "deaths_incidence_num" = "deaths",
+ "confirmed_admissions_covid_1d" = "hospitalizations")
+ sig_suffix <- type_map[[signal_name]]
output_file_name = file.path(output_dir,
paste0("score_cards_", geo_type, "_",
sig_suffix, ".rds"))
@@ -37,20 +38,25 @@ save_score_cards = function(score_card, geo_type = c("state", "nation"),
evaluate_chu = function(predictions, signals, err_measures) {
allowed_signals = c("confirmed_incidence_num",
- "deaths_incidence_num")
+ "deaths_incidence_num",
+ "confirmed_admissions_covid_1d")
assert_that(all(signals %in% allowed_signals),
msg = paste("Signal not allowed:",
setdiff(signals, allowed_signals)))
+
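+  # Map each signal to its COVID Hub target name and truth data source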
+ target_map <- list("confirmed_incidence_num" = "inc case",
+ "deaths_incidence_num" = "inc death",
+ "confirmed_admissions_covid_1d" = "inc hosp")
+ source_map <- list("confirmed_incidence_num" = "JHU",
+ "deaths_incidence_num" = "JHU",
+ "confirmed_admissions_covid_1d" = "HealthData")
scores = c()
for (signal_name in signals) {
preds_signal = predictions %>%
filter(signal == signal_name)
- if (signal_name == "confirmed_incidence_num") {
- jhu_signal = "inc case"
- } else {
- jhu_signal = "inc death"
- }
- chu_truth = covidHubUtils::load_truth("JHU", jhu_signal)
+    hub_target <- target_map[[signal_name]]
+    truth_source <- source_map[[signal_name]]
+    chu_truth = covidHubUtils::load_truth(truth_source, hub_target)
chu_truth = chu_truth %>%
rename(actual = value) %>%
select(-c(model,
diff --git a/dashboard/about.md b/dashboard/about.md
index 1cf743b..636edf6 100644
--- a/dashboard/about.md
+++ b/dashboard/about.md
@@ -2,17 +2,20 @@
This dashboard was developed by:
-* Jed Grabman (Delphi Group, Google Fellow)
-* Kate Harwood (Delphi Group, Google Fellow)
* Chris Scott (Delphi Group, Google Fellow)
+* Kate Harwood (Delphi Group, Google Fellow)
+* Jed Grabman (Delphi Group, Google Fellow)
with the Forecast Evaluation Research Collaborative:
+* Ryan Tibshirani (Delphi Group)
* Nicholas Reich (Reich Lab)
-* Jacob Bien (Delphi Group)
-* Logan Brooks (Delphi Group)
-* Estee Cramer (Reich Lab)
+* Evan Ray (Reich Lab)
* Daniel McDonald (Delphi Group)
+* Estee Cramer (Reich Lab)
+* Logan Brooks (Delphi Group)
+* Johannes Bracher (Karlsruhe Institute)
+* Jacob Bien (Delphi Group)
Forecast data in all states and U.S. territories are supplied by:
@@ -29,7 +32,7 @@ Both groups are funded by the CDC as Centers of Excellence for Influenza and COV
The collaborative’s mission is to help epidemiological researchers gain insights into the performance of their forecasts and lead to more accurate forecasting of epidemics.
-Both groups lead initiatives related to COVID-19 data and forecast curation. The Reich Lab created and maintains the [COVID-19 Forecast Hub](https://covid19forecasthub.org/), a collaborative effort with over 80 groups submitting forecasts to be part of the official [CDC COVID-19 ensemble forecast](https://www.cdc.gov/coronavirus/2019-ncov/covid-data/mathematical-modeling.html). The Delphi Group created and maintains COVIDcast, a platform for [epidemiological surveillance data](https://delphi.cmu.edu/covidcast/), and runs the [Delphi Pandemic Survey via Facebook](https://delphi.cmu.edu/covidcast/surveys/).
+Both groups lead initiatives related to COVID-19 data and forecast curation. The Reich Lab created and maintains the [COVID-19 Forecast Hub](https://covid19forecasthub.org/), a collaborative effort with over 80 groups submitting forecasts to be part of the official [CDC COVID-19 ensemble forecast](https://www.cdc.gov/coronavirus/2019-ncov/covid-data/mathematical-modeling.html). The Delphi Group created and maintains COVIDcast, a platform for [epidemiological surveillance data](https://delphi.cmu.edu/covidcast/), and runs the [U.S. COVID-19 Trends and Impact Survey in partnership with Facebook](https://delphi.cmu.edu/covidcast/surveys/).
The Forecaster Evaluation Dashboard is a collaborative project, which has been made possible by the 13 pro bono Google.org Fellows who have spent 6 months working full-time with the Delphi Group. Google.org is [committed](https://www.google.org/covid-19/) to the recovery of lives and communities that have been impacted by COVID-19 and investing in developing the science to mitigate the damage of future pandemics.
@@ -37,7 +40,9 @@ The Forecaster Evaluation Dashboard is a collaborative project, which has been m
#### **Sources**
-**Observed values** are from the [COVID-19 Data Repository](https://github.com/CSSEGISandData/COVID-19) by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University.
+**Observed cases and deaths** are from the [COVID-19 Data Repository](https://github.com/CSSEGISandData/COVID-19) by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University.
+
+**Observed hospitalizations** are from the U.S. Department of Health & Human Services and are the sum of all adult and pediatric COVID-19 hospital admissions.
**Forecaster predictions** are drawn from the [COVID-19 Forecast Hub GitHub repository](https://github.com/reichlab/covid19-forecast-hub/)
@@ -58,16 +63,23 @@ Data for the dashboard is pulled from these sources on Mondays and Tuesdays.
#### **Dashboard Inclusion Criteria**
A forecast is only included if all the following criteria are met:
-* The target variable is the weekly incidence of either cases or deaths
+* The target variable is the weekly incidence of either cases or deaths, or the daily incidence of hospitalizations
* The horizon is no more than 4 weeks ahead
* The location is a U.S. state, territory, or the nation as a whole
* All dates are parsable. If a date is not in yyyy/mm/dd format, the forecast may be dropped.
* The forecast was made on or before the Monday of the relevant week. If multiple versions of a forecast are submitted then only the last forecast that meets the date restriction is included.
+#### **How Hospitalization Forecasts are Processed**
+Although hospitalizations are forecast on a daily basis, we show hospitalization scores on a weekly basis in the dashboard, in keeping with the scoring and plotting of cases and deaths. We only look at forecasts for one target day per week (currently Wednesdays) and calculate the weekly horizons accordingly. Hospitalization horizons are calculated as follows:
+* 2 days ahead: Forecast date is on or before the Monday preceding the target date (Wednesday)
+* 9 days ahead: Forecast date is on or before 7 days before that Monday
+* 16 days ahead: Forecast date is on or before 14 days before that Monday
+* 23 days ahead: Forecast date is on or before 21 days before that Monday
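+
+For example, with a target date of Wednesday 2021-11-03, a forecast dated Monday 2021-11-01 is scored as 2 days ahead, while a forecast dated Monday 2021-10-25 is scored as 9 days ahead.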
+
#### **Notes on the Data**
* If a forecast does not include an explicit point estimate, the 0.5 quantile is taken as the point estimate for calculating absolute error.
-* WIS is only shown for forecasts that have predictions for all quantiles (23 quantiles for deaths and 15 for cases)
+* WIS is only shown for forecasts that have predictions for all quantiles (23 quantiles for deaths and hospitalizations, 7 for cases)
* Totaling over all states and territories does not include nationwide forecasts. To ensure that values are comparable, these totals also exclude any locations that are absent from any file that was submitted by one of the selected forecasters.
* We include revisions of observed values, which means that the scores for forecasts made in the past can change as our understanding of the ground truth changes.
@@ -83,5 +95,7 @@ The available files are:
* score_cards_nation_deaths.rds
* score_cards_state_cases.rds
* score_cards_state_deaths.rds
+* score_cards_state_hospitalizations.rds
+* score_cards_nation_hospitalizations.rds
diff --git a/dashboard/app.R b/dashboard/app.R
index 6f1bf49..f94fa1a 100644
--- a/dashboard/app.R
+++ b/dashboard/app.R
@@ -12,7 +12,17 @@ library(aws.s3)
COVERAGE_INTERVALS = c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98")
DEATH_FILTER = "deaths_incidence_num"
CASE_FILTER = "confirmed_incidence_num"
+HOSPITALIZATIONS_FILTER = "confirmed_admissions_covid_1d"
+HOSPITALIZATIONS_TARGET_DAY = "Wednesday"
TOTAL_LOCATIONS = "Totaled Over States*"
+AHEAD_OPTIONS = c(1,2,3,4)
+
+# Num days to offset the forecast week by
+# Example: if HOSPITALIZATIONS_TARGET_DAY is Wednesday and HOSPITALIZATIONS_OFFSET is 2,
+# the first ahead option (2 days) has to have a forecast date of that Monday or earlier,
+# the second option (9 days) has to have a forecast date 7 or more days before that Monday (an ahead of offset + 7 or more), etc.
+HOSPITALIZATIONS_OFFSET = 2
+HOSPITALIZATIONS_AHEAD_OPTIONS = c(HOSPITALIZATIONS_OFFSET, HOSPITALIZATIONS_OFFSET + 7, HOSPITALIZATIONS_OFFSET + 14, HOSPITALIZATIONS_OFFSET + 21)
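+# i.e. c(2, 9, 16, 23) days ahead, one option per weekly horizon shown in the dashboard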
# Score explanations
wisExplanation = includeMarkdown("wis.md")
@@ -64,9 +74,8 @@ ui <- fluidPage(padding=0,
conditionalPanel(condition = "input.tabset == 'evaluations'",
radioButtons("targetVariable", "Target Variable",
choices = list("Incident Deaths" = "Deaths",
- "Incident Cases" = "Cases")),
-
-
+ "Incident Cases" = "Cases",
+ "Hospital Admissions" = "Hospitalizations")),
radioButtons("scoreType", "Scoring Metric",
choices = list("Weighted Interval Score" = "wis",
"Spread" = "sharpness",
@@ -78,13 +87,13 @@ ui <- fluidPage(padding=0,
"logScale",
"Log Scale",
value = FALSE,
- ),
+ )),
+ conditionalPanel(condition = "input.scoreType != 'coverage' && input.targetVariable != 'Hospitalizations'",
checkboxInput(
"scaleByBaseline",
"Scale by Baseline Forecaster",
value = FALSE,
- )
- ),
+ )),
selectInput(
"forecasters",
p("Forecasters", tags$br(), tags$span(id="forecaster-input", "Type a name or select from dropdown")),
@@ -96,8 +105,8 @@ ui <- fluidPage(padding=0,
checkboxGroupInput(
"aheads",
"Forecast Horizon (Weeks)",
- choices = c(1,2,3,4),
- selected = 1,
+ choices = AHEAD_OPTIONS,
+ selected = AHEAD_OPTIONS[1],
inline = TRUE
),
conditionalPanel(condition = "input.scoreType == 'coverage'",
@@ -225,7 +234,9 @@ server <- function(input, output, session) {
dfStateDeaths <- getData("score_cards_state_deaths.rds")
dfNationCases = getData("score_cards_nation_cases.rds")
dfNationDeaths = getData("score_cards_nation_deaths.rds")
-
+ dfStateHospitalizations = getData("score_cards_state_hospitalizations.rds")
+ dfNationHospitalizations = getData("score_cards_nation_hospitalizations.rds")
+
# Pick out expected columns only
covCols = paste0("cov_", COVERAGE_INTERVALS)
expectedCols = c("ahead", "geo_value", "forecaster", "forecast_date",
@@ -237,8 +248,10 @@ server <- function(input, output, session) {
dfStateDeaths = dfStateDeaths %>% select(all_of(expectedCols))
dfNationCases = dfNationCases %>% select(all_of(expectedCols))
dfNationDeaths = dfNationDeaths %>% select(all_of(expectedCols))
+ dfStateHospitalizations = dfStateHospitalizations %>% select(all_of(expectedCols))
+ dfNationHospitalizations = dfNationHospitalizations %>% select(all_of(expectedCols))
- df <- rbind(dfStateCases, dfStateDeaths, dfNationCases, dfNationDeaths)
+ df <- rbind(dfStateCases, dfStateDeaths, dfNationCases, dfNationDeaths, dfStateHospitalizations, dfNationHospitalizations)
df <- df %>% rename("10" = cov_10, "20" = cov_20, "30" = cov_30, "40" = cov_40, "50" = cov_50, "60" = cov_60, "70" = cov_70, "80" = cov_80, "90" = cov_90, "95" = cov_95, "98" = cov_98)
# Prepare color palette
@@ -262,13 +275,19 @@ server <- function(input, output, session) {
if (targetVariable == "Deaths") {
signalFilter = DEATH_FILTER
}
+ if (targetVariable == "Hospitalizations") {
+ signalFilter = HOSPITALIZATIONS_FILTER
+ }
scoreDf = scoreDf %>%
filter(signal == signalFilter) %>%
- filter(ahead %in% horizon) %>%
filter(forecaster %in% forecasters)
-
- filteredScoreDf <- scoreDf %>% rename(Forecaster = forecaster, Week_End_Date = target_end_date)
-
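+  # Hospitalization forecasts are daily; map them onto the weekly horizon buckets
+  # defined by HOSPITALIZATIONS_AHEAD_OPTIONS (see filterHospitalizationsAheads)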
+ if (signalFilter == HOSPITALIZATIONS_FILTER) {
+ scoreDf = filterHospitalizationsAheads(scoreDf)
+ }
+ scoreDf = scoreDf %>% filter(ahead %in% horizon)
+ filteredScoreDf <- scoreDf %>% rename(Forecaster = forecaster, Forecast_Date = forecast_date,
+ Week_End_Date = target_end_date)
+
if (scoreType == "wis" || scoreType == "sharpness") {
# Only show WIS or Sharpness for forecasts that have all intervals
filteredScoreDf = filteredScoreDf %>% filter(!is.na(`50`)) %>% filter(!is.na(`80`)) %>% filter(!is.na(`95`))
@@ -276,6 +295,10 @@ server <- function(input, output, session) {
filteredScoreDf = filteredScoreDf %>% filter(!is.na(`10`)) %>% filter(!is.na(`20`)) %>% filter(!is.na(`30`)) %>%
filter(!is.na(`40`)) %>% filter(!is.na(`60`)) %>% filter(!is.na(`70`)) %>% filter(!is.na(`90`)) %>% filter(!is.na(`98`))
}
+ if (dim(filteredScoreDf)[1] == 0) {
+ output$renderWarningText <- renderText("The selected forecasters do not have enough data to display the selected scoring metric.")
+ return()
+ }
if (scoreType == "wis") {
filteredScoreDf <- filteredScoreDf %>% rename(Score = wis)
title = "Weighted Interval Score"
@@ -301,6 +324,7 @@ server <- function(input, output, session) {
# Create df with col for all locations across each unique date, ahead and forecaster combo
locationDf = filteredScoreDf %>% group_by(Forecaster, Week_End_Date, ahead) %>%
summarize(location_list = paste(sort(unique(geo_value)),collapse=","))
+    locationDf = locationDf %>% filter(location_list != 'us')
# Create a list containing each row's location list
locationList = sapply(locationDf$location_list, function(x) strsplit(x, ","))
locationList = lapply(locationList, function(x) x[x != 'us'])
@@ -311,19 +335,19 @@ server <- function(input, output, session) {
if (scoreType == "coverage") {
aggregate = "Averaged"
filteredScoreDf = filteredScoreDf %>%
- group_by(Forecaster, Week_End_Date, ahead) %>%
+ group_by(Forecaster, Forecast_Date, Week_End_Date, ahead) %>%
summarize(Score = sum(Score)/length(locationsIntersect), actual = sum(actual))
output$renderAggregateText = renderText(paste(aggregateText," Some forecasters may not have any data for the coverage interval chosen. Locations inlcuded: "))
}
else {
aggregate = "Totaled"
filteredScoreDf = filteredScoreDf %>%
- group_by(Forecaster, Week_End_Date, ahead) %>%
+ group_by(Forecaster, Forecast_Date, Week_End_Date, ahead) %>%
summarize(Score = sum(Score), actual = sum(actual))
output$renderAggregateText = renderText(paste(aggregateText, " Locations included: "))
}
if (length(locationsIntersect) == 0) {
- output$renderWarningText <- renderText("The selected forecasters do not have data for any locations in common.")
+ output$renderWarningText <- renderText("The selected forecasters do not have data for any locations in common on all dates.")
output$renderLocations <- renderText("")
output$renderAggregateText = renderText("")
hideElement("truthPlot")
@@ -338,7 +362,7 @@ server <- function(input, output, session) {
# Not totaling over all locations
} else {
filteredScoreDf <- filteredScoreDf %>% filter(geo_value == tolower(loc)) %>%
- group_by(Forecaster, Week_End_Date, ahead) %>%
+ group_by(Forecaster, Forecast_Date, Week_End_Date, ahead) %>%
summarize(Score = Score, actual = actual)
locationSubtitleText = paste0(', Location: ', input$location)
output$renderAggregateText = renderText("")
@@ -355,7 +379,7 @@ server <- function(input, output, session) {
})
# Format and transform data
- filteredScoreDf = filteredScoreDf[c("Forecaster", "Week_End_Date", "Score", "ahead")]
+ filteredScoreDf = filteredScoreDf[c("Forecaster", "Forecast_Date", "Week_End_Date", "Score", "ahead")]
filteredScoreDf = filteredScoreDf %>% mutate(across(where(is.numeric), ~ round(., 2)))
if (scoreType != 'coverage') {
if (scaleByBaseline) {
@@ -364,8 +388,8 @@ server <- function(input, output, session) {
# Scaling score by baseline forecaster
filteredScoreDfMerged$Score.x = filteredScoreDfMerged$Score.x / filteredScoreDfMerged$Score.y
filteredScoreDf = filteredScoreDfMerged %>%
- rename(Forecaster = Forecaster.x, Score = Score.x) %>%
- select(Forecaster, Week_End_Date, ahead, Score)
+ rename(Forecaster = Forecaster.x, Score = Score.x, Forecast_Date = Forecast_Date.x) %>%
+ select(Forecaster, Forecast_Date, Week_End_Date, ahead, Score)
}
if (logScale) {
filteredScoreDf$Score = log10(filteredScoreDf$Score)
@@ -380,18 +404,25 @@ server <- function(input, output, session) {
# Fill gaps so there are line breaks on weeks without data
filteredScoreDf = filteredScoreDf %>%
as_tsibble(key = c(Forecaster, ahead), index = Week_End_Date) %>%
- group_by(Forecaster, ahead) %>%
+ group_by(Forecaster, Forecast_Date, ahead) %>%
fill_gaps(.full = TRUE)
-
- filteredScoreDf$ahead = factor(filteredScoreDf$ahead, levels = c(1, 2, 3, 4),
- labels = c("Horizon: 1 Week", "Horizon: 2 Weeks", "Horizon: 3 Weeks", "Horizon: 4 Weeks"))
+ # Set labels for faceted horizon plots
+ horizonOptions = AHEAD_OPTIONS
+ horizonLabels = lapply(AHEAD_OPTIONS, function (x) paste0("Horizon: ", x, " Week(s)"))
+ if (targetVariable == 'Hospitalizations') {
+ horizonOptions = HOSPITALIZATIONS_AHEAD_OPTIONS
+ horizonLabels = lapply(HOSPITALIZATIONS_AHEAD_OPTIONS, function (x) paste0("Horizon: ", x, " Days"))
+ }
+ filteredScoreDf$ahead = factor(filteredScoreDf$ahead, levels = horizonOptions,
+ labels = horizonLabels)
+ # Set forecaster colors for plot
set.seed(colorSeed)
forecasterRand <- sample(unique(df$forecaster))
colorPalette = setNames(object = viridis(length(unique(df$forecaster))), nm = forecasterRand)
p = ggplot(
filteredScoreDf,
- aes(x = Week_End_Date, y = Score, color = Forecaster, shape = Forecaster)
+ aes(x = Week_End_Date, y = Score, color = Forecaster, shape = Forecaster, label = Forecast_Date)
) +
geom_line() +
geom_point(size=2) +
@@ -413,7 +444,7 @@ server <- function(input, output, session) {
}
plotHeight = 550 + (length(horizon)-1)*100
finalPlot <-
- ggplotly(p,tooltip = c("x", "y", "shape")) %>%
+ ggplotly(p, tooltip = c("x", "y", "shape", "label")) %>%
layout(
height = plotHeight,
legend = list(orientation = "h", y = -0.1),
@@ -434,10 +465,14 @@ server <- function(input, output, session) {
###################
# Create the plot for target variable ground truth
truthPlot = function(scoreDf = NULL, targetVariable = NULL, locationsIntersect = NULL, allLocations = FALSE) {
- titleText = paste0('Observed Incident ', targetVariable, '')
+ observation = paste0('Incident ', targetVariable)
+ if (targetVariable == "Hospitalizations") {
+    observation = 'Hospital Admissions'
+ }
+ titleText = paste0('Observed ', observation, '')
if (allLocations) {
- titleText = paste0('Observed Incident ', targetVariable, '', '
-Totaled over all states and territories common to selected forecasters*')
- }
+ titleText = paste0('Observed ', observation, '', '
+Totaled over all states and territories common to selected forecasters*')
+ }
scoreDf <- scoreDf %>%
group_by(Week_End_Date) %>% summarize(Reported_Incidence = actual)
@@ -475,9 +510,13 @@ server <- function(input, output, session) {
observeEvent(input$targetVariable, {
if (input$targetVariable == 'Deaths') {
df = df %>% filter(signal == DEATH_FILTER)
- } else {
+ } else if (input$targetVariable == 'Cases') {
df = df %>% filter(signal == CASE_FILTER)
+ } else {
+ df = df %>% filter(signal == HOSPITALIZATIONS_FILTER)
}
+
+ updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, TRUE)
updateForecasterChoices(session, df, input$forecasters, input$scoreType)
updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location)
updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output)
@@ -486,8 +525,10 @@ server <- function(input, output, session) {
observeEvent(input$scoreType, {
if (input$targetVariable == 'Deaths') {
df = df %>% filter(signal == DEATH_FILTER)
- } else {
+ } else if (input$targetVariable == 'Cases') {
df = df %>% filter(signal == CASE_FILTER)
+ } else {
+ df = df %>% filter(signal == HOSPITALIZATIONS_FILTER)
}
# Only show forecasters that have data for the score chosen
updateForecasterChoices(session, df, input$forecasters, input$scoreType)
@@ -522,21 +563,14 @@ server <- function(input, output, session) {
observeEvent(input$forecasters, {
if (input$targetVariable == 'Deaths') {
df = df %>% filter(signal == DEATH_FILTER)
- } else {
+ } else if (input$targetVariable == 'Cases') {
df = df %>% filter(signal == CASE_FILTER)
- }
- df = df %>% filter(forecaster %in% input$forecasters)
- aheadChoices = unique(df$ahead)
- # Ensure previsouly selected options are still allowed
- if (input$aheads %in% aheadChoices) {
- selectedAheads = input$aheads
} else {
- selectedAheads = 1
+ df = df %>% filter(signal == HOSPITALIZATIONS_FILTER)
}
- updateCheckboxGroupInput(session, "aheads",
- choices = aheadChoices,
- selected = selectedAheads,
- inline = TRUE)
+ df = df %>% filter(forecaster %in% input$forecasters)
+
+ updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, FALSE)
updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location)
updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output)
})
@@ -545,13 +579,18 @@ server <- function(input, output, session) {
observe({
# Ensure there is always one ahead selected
if(length(input$aheads) < 1) {
- updateCheckboxGroupInput(session, "aheads",
- selected = 1)
+ if (input$targetVariable == 'Hospitalizations') {
+ updateCheckboxGroupInput(session, "aheads",
+ selected = HOSPITALIZATIONS_AHEAD_OPTIONS[1])
+ } else {
+ updateCheckboxGroupInput(session, "aheads",
+ selected = AHEAD_OPTIONS[1])
+ }
}
# Ensure there is always one forecaster selected
if(length(input$forecasters) < 1) {
updateSelectInput(session, "forecasters",
- selected = c("COVIDhub-baseline"))
+                          selected = c("COVIDhub-ensemble")) # Use ensemble rather than baseline because it has hospitalization scores
}
# Ensure COVIDhub-baseline is selected when scaling by baseline
if(input$scaleByBaseline && !("COVIDhub-baseline" %in% input$forecasters)) {
@@ -611,4 +650,52 @@ updateLocationChoices = function(session, df, targetVariable, forecasterChoices,
selected = selectedLocation)
}
+updateAheadChoices = function(session, df, targetVariable, forecasterChoices, aheads, targetVariableChange) {
+ df = df %>% filter(forecaster %in% forecasterChoices)
+ aheadOptions = AHEAD_OPTIONS
+ title = "Forecast Horizon (Weeks)"
+ if (targetVariable == 'Hospitalizations') {
+ aheadOptions = HOSPITALIZATIONS_AHEAD_OPTIONS
+ title = "Forecast Horizon (Days)"
+ }
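+  # Only offer horizon options that the selected forecasters have scores for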
+ aheadChoices = Filter(function(x) any(unique(df$ahead) %in% x), aheadOptions)
+  # Ensure previously selected options are still allowed
+  if (!is.null(aheads) && all(aheads %in% aheadChoices)) {
+ selectedAheads = aheads
+ } else {
+ selectedAheads = aheadOptions[1]
+ }
+ # If we are changing target variable, always reset ahead selection to first option
+ if (targetVariableChange) {
+ selectedAheads = aheadOptions[1]
+ }
+ updateCheckboxGroupInput(session, "aheads",
+ title,
+ choices = aheadChoices,
+ selected = selectedAheads,
+ inline = TRUE)
+}
+
+# Only use weekly aheads for hospitalizations
+# May change in the future
+filterHospitalizationsAheads = function(scoreDf) {
+ scoreDf['weekday'] = weekdays(as.Date(scoreDf$target_end_date))
+ scoreDf = scoreDf %>% filter(weekday == HOSPITALIZATIONS_TARGET_DAY)
+
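+  # Within each weekly window, keep only the most recent forecast (smallest ahead)
+  # per forecaster and target date, then relabel it with the canonical weekly ahead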
+ oneAheadDf = scoreDf %>% filter(ahead >= HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>%
+ group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>%
+ mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1])
+ twoAheadDf = scoreDf %>% filter(ahead >= 7 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 14 + HOSPITALIZATIONS_OFFSET) %>%
+ group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>%
+ mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[2])
+ threeAheadDf = scoreDf %>% filter(ahead >= 14 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 21 + HOSPITALIZATIONS_OFFSET) %>%
+ group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>%
+ mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[3])
+ fourAheadDf = scoreDf %>% filter(ahead >= 21 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 28 + HOSPITALIZATIONS_OFFSET) %>%
+ group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>%
+ mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[4])
+
+ return(rbind(oneAheadDf, twoAheadDf, threeAheadDf, fourAheadDf))
+}
+
shinyApp(ui = ui, server = server)