diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 85b1a04..6e0e6cc 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 7.1.0 +current_version = 7.2.0 commit = False tag = False diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 2d82e6f..30242bf 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: ref: main ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} diff --git a/.github/workflows/release_main.yml b/.github/workflows/release_main.yml index adceae0..0e7fa71 100644 --- a/.github/workflows/release_main.yml +++ b/.github/workflows/release_main.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v2 with: @@ -49,7 +49,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: ref: dev fetch-depth: 0 diff --git a/DESCRIPTION b/DESCRIPTION index 60a758c..ac917bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,9 @@ Package: forecasteval Title: Forecast Evaluation Dashboard -Version: 7.1.0 +Version: 7.2.0 Authors@R: c(person("Kate", "Harwood", role = "aut"), person("Chris", "Scott", role = "ctb"), - person("Jed", "Grabman", role = "ctb")), + person("Jed", "Grabman", role = "ctb"), person("Nat", "DeFries", email= "ndefries@andrew.cmu.edu", role = c("aut", "cre"))) Description: This app collects and scores COVID-19 forecasts submitted to the CDC, and displays the results in an RShiny dashboard. 
License: MIT License, Copyright (c) 2021 Delphi contributors diff --git a/Makefile b/Makefile index 31652d3..ef9469a 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,15 @@ PWD=$(shell pwd) S3_URL=https://forecast-eval.s3.us-east-2.amazonaws.com S3_BUCKET=s3://forecast-eval +# Change `imageTag` during `make` call via `make imageTag=` +# +# `imageTag` specifies the tag to be used for the production dashboard Docker +# image. If building from `main`, it should be `latest`. If building from +# `dev`, it should be `dev`. The default value used here is meant to prevent +# the actual `latest` and `dev` images in the image repository from being +# accidentally overwritten. +imageTag=local + build: build_dashboard # Build a docker image suitable for running the scoring pipeline @@ -66,10 +75,14 @@ build_dashboard_dev: pull_data start_dashboard: build_dashboard_dev docker run --rm -p 3838:80 ghcr.io/cmu-delphi/forecast-eval:latest -# Build a docker image for production use +# Build a docker image for production use. Currently this isn't used anywhere, +# but could be useful if we need to manually build a docker image for +# production. build_dashboard: pull_data docker build --no-cache=true --pull -t ghcr.io/cmu-delphi/forecast-eval:$(imageTag) -f devops/Dockerfile . -# Push a production docker image to the image repository +# Push a production docker image to the image repository. Currently this isn't +# used anywhere, but could be useful if we need to manually release a docker +# image for production. 
deploy_dashboard: build_dashboard docker push ghcr.io/cmu-delphi/forecast-eval:$(imageTag) diff --git a/app/R/exportScores.R b/app/R/exportScores.R index 04a6570..d22f237 100644 --- a/app/R/exportScores.R +++ b/app/R/exportScores.R @@ -5,7 +5,7 @@ exportScoresUI <- function(id = "exportScores") { ) } -createExportScoresDataFrame <- function(scoreDf, targetVariable, scoreType, forecasters, loc, coverageInterval) { +createExportScoresDataFrame <- function(scoreDf, targetVariable, scoreType, forecasters, loc, coverageInterval, filterDate) { scoreDf <- filter( scoreDf[[targetVariable]], forecaster %chin% forecasters @@ -16,7 +16,7 @@ createExportScoresDataFrame <- function(scoreDf, targetVariable, scoreType, fore if (targetVariable == "Hospitalizations") { scoreDf <- filterHospitalizationsAheads(scoreDf) } - scoreDf <- filterOverAllLocations(scoreDf, scoreType) + scoreDf <- filterOverAllLocations(scoreDf, scoreType, filterDate = filterDate) return(scoreDf[[1]]) } else { scoreDf <- filter(scoreDf, geo_value == tolower(loc)) diff --git a/app/assets/about.md b/app/assets/about.md index 9284e29..53f3766 100644 --- a/app/assets/about.md +++ b/app/assets/about.md @@ -127,3 +127,18 @@ stateCases = tryCatch( ``` + +##### Forecasts with actuals + +If you are interested in getting the forecasts paired with the corresponding actual values (if you were e.g. testing different evaluation methods), that can be found in [the Amazon S3 bucket](https://forecast-eval.s3.us-east-2.amazonaws.com/) in 3 zip files. +These files are static, generated using [the aggregation script](https://raw.githubusercontent.com/cmu-delphi/forecast-eval/main/app/assets/forecastsWithActuals.R), and forecast and actual data available on June 12, 2023. 
The latest forecast date available for each target signal is + +* [cases](https://forecast-eval.s3.us-east-2.amazonaws.com/cases.zip): 2023-02-13 +* [hospitalizations](https://forecast-eval.s3.us-east-2.amazonaws.com/hospitalizations.zip): + * 1 week: 2023-06-05 + * 2 week: 2023-06-05 + * 3 week: 2023-06-05 + * 4 week: 2023-06-05 +* [deaths](https://forecast-eval.s3.us-east-2.amazonaws.com/deaths.zip): 2023-03-06 + +If the S3 bucket is down, these files are also available on [Delphi's file-hosting site](https://www.cmu.edu/delphi-web/forecast-eval-scores). diff --git a/app/assets/forecastsWithActuals.R b/app/assets/forecastsWithActuals.R new file mode 100644 index 0000000..3fb84ce --- /dev/null +++ b/app/assets/forecastsWithActuals.R @@ -0,0 +1,98 @@ +library(dplyr) +library(tidyr) +library(aws.s3) + +Sys.setenv("AWS_DEFAULT_REGION" = "us-east-2") +s3bucket <- tryCatch( + { + get_bucket(bucket = "forecast-eval") + }, + error = function(e) { + e + } +) + +readbucket <- function(name) { + tryCatch( + { + s3readRDS(object = name, bucket = s3bucket) + }, + error = function(e) { + e + } + ) +} + +# Cases, deaths, hosp scores: needed for "actual"s +cases <- bind_rows( + readbucket("score_cards_nation_cases.rds"), + readbucket("score_cards_state_cases.rds") +) +deaths <- bind_rows( + readbucket("score_cards_nation_deaths.rds"), + readbucket("score_cards_state_deaths.rds") +) +hosp <- bind_rows( + readbucket("score_cards_nation_hospitalizations.rds"), + readbucket("score_cards_state_hospitalizations.rds") +) + +# The big one: predictions from all forecasters +pred <- readbucket("predictions_cards.rds") + +# Cases +pred_cases <- pred %>% + filter(signal == "confirmed_incidence_num") %>% + mutate(signal = NULL, data_source = NULL, incidence_period = NULL) %>% + pivot_wider( + names_from = quantile, + values_from = value, + names_prefix = "forecast_" + ) + +actual_cases <- cases %>% + select(ahead, geo_value, forecaster, forecast_date, target_end_date, actual) + +joined_cases 
<- left_join(pred_cases, actual_cases) +sum(is.na(actual_cases$actual)) == sum(is.na(joined_cases$actual)) +write.csv(joined_cases, "cases.csv") + +# Deaths +pred_deaths <- pred %>% + filter(signal == "deaths_incidence_num") %>% + mutate(signal = NULL, data_source = NULL, incidence_period = NULL) %>% + pivot_wider( + names_from = quantile, + values_from = value, + names_prefix = "forecast_" + ) + +actual_deaths <- deaths %>% + select(ahead, geo_value, forecaster, forecast_date, target_end_date, actual) + +joined_deaths <- left_join(pred_deaths, actual_deaths) +sum(is.na(actual_deaths$actual)) == sum(is.na(joined_deaths$actual)) +write.csv(joined_deaths, "deaths.csv") + +# Hospitalizations: break up by weeks since we run into memory errors o/w! +pred_hosp <- actual_hosp <- joined_hosp <- vector(mode = "list", length = 4) +for (k in 1:4) { + cat(k, "... ") + days <- (k - 1) * 7 + 1:7 + pred_hosp[[k]] <- pred %>% + filter(signal == "confirmed_admissions_covid_1d", ahead %in% days) %>% + mutate(signal = NULL, data_source = NULL, incidence_period = NULL) %>% + pivot_wider( + names_from = quantile, + values_from = value, + names_prefix = "forecast_" + ) + + actual_hosp[[k]] <- hosp %>% + filter(ahead %in% days) %>% + select(ahead, geo_value, forecaster, forecast_date, target_end_date, actual) + + joined_hosp[[k]] <- left_join(pred_hosp[[k]], actual_hosp[[k]]) + cat(sum(is.na(actual_hosp[[k]]$actual)) == sum(is.na(joined_hosp[[k]]$actual))) + write.csv(joined_hosp[[k]], sprintf("hospitalizations_%iwk.csv", k)) +} diff --git a/app/global.R b/app/global.R index 46cee51..1cec713 100644 --- a/app/global.R +++ b/app/global.R @@ -10,7 +10,7 @@ library(tsibble) library(covidcast) library(data.table) -appVersion <- "7.1.0" +appVersion <- "7.2.0" COVERAGE_INTERVALS <- c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98") CASES_DEATHS_TARGET_DAY <- "Saturday" diff --git a/app/server.R b/app/server.R index df5d6f7..1d7b067 100644 --- a/app/server.R +++ b/app/server.R @@
-991,7 +991,8 @@ server <- function(input, output, session) { "exportScores", shiny::reactive(generateExportFilename(input)), shiny::reactive(createExportScoresDataFrame( df_list, input$targetVariable, input$scoreType, input$forecasters, - input$location, input$coverageInterval + input$location, input$coverageInterval, + filterDate = dataCreationDate )) ) }