From 907f62aa6bc1fa398acb3f87f0291a19ec9d0c28 Mon Sep 17 00:00:00 2001
From: Kate Harwood
Date: Thu, 4 Mar 2021 12:35:00 -0500
Subject: [PATCH 1/6] adding more error catching

---
 dashboard/app.R | 66 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 20 deletions(-)

diff --git a/dashboard/app.R b/dashboard/app.R
index 6a46030..3cf50e6 100644
--- a/dashboard/app.R
+++ b/dashboard/app.R
@@ -28,17 +28,29 @@ s3bucket = tryCatch(
 # Get and prepare data
 getData <- function(filename){
   if(!is.null(s3bucket)) {
-    s3readRDS(object = filename, bucket = s3bucket)
-  } else {
-    path = ifelse(
-      file.exists(filename),
-      filename,
-      file.path("../dist/",filename)
+    tryCatch(
+      {
+        s3readRDS(object = filename, bucket = s3bucket)
+      },
+      error = function(e) {
+        e
+        getFallbackData(filename)
+      }
     )
-    readRDS(path)
+  } else {
+    getFallbackData(filename)
   }
 }
 
+getFallbackData = function(filename) {
+  path = ifelse(
+    file.exists(filename),
+    filename,
+    file.path("../dist/",filename)
+  )
+  readRDS(path)
+}
+
 dfStateCases <- getData("score_cards_state_cases.rds")
 dfStateDeaths <- getData("score_cards_state_deaths.rds")
 dfNationCases = getData("score_cards_nation_cases.rds")
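A minimal standalone sketch of the fallback pattern this hunk introduces (try S3 first, fall back to the local copy on any error), assuming `s3readRDS` from the `aws.s3` package as used in app.R; the wrapper name and message are illustrative:

    library(aws.s3)

    readWithFallback <- function(filename, bucket) {
      tryCatch(
        # Primary source: the S3 bucket
        s3readRDS(object = filename, bucket = bucket),
        error = function(e) {
          # Any S3 failure (credentials, network, missing object) drops through
          # to the local copy, mirroring getFallbackData() above
          message("S3 read failed (", conditionMessage(e), "); using local file")
          path <- if (file.exists(filename)) filename else file.path("../dist", filename)
          readRDS(path)
        }
      )
    }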
@@ -84,23 +96,38 @@ coverageExplanation = "
aboutPageText = HTML("

Who We Are


-This app was conceived and built in a collaboration between the Reich Lab's Forecast Hub
-and Carnegie Mellon's Delphi Research Group.
+The Forecast Evaluation Research Collaborative was founded by the Reich Lab
+at University of Massachusetts Amherst and the Carnegie Mellon University Delphi Group.
+Both groups are funded by the CDC as Centers of Excellence for Influenza and COVID-19 Forecasting.
+We have partnered together on this project to focus on providing a robust set of tools and methods for evaluating the performance of epidemic forecasts.
+

+The collaborative’s mission is to help epidemiological researchers gain insights into the performance of their forecasts,
+and ultimately lead to more accurate forecasting of epidemics.
+

+Both groups have led initiatives related to COVID-19 data and forecast curation.
+The Reich Lab has created the COVID-19 Forecast Hub,
+a collaborative effort with over 80 groups submitting forecasts to be part of the official
+ CDC COVID-19 ensemble forecast.
+The Delphi Group has created COVIDcast, a platform for epidemiological surveillance data,
+and runs the Delphi Pandemic Survey via Facebook,
+which is a valuable signal
+for Delphi’s participation in the ensemble forecast.
+

+This Forecaster Evaluation Dashboard is a collaborative project, also made possible by the Google Fellowship program... TODO more to add
+
+
+TODO: should there be more here about what each group is, and why we are collaborating (sharing resources and expertise).
+For instance, something
+about how the Forecast Hub gathers all the weekly forecasts, and Delphi's evalcast scores them?


Collaborators

-TODO: how should these be displayed?
-From the Forecast Hub: Nick Reich, Estee Cramer, Johannes Bracher, anyone else?
-From the Delphi Research Group: Jed Grabman, Kate Harwood, Chris Scott, Jacob Bien, Daniel McDonald, Logan Brooks, anyone else?
+From the Forecast Hub: Estee Cramer, Nicholas Reich, the COVID-19 Forecast Hub Team
+
+From the Delphi Research Group: Jed Grabman, Kate Harwood, Chris Scott, Jacob Bien, Daniel McDonald, Logan Brooks

Our Mission


-The goal of the Forecast Evaluation Working Group is to provide a robust set of tools and methods for evaluating the
-performance of COVID-19 forecasting models to help epidemiological researchers gain insights into the models' performance,
-and ultimately lead to more accurate forecasting of COVID-19 and other diseases.
TODO: obviously this needs work.

About the Data


Sources

Observed values are from the
@@ -144,7 +171,6 @@ For many forecasters this is the 50% quantile prediction.
  • When totaling over all locations, these locations include states and territories and do not include nationwide forecasts.
  • We do include revisions of observed values, meaning the scores for forecasts made in the past can change. Scores change as our understanding of the truth changes.
  • -
  • TODO: Is there anything else missing here?


  • Explanation of Scoring Methods

@@ -214,11 +240,11 @@ ui <- fluidPage(
     ),
     tags$hr(),
   ),
-  tags$div(HTML("This app was conceived and built by the Forecast Evaluation Working Group, a collaboration between
-    the Reich Lab's Forecast Hub and
-    Carnegie Mellon's Delphi Research Group.
+  tags$div(HTML("This app was conceived and built by the Forecast Evaluation Research Collaborative, a collaboration between
+    the Reich Lab and
+    the Delphi Group.

-    This data can also be viewed in a weekly report on the Forecast Hub site.")),
+    This data can also be viewed in a weekly report on the Forecast Hub site. TODO need link")),
     a("View Weekly Report", href = "#"),
     width=3,
   ),

From f1e32e196d4d2f7899b04cba5a6454264742a815 Mon Sep 17 00:00:00 2001
From: Kate Harwood
Date: Fri, 5 Mar 2021 10:21:45 -0500
Subject: [PATCH 2/6] updating wording

---
 dashboard/app.R | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/dashboard/app.R b/dashboard/app.R
index 3cf50e6..41ce83a 100644
--- a/dashboard/app.R
+++ b/dashboard/app.R
@@ -90,7 +90,9 @@ coverageExplanation = "
the time, aka the forecaster's 50% CI was under-confident that week, or too wide. Conversely, if the y-value is below the line,
it means that the forecaster's 50% CI was over-confident that week, or too narrow.
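As a rough sketch of that calculation (column names here are assumptions, not the app's actual schema), the per-week empirical coverage of the 50% interval could be computed as:

    library(dplyr)

    # One row per (week, location): the 25% and 75% quantile predictions that
    # bound the 50% interval, plus the observed value (toy numbers)
    scores <- tibble::tribble(
      ~week, ~location, ~quantile_25, ~quantile_75, ~actual,
      1,     "ma",      100,          200,          150,
      1,     "ny",      300,          400,          450,
      2,     "ma",      120,          220,          180,
      2,     "ny",      310,          410,          390
    )

    coverage_by_week <- scores %>%
      group_by(week) %>%
      summarize(coverage_50 = mean(actual >= quantile_25 & actual <= quantile_75))
    # Values near 0.50 track the plotted line; higher means the intervals were
    # too wide (under-confident), lower means too narrow (over-confident)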
    " - +# Truth data disclaimer +observedValueDisclaimer = + "All forecasts are evaluated against the latest version of observed data. Scores of pasts forecasts may change as observed data is revised." # About page content aboutPageText = HTML(" @@ -113,22 +115,16 @@ and runs the Delphi Pandemic which is a valuable signal for Delphi’s participation in the ensemble forecast.

-This Forecaster Evaluation Dashboard is a collaborative project, also made possible by the Google Fellowship program... TODO more to add
-
-
-TODO: should there be more here about what each group is, and why we are collaborating (sharing resources and expertise).
-For instance, something
-about how the Forecast Hub gathers all the weekly forecasts, and Delphi's evalcast scores them?
+The Forecaster Evaluation Dashboard is a collaborative project, which has been made possible by members of the Google.org Fellowship
+engaged with the Delphi Group. Google.org is committed to the recovery of lives
+and communities that have been impacted by COVID-19 and investing in developing the science to mitigate the damage of future pandemics.


    Collaborators


From the Forecast Hub: Estee Cramer, Nicholas Reich, the COVID-19 Forecast Hub Team
    From the Delphi Research Group: Jed Grabman, Kate Harwood, Chris Scott, Jacob Bien, Daniel McDonald, Logan Brooks -
    -

    Our Mission

    -
    -

    About the Data

    +


    About the Data


    Sources

 Observed values are from the
 COVID-19 Data Repository
@@ -258,7 +254,6 @@ ui <- fluidPage(
              tabPanel("Evaluation Plots", value = "evaluations",
                       textOutput('renderWarningText'),
                       plotlyOutput(outputId = "summaryPlot"),
-                      dataTableOutput('renderTable'),
                       tags$br(),tags$br(),tags$br(),tags$br(),tags$br(),
                       HTML('
'),
                       textOutput('renderLocationText'),
@@ -274,7 +269,9 @@ ui <- fluidPage(
              actionLink("truthValues",
                         h4(tags$div(style = "color: black; padding-left:40px;", HTML("Observed Values"),
                                     icon("arrow-circle-down")))),
-             hidden(div(id="truthSection", hidden(div(id='truthPlot', plotlyOutput(outputId = "truthPlot"))))),
+             hidden(div(id="truthSection", hidden(div(id='truthPlot',
+               HTML('
'), textOutput('renderObservedValueDisclaimer'), HTML('
'),
+               plotlyOutput(outputId = "truthPlot"))))),
              tags$br(),tags$br()
     )
   ),
@@ -290,7 +287,7 @@ server <- function(input, output, session) {
   ##############
   summaryPlot = function(scoreDf, targetVariable, scoreType, forecasters, horizon, loc, allLocations,
                          coverageInterval = NULL) {
-    signalFilter = CASE_FILTER
+    signalFilter = CASE_FILTER
     if (targetVariable == "Deaths") {
       signalFilter = DEATH_FILTER
     }
@@ -415,6 +412,7 @@ server <- function(input, output, session) {
   scoreDf <- scoreDf %>% group_by(Date) %>%
     summarize(Incidence = actual)
+  output$renderObservedValueDisclaimer = renderText(observedValueDisclaimer)
   return (ggplotly(ggplot(scoreDf, aes(x = Date, y = Incidence)) +
                      geom_line() +
                      geom_point() +

From 194345ba824ce4c50012147b736056f4ed471198 Mon Sep 17 00:00:00 2001
From: Kate Harwood
Date: Fri, 5 Mar 2021 16:33:18 -0500
Subject: [PATCH 3/6] fixing spacing

---
 dashboard/app.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dashboard/app.R b/dashboard/app.R
index 41ce83a..d67ffc0 100644
--- a/dashboard/app.R
+++ b/dashboard/app.R
@@ -287,7 +287,7 @@ server <- function(input, output, session) {
   ##############
   summaryPlot = function(scoreDf, targetVariable, scoreType, forecasters, horizon, loc, allLocations,
                          coverageInterval = NULL) {
-    signalFilter = CASE_FILTER 
+    signalFilter = CASE_FILTER
     if (targetVariable == "Deaths") {
       signalFilter = DEATH_FILTER
     }

From 9fc5f46718bd4396e3ca5c2afc1820d11266d8aa Mon Sep 17 00:00:00 2001
From: Kate Harwood
Date: Mon, 8 Mar 2021 12:55:31 -0500
Subject: [PATCH 4/6] adding text updates

---
 dashboard/app.R | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/dashboard/app.R b/dashboard/app.R
index d67ffc0..9e62268 100644
--- a/dashboard/app.R
+++ b/dashboard/app.R
@@ -72,8 +72,10 @@ coverageChoices = intersect(colnames(df), COVERAGE_INTERVALS)
 # Score explanations
 wisExplanation = "
 The weighted interval score (WIS) is a proper score that combines a set of interval scores.
-See this preprint about the WIS method for a more in depth explanation.
-TODO: How is it actually calculated from the intervals?
-"
+  See this article about the WIS method for
+  a more in depth explanation. The WIS factors in both the sharpness of prediction intervals and their calibration (or coverage) of the
+  actual observations.
+
+"
 aeExplanation = "
     The absolute error of a forecast is calculated from the Point Forecast.
     Usually this is the 50% quantile prediction, but forecasters can specify their own Point Forecast value.
@@ -115,8 +117,9 @@ and runs the Delphi Pandemic Survey via Facebook,
 which is a valuable signal
 for Delphi’s participation in the ensemble forecast.

-The Forecaster Evaluation Dashboard is a collaborative project, which has been made possible by members of the Google.org Fellowship
-engaged with the Delphi Group. Google.org is committed to the recovery of lives
+The Forecaster Evaluation Dashboard is a collaborative project, which has been made possible by the 13 pro bono Google.org Fellows
+who have spent 6 months working full-time with the Delphi Group.
+Google.org is committed to the recovery of lives
 and communities that have been impacted by COVID-19 and investing in developing the science to mitigate the damage of future pandemics.


    Collaborators

@@ -129,7 +132,7 @@ From the Delphi Research Group: Jed Grabman, Kate Harwood, Chris Scott, Jacob Bi
 Observed values are from the
 COVID-19 Data Repository
 by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University.
-

    Forecaster predictions are drawn from the Forecast Hub. TODO is there a good link this should go to? Git repo? +

    Forecaster predictions are drawn from the COVID-19 Forecast Hub GitHub repository

    Data for the dashboard is pulled once a week from these sources, on Tuesdays.

    Terms

    @@ -142,14 +145,19 @@ for each of a certain number of horizons
  • Target Variable
    What the forecast is predicting, ie: “weekly incident cases”
  • Horizon -
    1 epi-week, some number of epi-weeks ahead of the current week
  • -
  • Epi-week +
    +The duration of time between when the prediction was made and the predicted event, typically in units of epidemiological weeks. +
  • +
  • Epidemiological Week (Epi-week)
    Week that starts on a Sunday. If it is Sunday or Monday, the next epi-week is the week that starts on that Sunday (going back a day if it is Monday). -If it is Tuesday-Saturday, it is the week that starts on the subsequent Sunday.
  • +If it is Tuesday-Saturday, it is the week that starts on the subsequent Sunday, following +CDC convention.
    +
  • Point Forecast -
    The value that each forecaster picks as their “most important” prediction. -For many forecasters this is the 50% quantile prediction.
  • +
    The value that each forecaster picks as their “most likely” prediction. +For many forecasters this is the 50th quantile of the predictive distribution, for others it might be the mean of the distribution. +
  • Geo Type
    States or U.S. as a nation

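The epi-week assignment rule above, sketched in base R (the helper name is illustrative):

    # Map a forecast date to the Sunday that starts the epi-week it counts toward
    epiweek_start <- function(date) {
      wd <- as.POSIXlt(date)$wday        # 0 = Sunday, 1 = Monday, ..., 6 = Saturday
      if (wd == 0) date                  # Sunday: this week
      else if (wd == 1) date - 1         # Monday: back up one day to Sunday
      else date + (7 - wd)               # Tuesday-Saturday: the subsequent Sunday
    }

    epiweek_start(as.Date("2021-03-08"))  # a Monday    -> "2021-03-07"
    epiweek_start(as.Date("2021-03-10"))  # a Wednesday -> "2021-03-14"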
  • Dashboard Inclusion Criteria

    @@ -160,11 +168,12 @@ For many forecasters this is the 50% quantile prediction.
  • Includes only non-NA target dates (if the date is not in yyyy/mm/dd, the prediction will not be included)
  • Includes only predictions with at least 3 quantile values
  • Includes only one file per forecaster per week (according to forecast date). That file must be from a Sunday or Monday. If both are present, we keep the Monday data.
  • -
  • If a forecaster updates a file, we do not include the new predictions
  • +
  • If a forecaster updates a file after that Monday, we do not include the new predictions

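Read together, the inclusion criteria amount to a filter chain; a dplyr sketch over a hypothetical `predictions` data frame (all column names here are assumptions, not the real pipeline's schema):

    library(dplyr)

    included <- predictions %>%
      filter(
        target_variable %in% c("weekly incident cases", "weekly incident deaths"),
        horizon < 5,              # at most 4 epi-weeks ahead
        nchar(geo_value) == 2,    # states / territories / nation
        !is.na(target_date),      # malformed dates parse to NA and drop out
        n_quantiles >= 3          # at least 3 quantile values per prediction
      ) %>%
      # one submission per forecaster per epi-week: keep Monday over Sunday
      group_by(forecaster, epiweek) %>%
      slice_max(forecast_date, n = 1, with_ties = FALSE) %>%
      ungroup()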
  • Notes on the Data

@@ -236,9 +245,10 @@ ui <- fluidPage(
     ),
     tags$hr(),
   ),
-  tags$div(HTML("This app was conceived and built by the Forecast Evaluation Research Collaborative, a collaboration between
-    the Reich Lab and
-    the Delphi Group.
+  tags$div(HTML("This app was conceived and built by the Forecast Evaluation Research Collaborative,
+    a collaboration between the UMass-Amherst Reich Lab's
+    COVID-19 Forecast Hub
+    and Carnegie Mellon's Delphi Research Group.

     This data can also be viewed in a weekly report on the Forecast Hub site. TODO need link")),
     a("View Weekly Report", href = "#"),

From 8ca1f385007f198a49c5209280ddee75d2a48af2 Mon Sep 17 00:00:00 2001
From: Chris Scott
Date: Mon, 8 Mar 2021 17:24:17 -0500
Subject: [PATCH 5/6] Convert main content sections to markdown. Also refactor
 the visibility toggle.

---
 dashboard/about.md        |  69 +++++++++++++++
 dashboard/ae.md           |   1 +
 dashboard/app.R           | 171 ++++++--------------------------------
 dashboard/coverageplot.md |   3 +
 dashboard/wis.md          |   1 +
 5 files changed, 109 insertions(+), 136 deletions(-)
 create mode 100644 dashboard/about.md
 create mode 100644 dashboard/ae.md
 create mode 100644 dashboard/coverageplot.md
 create mode 100644 dashboard/wis.md

diff --git a/dashboard/about.md b/dashboard/about.md
new file mode 100644
index 0000000..819f19d
--- /dev/null
+++ b/dashboard/about.md
@@ -0,0 +1,69 @@
+### Who We Are
+
+The Forecast Evaluation Research Collaborative was founded by the [Reich Lab](https://reichlab.io/) at University of Massachusetts Amherst and the Carnegie Mellon University [Delphi Group](https://delphi.cmu.edu). Both groups are funded by the CDC as Centers of Excellence for Influenza and COVID-19 Forecasting. We have partnered together on this project to focus on providing a robust set of tools and methods for evaluating the performance of epidemic forecasts.
+
+The collaborative’s mission is to help epidemiological researchers gain insights into the performance of their forecasts, and ultimately lead to more accurate forecasting of epidemics.
+
+Both groups have led initiatives related to COVID-19 data and forecast curation. The Reich Lab has created the [COVID-19 Forecast Hub](https://covid19forecasthub.org/), a collaborative effort with over 80 groups submitting forecasts to be part of the official [CDC COVID-19 ensemble forecast](https://www.cdc.gov/coronavirus/2019-ncov/covid-data/mathematical-modeling.html). The Delphi Group has created COVIDcast, a platform for [epidemiological surveillance data](https://delphi.cmu.edu/covidcast/), and runs the [Delphi Pandemic Survey via Facebook](https://delphi.cmu.edu/covidcast/surveys/), which is a [valuable signal](https://delphi.cmu.edu/blog/2020/09/21/can-symptoms-surveys-improve-covid-19-forecasts/) for Delphi’s participation in the ensemble forecast.
+
+The Forecaster Evaluation Dashboard is a collaborative project, which has been made possible by the 13 pro bono Google.org Fellows who have spent 6 months working full-time with the Delphi Group. Google.org is [committed](https://www.google.org/covid-19/) to the recovery of lives and communities that have been impacted by COVID-19 and investing in developing the science to mitigate the damage of future pandemics.
+
+#### **Collaborators**
+
+From the Forecast Hub: Estee Cramer, Nicholas Reich, [the COVID-19 Forecast Hub Team](https://covid19forecasthub.org/doc/team/)
+From the Delphi Research Group: Jed Grabman, Kate Harwood, Chris Scott, Jacob Bien, Daniel McDonald, Logan Brooks
+
+### About the Data
+
+#### **Sources**
+
+**Observed values** are from the [COVID-19 Data Repository](https://github.com/CSSEGISandData/COVID-19) by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University.
+
+**Forecaster predictions** are drawn from the [COVID-19 Forecast Hub GitHub repository](https://github.com/reichlab/covid19-forecast-hub/)
+
+Data for the dashboard is pulled once a week from these sources, on Tuesdays.
+
+#### **Terms**
+
+* **Forecaster**
+
+    A model producing quantile predictions
+
+* **Forecast**
+
+    A set of data that, for all locales in a geo type, includes predictions for a target variable for each of a certain number of quantiles for each of a certain number of horizons
+
+* **Target Variable**
+
+    What the forecast is predicting, ie: “weekly incident cases”
+
+* **Horizon**
+
+    The duration of time between when the prediction was made and the predicted event, typically in units of epidemiological weeks.
+
+* **Epidemiological Week (Epi-week)**
+
+    Week that starts on a Sunday. If it is Sunday or Monday, the next epi-week is the week that starts on that Sunday (going back a day if it is Monday). If it is Tuesday-Saturday, it is the week that starts on the subsequent Sunday, following [CDC convention](https://wwwn.cdc.gov/nndss/document/MMWR_week_overview.pdf).
+
+* **Point Forecast**
+
+    The value that each forecaster picks as their “most likely” prediction. For many forecasters this is the 50th quantile of the predictive distribution, for others it might be the mean of the distribution.
+
+* **Geo Type**
+
+    States or U.S. as a nation
+
+#### **Dashboard Inclusion Criteria**
+
+* Includes only weekly deaths incidence and weekly case incidence target variables
+* Includes only horizon < 5 weeks ahead
+* Includes only geo values that are 2 characters (states / territories / nation)
+* Includes only non-NA target dates (if the date is not in yyyy/mm/dd, the prediction will not be included)
+* Includes only predictions with at least 3 quantile values
+* Includes only one file per forecaster per week (according to forecast date). That file must be from a Sunday or Monday. If both are present, we keep the Monday data.
+* If a forecaster updates a file after that Monday, we do not include the new predictions
+
+#### **Notes on the Data**
+
+* When totaling over all locations, these locations include states and territories and do not include nationwide forecasts. We only include states and territories common to the selected forecasters (over all time) that have data for at least one location.
+* We do include revisions of observed values, meaning the scores for forecasts made in the past can change. Scores change as our understanding of the truth changes.
\ No newline at end of file
diff --git a/dashboard/ae.md b/dashboard/ae.md
new file mode 100644
index 0000000..881e526
--- /dev/null
+++ b/dashboard/ae.md
@@ -0,0 +1 @@
+The **absolute error** of a forecast is calculated from the Point Forecast. Usually this is the 50% quantile prediction, but forecasters can specify their own Point Forecast value. When none is provided explicitly, we use the 50% quantile prediction.
\ No newline at end of file
diff --git a/dashboard/app.R b/dashboard/app.R
index 9e62268..9f56dbc 100644
--- a/dashboard/app.R
+++ b/dashboard/app.R
@@ -71,122 +71,15 @@ locationChoices = locationChoices[c(length(locationChoices), (1:length(locationC
 coverageChoices = intersect(colnames(df), COVERAGE_INTERVALS)
 
 # Score explanations
-wisExplanation = "
-    The weighted interval score (WIS) is a proper score that combines a set of interval scores.
-    See this article about the WIS method for
-    a more in depth explanation. The WIS factors in both the sharpness of prediction intervals and their calibration (or coverage) of the
-    actual observations.
-
-"
-aeExplanation = "
-    The absolute error of a forecast is calculated from the Point Forecast.
-    Usually this is the 50% quantile prediction, but forecasters can specify their own Point Forecast value.
-    When none is provided explicity, we use the 50% quantile prediction.
-
-"
-coverageExplanation = "
-    The coverage plot shows how well a forecaster's confidence intervals performed on a given week, across all locations.
-    The horizontal black line is the selected confidence interval, and the y-values are the percentage of time that the observed
-    values of the target variable value fell into that confidence interval.
-    A perfect forecaster on this measure would follow the black line.
-
-    For example, a forecaster wants the observed values to be within the 50% confidence interval in 50% of locations for the given week.
-    If the y-value is above the horizontal line, it means that the observed values fell within the forecaster's 50% CI more than 50% of
-    the time, aka the forecaster's 50% CI was under-confident that week, or too wide. Conversely, if the y-value is below the line,
-    it means that the forecaster's 50% CI was over-confident that week, or too narrow.
-
-"
+wisExplanation = includeMarkdown("wis.md")
+aeExplanation = includeMarkdown("ae.md")
+coverageExplanation = includeMarkdown("coverageplot.md")
 
 # Truth data disclaimer
 observedValueDisclaimer =
   "All forecasts are evaluated against the latest version of observed data. Scores of past forecasts may change as observed data is revised."
 
 # About page content
-aboutPageText = HTML("
-
    -

    Who We Are


    -The Forecast Evaluation Research Collaborative was founded by the Reich Lab -at University of Massachusetts Amherst and the Carnegie Mellon University Delphi Group. -Both groups are funded by the CDC as Centers of Excellence for Influenza and COVID-19 Forecasting. -We have partnered together on this project to focus on providing a robust set of tools and methods for evaluating the performance of epidemic forecasts. -

    -The collaborative’s mission is to help epidemiological researchers gain insights into the performance of their forecasts, -and ultimately lead to more accurate forecasting of epidemics. -

    -Both groups have led initiatives related to COVID-19 data and forecast curation. -The Reich Lab has created the COVID-19 Forecast Hub, -a collaborative effort with over 80 groups submitting forecasts to be part of the official - CDC COVID-19 ensemble forecast. -The Delphi Group has created COVIDcast, a platform for epidemiological surveillance data, -and runs the Delphi Pandemic Survey via Facebook, -which is a valuable signal -for Delphi’s participation in the ensemble forecast. -

    -The Forecaster Evaluation Dashboard is a collaborative project, which has been made possible by the 13 pro bono Google.org Fellows -who have spent 6 months working full-time with the Delphi Group. -Google.org is committed to the recovery of lives -and communities that have been impacted by COVID-19 and investing in developing the science to mitigate the damage of future pandemics. -

    -

    Collaborators

    -
    -From the Forecast Hub: Estee Cramer, NIcholas Reich, the COVID-19 Forecast Hub Team -
    -From the Delphi Research Group: Jed Grabman, Kate Harwood, Chris Scott, Jacob Bien, Daniel McDonald, Logan Brooks -


    About the Data

    -

    Sources

    -Observed values are from the -COVID-19 Data Repository -by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. -

    Forecaster predictions are drawn from the COVID-19 Forecast Hub GitHub repository -

    Data for the dashboard is pulled once a week from these sources, on Tuesdays. -

    -

    Terms

    -
    • Forecaster -
      A model producing quantile predictions
    • -
    • Forecast -
      A set of data that, for all locales in a geo type, -includes predictions for a target variable for each of a certain number of quantiles -for each of a certain number of horizons
    • -
    • Target Variable -
      What the forecast is predicting, ie: “weekly incident cases”
    • -
    • Horizon -
      -The duration of time between when the prediction was made and the predicted event, typically in units of epidemiological weeks. -
    • -
    • Epidemiological Week (Epi-week) -
      Week that starts on a Sunday. If it is Sunday or Monday, -the next epi-week is the week that starts on that Sunday (going back a day if it is Monday). -If it is Tuesday-Saturday, it is the week that starts on the subsequent Sunday, following -CDC convention.
    • - -
    • Point Forecast -
      The value that each forecaster picks as their “most likely” prediction. -For many forecasters this is the 50th quantile of the predictive distribution, for others it might be the mean of the distribution. -
    • -
    • Geo Type -
      States or U.S. as a nation
    -

    Dashboard Inclusion Criteria

    -
      -
    • Includes only weekly deaths incidence and weekly case incidence target variables
    • -
    • Includes only horizon < 5 weeks ahead
    • -
    • Includes only geo values that are 2 characters (states / territories / nation)
    • -
    • Includes only non-NA target dates (if the date is not in yyyy/mm/dd, the prediction will not be included)
    • -
    • Includes only predictions with at least 3 quantile values
    • -
    • Includes only one file per forecaster per week (according to forecast date). That file must be from a Sunday or Monday. If both are present, we keep the Monday data.
    • -
    • If a forecaster updates a file after that Monday, we do not include the new predictions
    • -
    -

    Notes on the Data

    -
      -
    • When totaling over all locations, these locations include states and territories and do not include nationwide forecasts. -We only include states and territories common to the selected forecasters (over all time) that have data for at least one location.
    • -
    • We do include revisions of observed values, meaning the scores for forecasts made in the past can change. -Scores change as our understanding of the truth changes.
    • -
    -

    -

    Explanation of Scoring Methods

    -
    -Weighted Interval Score
    ", wisExplanation, -"

    -Absolute Error
    ", aeExplanation, -"

    -Coverage
    ", coverageExplanation, -"

    ") - +aboutPageText = includeMarkdown("about.md") ui <- fluidPage( useShinyjs(), @@ -260,29 +153,29 @@ ui <- fluidPage( tabsetPanel(id = "tabset", selected = "evaluations", tabPanel("About", - tags$div(HTML("
    ", aboutPageText))), + fluidRow(column(10,aboutPageText)), + fluidRow(column(10,h3("Explanation of Scoring Methods"))), + fluidRow(column(10,h4("Weighted Interval Score"))), + fluidRow(column(10,wisExplanation)), + fluidRow(column(10,h4("Absolute Error"))), + fluidRow(column(10,aeExplanation)), + fluidRow(column(10,h4("Coverage Plot"))), + fluidRow(column(10,coverageExplanation)) + ), tabPanel("Evaluation Plots", value = "evaluations", - textOutput('renderWarningText'), - plotlyOutput(outputId = "summaryPlot"), - tags$br(),tags$br(),tags$br(),tags$br(),tags$br(), - HTML('
    '), - textOutput('renderLocationText'), - textOutput('renderAggregateText'), - textOutput('renderLocations'), - HTML('
    '), - - actionLink("scoreExplanation", - h4(tags$div(style = "color: black; padding-left:40px;", HTML("Explanation Of Score"), - icon("arrow-circle-down")))), - hidden(div(id='explainScore', - tags$div(style = "width: 90%", HTML("")))), - actionLink("truthValues", - h4(tags$div(style = "color: black; padding-left:40px;", HTML("Observed Values"), - icon("arrow-circle-down")))), - hidden(div(id="truthSection", hidden(div(id='truthPlot', - HTML('
    '), textOutput('renderObservedValueDisclaimer'), HTML('
    '), - plotlyOutput(outputId = "truthPlot"))))), - tags$br(),tags$br() + fluidRow(column(10,textOutput('renderWarningText'))), + fluidRow(column(10,plotlyOutput(outputId = "summaryPlot", height = "auto"))), + fluidRow( + column(9,offset=1, + hidden(div(id = "wisExplanation", wisExplanation)), + hidden(div(id = "aeExplanation", aeExplanation)), + hidden(div(id = "coverageExplanation", coverageExplanation)) + ) + ), + fluidRow(column(10,textOutput('renderLocationText'))), + fluidRow(column(10,textOutput('renderAggregateText'))), + fluidRow(column(10,textOutput('renderLocations'))), + fluidRow(column(10,plotlyOutput(outputId = "truthPlot"))) ) ), ), @@ -489,13 +382,19 @@ server <- function(input, output, session) { updateForecasterChoices(session, df, input$forecasters, input$scoreType) if (input$scoreType == "wis") { - html("explainScore", paste0(wisExplanation)) + show("wisExplanation") + hide("aeExplanation") + hide("coverageExplanation") } if (input$scoreType == "ae") { - html("explainScore", paste0(aeExplanation)) + hide("wisExplanation") + show("aeExplanation") + hide("coverageExplanation") } if (input$scoreType == "coverage") { - html("explainScore", paste0(coverageExplanation)) + hide("wisExplanation") + hide("aeExplanation") + show("coverageExplanation") } }) diff --git a/dashboard/coverageplot.md b/dashboard/coverageplot.md new file mode 100644 index 0000000..863a1fa --- /dev/null +++ b/dashboard/coverageplot.md @@ -0,0 +1,3 @@ +The **coverage plot** shows how well a forecaster's confidence intervals performed on a given week, across all locations. The horizontal black line is the selected confidence interval, and the y-values are the percentage of time that the observed values of the target variable value fell into that confidence interval. A perfect forecaster on this measure would follow the black line. + +For example, a forecaster wants the observed values to be within the 50% confidence interval in 50% of locations for the given week. If the y-value is above the horizontal line, it means that the observed values fell within the forecaster's 50% CI more than 50% of the time, aka the forecaster's 50% CI was under-confident that week, or too wide. Conversely, if the y-value is below the line, it means that the forecaster's 50% CI was over-confident that week, or too narrow. \ No newline at end of file diff --git a/dashboard/wis.md b/dashboard/wis.md new file mode 100644 index 0000000..017e26f --- /dev/null +++ b/dashboard/wis.md @@ -0,0 +1 @@ +The **weighted interval score** (WIS) is a proper score that combines a set of interval scores. See [this article](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1008618) about the WIS method for a more in depth explanation. The WIS factors in both the sharpness of prediction intervals and their calibration (or coverage) of the actual observations. \ No newline at end of file From c9431f93df231d3802839ab19d3ab94c68bbe92b Mon Sep 17 00:00:00 2001 From: Chris Scott Date: Wed, 10 Mar 2021 12:36:58 -0500 Subject: [PATCH 6/6] Revise layouts and change plot order. 
---
 dashboard/app.R | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/dashboard/app.R b/dashboard/app.R
index 9f56dbc..2cb76ad 100644
--- a/dashboard/app.R
+++ b/dashboard/app.R
@@ -153,29 +153,35 @@ ui <- fluidPage(
   tabsetPanel(id = "tabset", selected = "evaluations",
     tabPanel("About",
-      fluidRow(column(10,aboutPageText)),
-      fluidRow(column(10,h3("Explanation of Scoring Methods"))),
-      fluidRow(column(10,h4("Weighted Interval Score"))),
-      fluidRow(column(10,wisExplanation)),
-      fluidRow(column(10,h4("Absolute Error"))),
-      fluidRow(column(10,aeExplanation)),
-      fluidRow(column(10,h4("Coverage Plot"))),
-      fluidRow(column(10,coverageExplanation))
+      fluidRow(column(9,offset=1,
+        aboutPageText,
+        h3("Explanation of Scoring Methods"),
+        h4("Weighted Interval Score"),
+        wisExplanation,
+        h4("Absolute Error"),
+        aeExplanation,
+        h4("Coverage Plot"),
+        coverageExplanation
+      )),
     ),
     tabPanel("Evaluation Plots", value = "evaluations",
-      fluidRow(column(10,textOutput('renderWarningText'))),
-      fluidRow(column(10,plotlyOutput(outputId = "summaryPlot", height = "auto"))),
+      fluidRow(column(9, offset=1, textOutput('renderWarningText'))),
+      plotlyOutput(outputId = "summaryPlot", height="auto"),
       fluidRow(
        column(9, offset=1,
          hidden(div(id = "wisExplanation", wisExplanation)),
          hidden(div(id = "aeExplanation", aeExplanation)),
          hidden(div(id = "coverageExplanation", coverageExplanation))
        )
      ),
-      fluidRow(column(10,textOutput('renderLocationText'))),
-      fluidRow(column(10,textOutput('renderAggregateText'))),
-      fluidRow(column(10,textOutput('renderLocations'))),
-      fluidRow(column(10,plotlyOutput(outputId = "truthPlot")))
+      plotlyOutput(outputId = "truthPlot", height="auto"),
+      fluidRow(
+        column(9,offset=1,
+          textOutput('renderLocationText'),
+          textOutput('renderAggregateText'),
+          textOutput('renderLocations')
+        )
+      )
     )
   ),
 ),