From 1a610149aaf76e99540185514a61c6fe52377e6f Mon Sep 17 00:00:00 2001 From: Nicholas Loveday Date: Fri, 1 Dec 2023 13:57:00 +1100 Subject: [PATCH] minor updates --- docs/paper.bib | 38 ++++++++++++++++++++++++++++++++++++++ docs/paper.md | 12 ++++++++---- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/docs/paper.bib b/docs/paper.bib index 899267f66..61781ddf0 100644 --- a/docs/paper.bib +++ b/docs/paper.bib @@ -168,4 +168,42 @@ @misc{Taggart:2022d author = {Taggart, Robert}, year = {2022}, note = {Accessed on September 9, 2023} +} +@misc{loveday2023userfocused, + title={A User-Focused Approach to Evaluating Probabilistic and Categorical Forecasts}, + author={Nicholas Loveday and Robert Taggart and Mohammadreza Khanarmuei}, + year={2023}, + eprint={2311.18258}, + archivePrefix={arXiv}, + primaryClass={stat.AP} +} +@article{nipen2023verif, + title={Verif: A weather-prediction verification tool for effective product development}, + author={Nipen, Thomas N and Stull, Roland B and Lussana, Cristian and Seierstad, Ivar A}, + journal={Bulletin of the American Meteorological Society}, + volume={104}, + number={9}, + pages={E1610--E1618}, + year={2023}, + publisher={American Meteorological Society} +} +@article{dimitriadis2021stable, + title={Stable reliability diagrams for probabilistic classifiers}, + author={Dimitriadis, Timo and Gneiting, Tilmann and Jordan, Alexander I}, + journal={Proceedings of the National Academy of Sciences}, + volume={118}, + number={8}, + pages={e2016191118}, + year={2021}, + publisher={National Acad Sciences} +} +@article{griffiths2021circular, + title={Circular Flip-Flop Index: quantifying revision stability of forecasts of direction}, + author={Griffiths, Deryn and Loveday, Nicholas and Price, Benjamin and Foley, Michael and McKelvie, Alistair}, + journal={Journal of Southern Hemisphere Earth Systems Science}, + volume={71}, + number={3}, + pages={266--271}, + year={2021}, + publisher={CSIRO Publishing} } \ No newline at end 
of file diff --git a/docs/paper.md b/docs/paper.md index 3bc95c899..f1ba142bb 100644 --- a/docs/paper.md +++ b/docs/paper.md @@ -17,7 +17,7 @@ authors: affiliations: - name: Bureau of Meteorology, Australia index: 1 -date: 6 September 2023 +date: 1 December 2023 bibliography: paper.bib --- @@ -30,13 +30,13 @@ bibliography: paper.bib All of the scores and metrics in this package have undergone a thorough statistical and scientific review. Every score has a companion Jupyter Notebook tutorial demonstrating its use in practice. -At the time of writing, the scores contained in this package are: MSE, MAE, RMSE, FIRM [@Taggart:2022a], CRPS (including threshold-weighting, see [@Gneiting:2011]), the FlipFlop index [@Griffiths:2019] and the Murphy score [@Ehm:2016]. It also includes the Diebold-Mariano statistical test [@Diebold:1995] with both the [@Harvey:1997] and [@Hering:2011] modifications. +At the time of writing, the scores contained in this package are: MSE, MAE, RMSE, FIRM [@Taggart:2022a], CRPS for CDFs (including threshold-weighting, see [@Gneiting:2011]), the FlipFlop index [@Griffiths:2019; @griffiths2021circular], ROC curves, the quantile score, and the Murphy score [@Ehm:2016]. It also includes the Diebold-Mariano statistical test [@Diebold:1995] with both the [@Harvey:1997] and [@Hering:2011] modifications. Additionally, it contains isotonic regression, which is becoming an increasingly important tool in forecast verification and can be used to generate stable reliability diagrams [@dimitriadis2021stable]. # Statement of Need The research purpose of this software is (a) to mathematically verify and validate scientific research and (b) to foster research into new scores and metrics. -`scores` includes novel scores not commonly found elsewhere (e.g. FIRM, FlipFlop index), complex scores (e.g. CRPS), more common scores (e.g. MAE, RMSE) and statistical tests (such as the Diebold Mariano test). 
Scores provides its own implementations where relevant to avoid extensive dependencies. +`scores` includes novel scores not commonly found elsewhere (e.g. FIRM, FlipFlop index), complex scores (e.g. threshold-weighted CRPS), more common scores (e.g. MAE, RMSE) and statistical tests (such as the Diebold-Mariano test). `scores` provides its own implementations where relevant to avoid extensive dependencies. `scores` works with n-dimensional data (e.g., geospatial, vertical and temporal dimensions) for both point-based and gridded data. It has proper treatments for missing data, masking of data and weighting of results. @@ -48,6 +48,8 @@ The `scores` roadmap includes support for machine learning library integration, `scores` has an area specifically to hold emerging scores which are still undergoing research and development. This provides a clear mechanism for people to share, access and collaborate on new scores, and be able to easily re-use versioned implementations of those scores. +`scores` has been used in research papers [e.g., @loveday2023userfocused]. + ## Related Works `scores` has arisen from, and now supports, the Jive verification system, described by [@Loveday:2023]. `scores` includes the mathematical functions from this package and is intended to modularise these components. The Jive metrics have been used by [@Griffiths:2017], [@Foley:2020], [@Taggart:2022b], [@Taggart:2022c] and [@Taggart:2022d]. @@ -56,7 +58,9 @@ The `scores` roadmap includes support for machine learning library integration, `xskillscore` [@xskillscore] provides many of the same functions as `scores`. `xskillscore` does not contain some of the novel functions contained within `scores` and does not contain the Jupyter Notebook tutorials which provide users with clear guidance on the use of the verification metrics. -`METplus` [@Brown:2021] provides related functionality. `METplus` includes a database and visualisation system and python wrappers to utilise the `MET` package. 
Verification scores in `MET` are implemented in C++ rather than Python. `METplus` does not contain some of the novel functions contained within `scores`. +`METplus` [@Brown:2021] provides related functionality. `METplus` includes a database and visualisation system with Python wrappers to utilise the `MET` package. Verification scores in `MET` are implemented in C++ rather than Python. `METplus` does not contain some of the novel functions contained within `scores`. + +`Verif` [@nipen2023verif] is a command-line tool for forecast verification and is utilised very differently to `scores`. It also does not contain some of the novel metrics in `scores`. # Acknowledgements