diff --git a/src/scores/probability/crps_impl.py b/src/scores/probability/crps_impl.py index ba3eab084..c81135058 100644 --- a/src/scores/probability/crps_impl.py +++ b/src/scores/probability/crps_impl.py @@ -749,21 +749,52 @@ def crps_for_ensemble( weights: XarrayLike = None, ) -> XarrayLike: """ - Calculates CRPS using the formula CRPS(F, Y) = E|X - Y| - E|X - X'|/2 - where X, X' are independent samples of the predictive distribution F and Y is the observation (possibly unknown). - Samples from F and Y are drawn from the fcst_sample_dim and obs_sample_dim respectively. - Other dimensions are broadcast using xr broadcast rules. + Calculates the continuous ranked probability score (CRPS) given an ensemble of forecasts. + An ensemble of forecasts can also be thought of as a random sample from the predictive + distribution. + + Given an observation y, and ensemble member values {x_i} (for 1 <= i <= M), the CRPS is + calculated by the formaula + CRPS({x_i}, y) = (1 / M) * sum(|x_i - y_i|) - (1 / 2 * K) * sum(|x_i - x_j|), + where the first sum is iterated over 1 <= i <= M and the second sum is iterated over + 1 <= i <= M and 1 <= j <= M. + + The value of the constant K in this formula depends on the method. + - If `method="ecdf"` then K = M ** 2. In this case the CRPS value returned is + the exact CRPS value for the emprical cumulation distribution function + constructed using the ensemble values. + - If `method="fair"` then K = M * (M - 1). In this case the CRPS value returned + is the approximated CRPS where the ensemble values can be interpreted as a + random sample from the underlying predictive distribution. This interpretation + stems from the formula CRPS(F, Y) = E|X - Y| - E|X - X'|/2, where X and X' + are independent samples of the predictive distribution F, Y is the observation + (possibly unknown) and E denotes the expectation. This choice of K gives an + unbiased estimate for the second expectation. Args: - fcst: Forecast data. - obs: Observation data - weights: Weights for calculating a weighted mean of scores + fcst: Forecast data. Must have a dimension `ensemble_member_dim`. + obs: Observation data. + weights: Weights for calculating a weighted mean of individual scores. + ensemble_member_dim: the dimension that specifies the ensemble member or the sample + from the predictive distribution. + method: Either "ecdf" or "fair". reduce_dims: Dimensions to reduce. Can be "all" to reduce all dimensions. preserve_dims: Dimensions to preserve. Can be "all" to preserve all dimensions. - special_fcst_dims: Dimension(s) in `fcst` that are reduced to calculate individual scores. - Must not appear as a dimension in `obs`, `weights`, `reduce_dims` or `preserve_dims`. - e.g. the ensemble member dimension if calculating CRPS for ensembles, or the - threshold dimension of calculating CRPS for CDFs. + + Returns: + xarray object of (weighted mean) CRPS values. + + Raises: + ValueError: when method is not one of "ecdf" or "fair". + + References: + - C. Ferro (2014), "Fair scores for ensemble forecasts", Q J R Meteorol Soc + 140(683):1917-1923. + - T. Gneiting T and A. Raftery (2007), "Strictly proper scoring rules, prediction, + and estimation", J Am Stat Assoc, 102(477):359-37. + - M. Zamo and P. Naveau (2018), "Estimation of the Continuous Ranked Probability + Score with Limited Information and Applications to Ensemble Weather Forecasts", + Math Geosci 50:209-234, https://doi.org/10.1007/s11004-017-9709-7 """ if method not in ["ecdf", "fair"]: raise ValueError("`method` must be one of 'ecdf' or 'fair'") diff --git a/src/scores/utils.py b/src/scores/utils.py index c1d2e6b03..0965e5c35 100644 --- a/src/scores/utils.py +++ b/src/scores/utils.py @@ -23,11 +23,23 @@ It is ambiguous how to proceed therefore an exception has been raised instead. """ +ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION2 = """ +You are requesting to preserve a dimension which does not appear in your data +(fcst, obs or weights). It is ambiguous how to proceed therefore an exception has been +raised instead. +""" + ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION = """ You are requesting to reduce a dimension which does not appear in your data (fcst or obs). It is ambiguous how to proceed therefore an exception has been raised instead. """ +ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION2 = """ +You are requesting to reduce a dimension which does not appear in your data +(fcst, obs or weights). It is ambiguous how to proceed therefore an exception has been +raised instead. +""" + ERROR_OVERSPECIFIED_PRESERVE_REDUCE = """ You have specified both preserve_dims and reduce_dims. This method doesn't know how to properly interpret that, therefore an exception has been raised. @@ -121,7 +133,7 @@ def gather_dimensions2( special_fcst_dims: FlexibleDimensionTypes = None, ) -> set[Hashable]: """ - Performs a standard dimensions check for inputs of a function that calculates (mean) scores. + Performs standard dimensions checks for inputs of functions that calculate (mean) scores. Returns a set of the dimensions to reduce. Args: @@ -186,8 +198,8 @@ def gather_dimensions2( if specified_dims is not None and specified_dims != "all": if not set(specified_dims).issubset(all_scoring_dims): if preserve_dims is not None: - raise ValueError(ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION) - raise ValueError(ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION) + raise ValueError(ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION2) + raise ValueError(ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION2) # all errors have been captured, so now return list of dims to reduce if specified_dims is None: diff --git a/tests/test_utils.py b/tests/test_utils.py index 5e7485f43..54535e45a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -578,7 +578,7 @@ def test_gather_dimensions_exceptions(): None, ["blue", "yellow"], None, - utils.ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION, + utils.ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION2, ), ( utils_test_data.DA_RGB, @@ -587,7 +587,7 @@ def test_gather_dimensions_exceptions(): "yellow", None, "blue", - utils.ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION, + utils.ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION2, ), ], )