From 6dd7f45800ea6418db9805f6a766f114040d7420 Mon Sep 17 00:00:00 2001 From: Nicholas Loveday Date: Mon, 4 Dec 2023 12:03:07 +1100 Subject: [PATCH] can now choose if thresholds for firm are left or right alaligned --- .github/workflows/run-pre-commit.yml | 4 +- .../categorical/multicategorical_impl.py | 46 +++++++----- .../categorical/multicategorical_test_data.py | 75 +++++++++++++++++++ tests/categorical/test_multicategorical.py | 50 ++++++++++--- 4 files changed, 145 insertions(+), 30 deletions(-) diff --git a/.github/workflows/run-pre-commit.yml b/.github/workflows/run-pre-commit.yml index cd186c90b..191caf634 100644 --- a/.github/workflows/run-pre-commit.yml +++ b/.github/workflows/run-pre-commit.yml @@ -26,4 +26,6 @@ jobs: pip install .[all] - name: pre-commit checks run: | - pre-commit run --all-files + pre-commit run black --all-files + pre-commit run isort --all-files + pre-commit run pylint --all-files diff --git a/src/scores/categorical/multicategorical_impl.py b/src/scores/categorical/multicategorical_impl.py index 016cddc8c..7974b61cb 100644 --- a/src/scores/categorical/multicategorical_impl.py +++ b/src/scores/categorical/multicategorical_impl.py @@ -8,6 +8,7 @@ import xarray as xr from scores.functions import apply_weights +from scores.typing import FlexibleDimensionTypes from scores.utils import check_dims, gather_dimensions @@ -18,9 +19,10 @@ def firm( # pylint: disable=too-many-arguments categorical_thresholds: Sequence[float], threshold_weights: Sequence[Union[float, xr.DataArray]], discount_distance: Optional[float] = 0, - reduce_dims: Optional[Sequence[str]] = None, - preserve_dims: Optional[Sequence[str]] = None, + reduce_dims: FlexibleDimensionTypes = None, + preserve_dims: FlexibleDimensionTypes = None, weights: Optional[xr.DataArray] = None, + threshold_assignment: Optional[str] = "lower", ) -> xr.Dataset: """ Calculates the FIxed Risk Multicategorical (FIRM) score including the @@ -64,6 +66,9 @@ def firm( # pylint: disable=too-many-arguments supplied. The default behaviour if neither are supplied is to reduce all dims. weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude, by population, custom) + threshold_assignment: Specifies whether the intervals defining the categories are + left or right closed. That is whether the decision threshold is included in + the upper (left closed) or lower (right closed) category. Defaults to "lower". Returns: An xarray Dataset with data vars: @@ -98,15 +103,13 @@ def firm( # pylint: disable=too-many-arguments Journal of the Royal Meteorological Society, 148(744), pp.1389-1406. """ _check_firm_inputs( - obs, - risk_parameter, - categorical_thresholds, - threshold_weights, - discount_distance, + obs, risk_parameter, categorical_thresholds, threshold_weights, discount_distance, threshold_assignment ) total_score = [] for categorical_threshold, weight in zip(categorical_thresholds, threshold_weights): - score = weight * _single_category_score(fcst, obs, risk_parameter, categorical_threshold, discount_distance) + score = weight * _single_category_score( + fcst, obs, risk_parameter, categorical_threshold, discount_distance, threshold_assignment + ) total_score.append(score) summed_score = sum(total_score) reduce_dims = gather_dimensions(fcst.dims, obs.dims, reduce_dims, preserve_dims) # type: ignore[assignment] @@ -117,11 +120,7 @@ def firm( # pylint: disable=too-many-arguments def _check_firm_inputs( - obs, - risk_parameter, - categorical_thresholds, - threshold_weights, - discount_distance, + obs, risk_parameter, categorical_thresholds, threshold_weights, discount_distance, threshold_assignment ): """ Checks that the FIRM inputs are suitable @@ -150,6 +149,9 @@ def _check_firm_inputs( if discount_distance < 0: raise ValueError("`discount_distance` must be >= 0") + if threshold_assignment not in ["upper", "lower"]: + raise ValueError(""" `threshold_assignment` must be either \"upper\" or \"lower\" """) + def _single_category_score( fcst: xr.DataArray, @@ -157,6 +159,7 @@ def _single_category_score( risk_parameter: float, categorical_threshold: float, discount_distance: Optional[float] = None, + threshold_assignment: Optional[str] = "lower", ) -> xr.Dataset: """ Calculates the score for a single category for the `firm` metric at each @@ -175,6 +178,9 @@ def _single_category_score( discounted whenever the observation is within distance `discount_distance` of the forecast category. A value of 0 will not a apply any discounting. + threshold_assignment: Specifies whether the intervals defining the categories are + left or right closed. That is whether the decision threshold is included in + the upper (left closed) or lower (right closed) category. Defaults to "lower". Returns: An xarray Dataset with data vars: @@ -187,10 +193,16 @@ def _single_category_score( # pylint: disable=unbalanced-tuple-unpacking fcst, obs = xr.align(fcst, obs) - # False Alarms - condition1 = (obs <= categorical_threshold) & (categorical_threshold < fcst) - # Misses - condition2 = (fcst <= categorical_threshold) & (categorical_threshold < obs) + if threshold_assignment == "lower": + # False Alarms + condition1 = (obs <= categorical_threshold) & (categorical_threshold < fcst) + # Misses + condition2 = (fcst <= categorical_threshold) & (categorical_threshold < obs) + else: + # False Alarms + condition1 = (obs < categorical_threshold) & (categorical_threshold <= fcst) + # Misses + condition2 = (fcst < categorical_threshold) & (categorical_threshold <= obs) # Bring back NaNs condition1 = condition1.where(~np.isnan(fcst)) diff --git a/tests/categorical/multicategorical_test_data.py b/tests/categorical/multicategorical_test_data.py index 7798767be..fa985786b 100644 --- a/tests/categorical/multicategorical_test_data.py +++ b/tests/categorical/multicategorical_test_data.py @@ -13,6 +13,13 @@ "k": [10, 11, 12], }, ) +DA_FCST_SC2 = xr.DataArray( + data=[3, 3, 1, 2, 2, 1], + dims=["i"], + coords={ + "i": [1, 2, 3, 4, 5, 6], + }, +) DA_OBS_SC = xr.DataArray( data=[[10, np.nan], [0, 1]], @@ -23,6 +30,14 @@ }, ) +DA_OBS_SC2 = xr.DataArray( + data=[1, 2, 3, 4, 1, 2], + dims=["i"], + coords={ + "i": [1, 2, 3, 4, 5, 6], + }, +) + EXP_SC_TOTAL_CASE0 = xr.DataArray( data=[[[np.nan, 0.3], [0.7, np.nan]], [[0.0, 0], [0, np.nan]]], dims=["i", "j", "k"], @@ -147,6 +162,66 @@ ) +EXP_SC_TOTAL_CASE4 = xr.DataArray( + data=[0.3, 0.3, 0.7, 0.7, 0, 0], + dims=["i"], + coords={ + "i": [1, 2, 3, 4, 5, 6], + }, +) +EXP_SC_UNDER_CASE4 = xr.DataArray( + data=[0, 0, 0.7, 0.7, 0, 0], + dims=["i"], + coords={ + "i": [1, 2, 3, 4, 5, 6], + }, +) +EXP_SC_OVER_CASE4 = xr.DataArray( + data=[0.3, 0.3, 0, 0, 0, 0], + dims=["i"], + coords={ + "i": [1, 2, 3, 4, 5, 6], + }, +) + +EXP_SC_CASE4 = xr.Dataset( + { + "firm_score": EXP_SC_TOTAL_CASE4, + "underforecast_penalty": EXP_SC_UNDER_CASE4, + "overforecast_penalty": EXP_SC_OVER_CASE4, + } +) + +EXP_SC_TOTAL_CASE5 = xr.DataArray( + data=[0.3, 0, 0.7, 0.0, 0.3, 0.7], + dims=["i"], + coords={ + "i": [1, 2, 3, 4, 5, 6], + }, +) +EXP_SC_UNDER_CASE5 = xr.DataArray( + data=[0, 0, 0.7, 0, 0, 0.7], + dims=["i"], + coords={ + "i": [1, 2, 3, 4, 5, 6], + }, +) +EXP_SC_OVER_CASE5 = xr.DataArray( + data=[0.3, 0.0, 0, 0, 0.3, 0], + dims=["i"], + coords={ + "i": [1, 2, 3, 4, 5, 6], + }, +) + +EXP_SC_CASE5 = xr.Dataset( + { + "firm_score": EXP_SC_TOTAL_CASE5, + "underforecast_penalty": EXP_SC_UNDER_CASE5, + "overforecast_penalty": EXP_SC_OVER_CASE5, + } +) + DA_FCST_FIRM = xr.DataArray( data=[ [[np.nan, 7, 4], [-100, 0, 1], [0, -100, 1]], diff --git a/tests/categorical/test_multicategorical.py b/tests/categorical/test_multicategorical.py index 49e79a500..ffffbbdcf 100644 --- a/tests/categorical/test_multicategorical.py +++ b/tests/categorical/test_multicategorical.py @@ -14,32 +14,32 @@ @pytest.mark.parametrize( - ("fcst", "obs", "categorical_threshold", "discount_distance", "expected"), + ("fcst", "obs", "categorical_threshold", "discount_distance", "threshold_assignment", "expected"), [ # Threshold 5, discount = 0, preserve all dims - (mtd.DA_FCST_SC, mtd.DA_OBS_SC, 5, 0, mtd.EXP_SC_CASE0), + (mtd.DA_FCST_SC, mtd.DA_OBS_SC, 5, 0, "lower", mtd.EXP_SC_CASE0), # Threshold -200, discount = 0, preserve 1 dim - (mtd.DA_FCST_SC, mtd.DA_OBS_SC, -200, 0, mtd.EXP_SC_CASE1), + (mtd.DA_FCST_SC, mtd.DA_OBS_SC, -200, 0, "lower", mtd.EXP_SC_CASE1), # Threshold 200, discount = 0, preserve 1 dim - (mtd.DA_FCST_SC, mtd.DA_OBS_SC, 200, 0, mtd.EXP_SC_CASE1), + (mtd.DA_FCST_SC, mtd.DA_OBS_SC, 200, 0, "lower", mtd.EXP_SC_CASE1), # Threshold 5, discount = 7, preserve all dims. # discount_distance is maximum for both false alarms and misses - (mtd.DA_FCST_SC, mtd.DA_OBS_SC, 5, 7, mtd.EXP_SC_CASE2), + (mtd.DA_FCST_SC, mtd.DA_OBS_SC, 5, 7, "lower", mtd.EXP_SC_CASE2), # Threshold 5, discount = 0.5, preserve all dims. # discount_distance is minimum for both false alarms and misses - (mtd.DA_FCST_SC, mtd.DA_OBS_SC, 5, 0.5, mtd.EXP_SC_CASE3), + (mtd.DA_FCST_SC, mtd.DA_OBS_SC, 5, 0.5, "lower", mtd.EXP_SC_CASE3), + # Test lower/right assignment + (mtd.DA_FCST_SC2, mtd.DA_OBS_SC2, 2, None, "lower", mtd.EXP_SC_CASE4), + # Test upper/left assignment + (mtd.DA_FCST_SC2, mtd.DA_OBS_SC2, 2, None, "upper", mtd.EXP_SC_CASE5), ], ) -def test__single_category_score(fcst, obs, categorical_threshold, discount_distance, expected): +def test__single_category_score(fcst, obs, categorical_threshold, discount_distance, threshold_assignment, expected): """Tests _single_category_score""" risk_parameter = 0.7 calculated = _single_category_score( - fcst, - obs, - risk_parameter, - categorical_threshold, - discount_distance, + fcst, obs, risk_parameter, categorical_threshold, discount_distance, threshold_assignment ) xr.testing.assert_allclose(calculated, expected) @@ -286,6 +286,7 @@ def test_firm_dask(): "weights", "preserve_dims", "discount_distance", + "threshold_assignment", "error_type", "error_msg_snippet", ), @@ -299,6 +300,7 @@ def test_firm_dask(): [], ["i", "j", "k"], 0, + "upper", ValueError, "`categorical_thresholds` must have at least", ), @@ -311,6 +313,7 @@ def test_firm_dask(): [1, 2], ["i", "j", "k"], 0, + "upper", ValueError, "`categorical_thresholds` and `weights`", ), @@ -323,6 +326,7 @@ def test_firm_dask(): [1], ["i", "j", "k"], 0, + "upper", ValueError, "0 < `risk_parameter` < 1 must", ), @@ -335,6 +339,7 @@ def test_firm_dask(): [1], ["i", "j", "k"], 0, + "upper", ValueError, "0 < `risk_parameter` < 1 must", ), @@ -347,6 +352,7 @@ def test_firm_dask(): [1, -1], ["i", "j", "k"], 0, + "upper", ValueError, "`weights` must be > 0", ), @@ -359,6 +365,7 @@ def test_firm_dask(): mtd.LIST_WEIGHTS_FIRM3, ["i", "j", "k"], 0, + "upper", ValueError, "value was found in index 0 of `weights", ), @@ -371,6 +378,7 @@ def test_firm_dask(): [1, 0], ["i", "j", "k"], 0, + "upper", ValueError, "`weights` must be > 0", ), @@ -383,6 +391,7 @@ def test_firm_dask(): mtd.LIST_WEIGHTS_FIRM4, ["i", "j", "k"], 0, + "upper", ValueError, "No values <= 0 are allowed in `weights`", ), @@ -395,6 +404,7 @@ def test_firm_dask(): mtd.LIST_WEIGHTS_FIRM5, ["i", "j", "k"], 0, + "upper", DimensionError, "of data object are not subset to", ), @@ -407,9 +417,23 @@ def test_firm_dask(): [1], ["i", "j", "k"], -1, + "upper", ValueError, "`discount_distance` must be >= 0", ), + # wrong threshold assignment + ( + mtd.DA_FCST_FIRM, + mtd.DA_OBS_FIRM, + 0.5, + [5], + [1], + ["i", "j", "k"], + 0.0, + "up", + ValueError, + """ `threshold_assignment` must be either \"upper\" or \"lower\" """, + ), ], ) def test_firm_raises( @@ -420,6 +444,7 @@ def test_firm_raises( weights, preserve_dims, discount_distance, + threshold_assignment, error_type, error_msg_snippet, ): @@ -436,4 +461,5 @@ def test_firm_raises( discount_distance, None, preserve_dims, + threshold_assignment=threshold_assignment, )