Review of Murphy tutorial #129

Merged
merged 9 commits into from Feb 14, 2024
2 changes: 1 addition & 1 deletion .github/workflows/run-pre-commit.yml
@@ -12,7 +12,7 @@ permissions:
jobs:
build:

- runs-on: ubuntu-latest
+ runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
1 change: 1 addition & 0 deletions docs/api.md
@@ -18,6 +18,7 @@
.. autofunction:: scores.continuous.murphy_score
.. autofunction:: scores.continuous.murphy.thetas
.. autofunction:: scores.continuous.flip_flop_index
+ .. autofunction:: scores.continuous.flip_flop_index_proportion_exceeding
```

## scores.probability
11 changes: 4 additions & 7 deletions docs/contributing.md
@@ -7,13 +7,10 @@ Types of contributions include bug reports, merge requests, feature requests, an
These guidelines aim to make it clear how to collaborate effectively.

## Roadmap

- 1. Further development of examples and demonstrations for new users
- 2. Set up of readthedocs and travis-ci
- 3. Add further tests for NaNs and missing data
- 4. Addition of more standard metrics
- 5. Addition of more novel metrics
- 6. Performance review and tests for dask
+ 1. Addition of more scores, metrics and statistical techniques
+ 2. Further optimisation and performance improvements
+ 3. Increased support for machine learning library integration
+ 4. Additional notebooks exploring complex use cases in depth

## Bug Reports and Feature Requests

5 changes: 4 additions & 1 deletion src/scores/continuous/__init__.py
@@ -1,7 +1,10 @@
"""
Import the functions from the implementations into the public API
"""
- from scores.continuous.flip_flop_impl import flip_flop_index
+ from scores.continuous.flip_flop_impl import (
+     flip_flop_index,
+     flip_flop_index_proportion_exceeding,
+ )
from scores.continuous.isoreg import isotonic_fit
from scores.continuous.murphy_impl import murphy_score, murphy_thetas
from scores.continuous.quantile_loss_impl import quantile_score
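The re-export pattern above means user code imports from the package, not the implementation module; a minimal sketch (not part of the diff):

```python
# Both functions resolve from the public package after this change,
# so callers never import from flip_flop_impl directly.
from scores.continuous import (
    flip_flop_index,
    flip_flop_index_proportion_exceeding,
)
```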
108 changes: 99 additions & 9 deletions src/scores/continuous/flip_flop_impl.py
@@ -9,7 +9,8 @@
import xarray as xr

from scores.functions import angular_difference
- from scores.typing import XarrayLike
+ from scores.processing import proportion_exceeding
+ from scores.typing import FlexibleDimensionTypes, XarrayLike
from scores.utils import DimensionError, check_dims, dims_complement


@@ -34,8 +35,7 @@ def _flip_flop_index(data: xr.DataArray, sampling_dim: str, is_angular: bool = F
"""
# check that `sampling_dim` is in `data`.
check_dims(data, [sampling_dim], mode="superset")

- # the maximum possible number of discrete flipflops
+ # the maximum possible number of discrete flip_flops
sequence_length = len(data[sampling_dim])
max_possible_flip_flop_count = sequence_length - 2

@@ -50,21 +50,21 @@ def _flip_flop_index(data: xr.DataArray, sampling_dim: str, is_angular: bool = F
# maximum possible angular difference between two forecasts
enc_size = encompassing_sector_size(data=data, dims=dims_to_preserve)
range_val = np.clip(enc_size, a_min=None, a_max=180.0)
- flipflop = angular_difference(data.shift({sampling_dim: 1}), data)
+ flip_flop = angular_difference(data.shift({sampling_dim: 1}), data)
else:
max_val = data.max(dim=sampling_dim, skipna=False)
min_val = data.min(dim=sampling_dim, skipna=False)
range_val = max_val - min_val
# subtract each consecutive 'row' from each other
- flipflop = data.shift({sampling_dim: 1}) - data
+ flip_flop = data.shift({sampling_dim: 1}) - data

# take the absolute value and sum.
# I don't do skipna=False here because .shift makes a row of nan
- flipflop = abs(flipflop).sum(dim=sampling_dim)
+ flip_flop = abs(flip_flop).sum(dim=sampling_dim)
# adjust based on the range. This is where nan will be introduced.
- flipflop = flipflop - range_val
+ flip_flop = flip_flop - range_val
# normalise
- return flipflop / max_possible_flip_flop_count
+ return flip_flop / max_possible_flip_flop_count
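To make the normalisation concrete, here is a minimal NumPy sketch of the non-angular arithmetic on a single 1-D series (the values are station 10001 from the doctest further down; illustrative only, not part of the diff):

```python
import numpy as np

# One 1-D sample along sampling_dim.
x = np.array([50.0, 20.0, 40.0, 80.0])
total_movement = np.abs(np.diff(x)).sum()  # |20-50| + |40-20| + |80-40| = 90
range_val = x.max() - x.min()              # 80 - 20 = 60
max_possible_flip_flops = len(x) - 2       # sequence_length - 2 = 2
print((total_movement - range_val) / max_possible_flip_flops)  # 15.0
```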


# If there are selections, a Dataset is always returned
@@ -98,7 +98,7 @@ def flip_flop_index(
direction).
**selections: Additional keyword arguments specify
subsets to draw from the dimension `sampling_dim` of the supplied `data`
- before calculation of the flipflop index. e.g. days123=[1, 2, 3]
+ before calculation of the flip_flop index. e.g. days123=[1, 2, 3]

Returns:
If `selections` are not supplied: An xarray.DataArray, the Flip-flop
@@ -348,3 +348,93 @@ def _encompassing_sector_size_np(
n_unique_angles = (angular_diffs != 0).sum(axis=0)
result = np.where(n_unique_angles <= 2, np.max(angular_diffs, axis=0), result)
return result
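The angular branch of `_flip_flop_index` leans on `angular_difference` from `scores.functions`; as a hedged illustration of the usual wrap-aware definition (the actual implementation may differ):

```python
import numpy as np

def angular_difference_sketch(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Smallest angle between two directions in degrees: 350 vs 10 is 20 apart."""
    diff = np.abs(a - b) % 360.0
    return np.where(diff > 180.0, 360.0 - diff, diff)

print(angular_difference_sketch(np.array([350.0]), np.array([10.0])))  # [20.]
```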


def flip_flop_index_proportion_exceeding(
data: xr.DataArray,
sampling_dim: str,
thresholds: Iterable,
is_angular: bool = False,
preserve_dims: FlexibleDimensionTypes = None,
reduce_dims: FlexibleDimensionTypes = None,
**selections: Iterable[int],
):
"""
Calculates the flip-flop index and returns the proportion exceeding
(or equal to) each of the supplied `thresholds`.

Args:
data: Data from which to draw subsets.
sampling_dim: The name of the dimension along which to calculate
the flip-flop index.
thresholds: The proportion of Flip-Flop Index results
equal to or exceeding these thresholds will be calculated.
is_angular: Specifies whether `data` is directional data (e.g. wind
direction).
reduce_dims: Dimensions to reduce.
preserve_dims: Dimensions to preserve.
**selections: Additional keyword arguments specify
subsets to draw from the dimension `sampling_dim` of the supplied `data`
before calculation of the flip_flop index. e.g. days123=[1, 2, 3]

Returns:
If `selections` are not supplied - An xarray.DataArray with dimensions
`dims` + 'threshold'. The DataArray gives the proportion of Flip-Flop
Index values, calculated by collapsing dimension `sampling_dim`, that
exceed or equal `thresholds`.

If `selections` are supplied - An xarray.Dataset with dimensions `dims`
+ 'threshold'. There is a data variable for each keyword in
`selections`; each corresponds to the Flip-Flop Index proportion
exceeding for the subset of data specified by the keyword values.

Examples:
>>> data = xr.DataArray(
... [[50, 20, 40, 80], [10, 50, 10, 100], [0, 30, 20, 50]],
... dims=['station_number', 'lead_day'],
... coords=[[10001, 10002, 10003], [1, 2, 3, 4]]
... )

>>> flip_flop_index_proportion_exceeding(data, 'lead_day', [20])
<xarray.DataArray (threshold: 1)>
array([ 0.33333333])
Coordinates:
* threshold (threshold) int64 20
Attributes:
sampling_dim: lead_day

>>> flip_flop_index_proportion_exceeding(
... data, 'lead_day', [20], days123=[1, 2, 3], all_days=[1, 2, 3, 4]
... )
<xarray.Dataset>
Dimensions: (threshold: 1)
Coordinates:
* threshold (threshold) int64 20
Data variables:
days123 (threshold) float64 0.6667
all_days (threshold) float64 0.3333
Attributes:
selections: {'days123': [1, 2, 3], 'all_days': [1, 2, 3, 4]}
sampling_dim: lead_day

See also:
`scores.continuous.flip_flop_index`

"""
if preserve_dims is not None and sampling_dim in list(preserve_dims):
raise DimensionError(
f"`sampling_dim`: '{sampling_dim}' must not be in dimensions to preserve "
f"`preserve_dims`: {list(preserve_dims)}"
)
if reduce_dims is not None and sampling_dim in list(reduce_dims):
raise DimensionError(
f"`sampling_dim`: '{sampling_dim}' must not be in dimensions to reduce "
f"`reduce_dims`: {list(reduce_dims)}"
)
# calculate the flip-flop index
flip_flop_data = flip_flop_index(data, sampling_dim, is_angular=is_angular, **selections)
# calculate the proportion exceeding each threshold
flip_flop_exceeding = proportion_exceeding(flip_flop_data, thresholds, reduce_dims, preserve_dims)
# overwrite the attributes
flip_flop_exceeding.attrs = flip_flop_data.attrs

return flip_flop_exceeding
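A short sketch of the guard clauses above, reusing `data` from the docstring examples (illustrative only):

```python
# sampling_dim must be collapsed when the flip-flop index is calculated,
# so asking to preserve (or reduce) it is rejected up front:
flip_flop_index_proportion_exceeding(
    data, "lead_day", [20], preserve_dims=["lead_day"]
)
# DimensionError: `sampling_dim`: 'lead_day' must not be in dimensions to preserve ...
```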
122 changes: 122 additions & 0 deletions src/scores/processing.py
@@ -1,12 +1,14 @@
"""Tools for processing data for verification"""
import operator
from collections.abc import Iterable
from typing import Optional, Union

import numpy as np
import pandas as pd
import xarray as xr

from scores.typing import FlexibleDimensionTypes, XarrayLike
from scores.utils import gather_dimensions

INEQUALITY_MODES = {
">=": (operator.ge, -1),
@@ -260,3 +262,123 @@ def update_mask(mask, data_array):

# return matched data objects
return tuple(arg.where(mask) for arg in args)


def proportion_exceeding(
data: XarrayLike,
thresholds: Iterable,
preserve_dims: FlexibleDimensionTypes = None,
reduce_dims: FlexibleDimensionTypes = None,
):
"""
Calculates the proportion of `data` equal to or exceeding `thresholds`.

Args:
data (xarray.Dataset or xarray.DataArray): The data from which
to calculate the proportion exceeding `thresholds`.
thresholds (iterable): The proportion of `data` values
equal to or exceeding these thresholds will be calculated.
reduce_dims: Dimensions to reduce.
preserve_dims: Dimensions to preserve.

Returns:
An xarray data object with the type of `data` and dimensions
`dims` + 'threshold'. The values are the proportion of `data`
that are greater than or equal to the corresponding threshold.

"""
return _binary_discretise_proportion(data, thresholds, ">=", preserve_dims, reduce_dims)
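A minimal usage sketch, assuming the import path implied by this diff:

```python
import xarray as xr

from scores.processing import proportion_exceeding

data = xr.DataArray([0.1, 0.4, 0.4, 0.9])
# Proportion of values >= 0.0, 0.25 and 0.5 respectively: 4/4, 3/4, 1/4.
print(proportion_exceeding(data, [0.0, 0.25, 0.5]).values)  # [1.   0.75 0.25]
```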


def _binary_discretise_proportion(
data: XarrayLike,
thresholds: Iterable,
mode: str,
preserve_dims: FlexibleDimensionTypes = None,
reduce_dims: FlexibleDimensionTypes = None,
abs_tolerance: Optional[float] = None,
autosqueeze: bool = False,
):
"""
Returns the proportion of `data` in each category. The categories are
defined by the relationship of data to threshold as specified by
the operation `mode`.

Args:
data: The data to convert
into 0 and 1 according to the thresholds before calculating the
proportion.
thresholds: The thresholds to compare `data` against; the proportion
of `data` in each category is calculated for each threshold.
mode: Specifies the required relation of `data` to `thresholds`
for a value to fall in the 'event' category (i.e. assigned to 1).
Allowed modes are:

- '>=' values in `data` greater than or equal to the
corresponding threshold are assigned as 1.
- '>' values in `data` greater than the corresponding threshold
are assigned as 1.
- '<=' values in `data` less than or equal to the corresponding
threshold are assigned as 1.
- '<' values in `data` less than the corresponding threshold
are assigned as 1.
- '==' values in `data` equal to the corresponding threshold
are assigned as 1.
- '!=' values in `data` not equal to the corresponding threshold
are assigned as 1.
reduce_dims: Dimensions to reduce.
preserve_dims: Dimensions to preserve.
abs_tolerance: If supplied, values in data that are
within abs_tolerance of a threshold are considered to be equal to
that threshold. This is generally used to correct for floating
point rounding, e.g. we may want to consider 1.0000000000000002 as
equal to 1.
autosqueeze: If True and only one threshold is
supplied, then the dimension 'threshold' is squeezed out of the
output. If `thresholds` is float-like, then this is forced to
True, otherwise defaults to False.

Returns:
An xarray data object with the type of `data`, dimension `dims` +
'threshold'. The values of the output are the proportion of `data` that
satisfy the relationship to `thresholds` as specified by `mode`.

Examples:

>>> data = xr.DataArray([0, 0.5, 0.5, 1])

>>> _binary_discretise_proportion(data, [0, 0.5, 1], '==')
<xarray.DataArray (threshold: 3)>
array([ 0.25, 0.5 , 0.25])
Coordinates:
* threshold (threshold) float64 0.0 0.5 1.0
Attributes:
discretisation_tolerance: 0
discretisation_mode: ==

>>> _binary_discretise_proportion(data, [0, 0.5, 1], '>=')
<xarray.DataArray (threshold: 3)>
array([ 1. , 0.75, 0.25])
Coordinates:
* threshold (threshold) float64 0.0 0.5 1.0
Attributes:
discretisation_tolerance: 0
discretisation_mode: >=

See also:
`scores.processing.binary_discretise`

"""
# values are 1 when (data {mode} threshold), and 0 when ~(data {mode} threshold).
discrete_data = binary_discretise(data, thresholds, mode, abs_tolerance=abs_tolerance, autosqueeze=autosqueeze)

# The proportion in each category
dims = gather_dimensions(data.dims, data.dims, preserve_dims, reduce_dims)
proportion = discrete_data.mean(dim=dims)

# attach attributes
proportion.attrs = discrete_data.attrs

return proportion
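One detail worth a sketch is `abs_tolerance` under the '==' mode; since the helper is private, this is an illustration of intent rather than public API usage:

```python
import xarray as xr

# A value within abs_tolerance of the threshold counts as equal to it,
# so exactly one of the two values below lands in the '==' category.
data = xr.DataArray([1.0000000000000002, 2.0])
out = _binary_discretise_proportion(data, [1.0], "==", abs_tolerance=1e-9)
print(float(out))  # 0.5
```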