Skip to content

Commit

Permalink
Add brier_score_for_ensemble with fair brier correction (#735)
Browse files Browse the repository at this point in the history
* Add the new score "brier_score_for_ensemble" with the fair brier correction

Co-authored-by: Stephanie Chong <[email protected]>
Signed-off-by: Tennessee Leeuwenburg <[email protected]>
Co-authored-by: Tennessee Leeuwenburg <[email protected]>
Signed-off-by: Nicholas Loveday <[email protected]>
  • Loading branch information
3 people authored Dec 5, 2024
1 parent 2e60196 commit b35e928
Show file tree
Hide file tree
Showing 7 changed files with 2,820 additions and 160 deletions.
1 change: 1 addition & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
.. autofunction:: scores.probability.murphy_thetas
.. autofunction:: scores.probability.roc_curve_data
.. autofunction:: scores.probability.brier_score
.. autofunction:: scores.probability.brier_score_for_ensemble
.. autofunction:: scores.probability.isotonic_fit
```

Expand Down
4 changes: 4 additions & 0 deletions docs/included.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,10 @@
- [API](api.md#scores.probability.brier_score)
- [Tutorial](project:./tutorials/Brier_Score.md)
- [Brier (1950)](https://doi.org/10.1175/1520-0493(1950)078<0001:VOFEIT>2.0.CO;2)
* - Brier Score for Ensembles
- [API](api.md#scores.probability.brier_score_for_ensemble)
- [Tutorial](project:./tutorials/Brier_Score.md)
- [Ferro (2013)](https://doi.org/10.1002/qj.2270)
* - Continuous Ranked Probability Score (CRPS) for Cumulative Distribution Functions (CDFs)
-
-
Expand Down
3 changes: 2 additions & 1 deletion src/scores/probability/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

from scores.continuous.murphy_impl import murphy_score, murphy_thetas
from scores.probability.brier_impl import brier_score
from scores.probability.brier_impl import brier_score, brier_score_for_ensemble
from scores.probability.crps_impl import (
adjust_fcst_for_crps,
crps_cdf,
Expand All @@ -21,6 +21,7 @@
"murphy_score",
"murphy_thetas",
"brier_score",
"brier_score_for_ensemble",
"adjust_fcst_for_crps",
"crps_cdf",
"crps_cdf_brier_decomposition",
Expand Down
181 changes: 177 additions & 4 deletions src/scores/probability/brier_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@
This module contains methods related to the Brier score
"""

from typing import Optional
import operator
from collections.abc import Sequence
from numbers import Real
from typing import Callable, Optional

import xarray as xr

from scores.continuous import mse
from scores.functions import apply_weights
from scores.processing import binary_discretise
from scores.typing import FlexibleDimensionTypes, XarrayLike
from scores.utils import check_binary
from scores.utils import check_binary, gather_dimensions


def brier_score(
Expand Down Expand Up @@ -42,8 +47,8 @@ def brier_score(
point (i.e. single-value comparison against observed), and the
forecast and observed dimensions must match precisely.
weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
by population, custom)
check_args: will perform some tests on the data if set to True
by population, custom).
check_args: will perform some tests on the data if set to True.
Raises:
ValueError: if ``fcst`` contains non-nan values outside of the range [0, 1]
ValueError: if ``obs`` contains non-nan values not in the set {0, 1}
Expand All @@ -52,6 +57,9 @@ def brier_score(
- Brier, G. W. (1950). Verification of forecasts expressed in terms of probability.
Monthly Weather Review, 78(1), 1-3. https://doi.org/fp62r6
- https://en.wikipedia.org/wiki/Brier_score
See Also:
- :py:func:`scores.probability.brier_score_for_ensemble`
"""
if check_args:
error_msg = ValueError("`fcst` contains values outside of the range [0, 1]")
Expand All @@ -65,3 +73,168 @@ def brier_score(
check_binary(obs, "obs")

return mse(fcst, obs, reduce_dims=reduce_dims, preserve_dims=preserve_dims, weights=weights)


def brier_score_for_ensemble(
fcst: XarrayLike, # type: ignore
obs: XarrayLike, # type: ignore
ensemble_member_dim: str,
event_thresholds: Real | Sequence[Real],
*, # Force keywords arguments to be keyword-only
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
fair_correction: bool = True,
event_threshold_operator: Callable = operator.ge,
threshold_dim: str = "threshold",
) -> XarrayLike: # type: ignore
"""
Calculates the Brier score for an ensemble forecast for the provided thresholds. By default,
the fair Brier score is calculated, which is a modified version of the Brier score that
applies a correction related to the number of ensemble members and the number of
ensemble members that predict the event.
Given an event defined by ``event_threshold``, the fair Brier score for ensembles is defined as
.. math::
s_{i,y} = \\left(\\frac{i}{m} - y\\right)^2 - \\frac{i(m-i)}{m^2(m-1)}
Where:
- :math:`i` is the number of ensemble members that predict the event
- :math:`m` is the total number of ensemble members
- :math:`y` is the binary observation (0 if the event was not observed or 1 otherwise)
When the fair correction is not applied (i.e., ``fair_correction=False``),
the Brier score is calculated as:
.. math::
s_{i,y} = \\left(\\frac{i}{m} - y\\right)^2
The fair correction will only be applied at coordinates where the number of ensemble members
is greater than 1 and ``fair_correction=True``.
When the ``fair_correction`` is set to ``True`` and the number of ensemble members is 1, the
fair correction will not be applied to avoid division by zero. If you want to
set the minimum number of ensemble members required to calculate the Brier score,
we recommend preprocessing your data to remove data points with fewer than the
minimum number of ensemble members that you want.
Args:
fcst: Real valued ensemble forecasts in xarray. The proportion of ensemble members
above a threshold is calculated within this function.
obs: Real valued observations in xarray. These values are converted to binary
values in this function based on the supplied event thresholds and operators.
ensemble_member_dim: The dimension name of the ensemble member.
event_thresholds: The threshold(s) that define what is considered an event. If
multiple thresholds are provided, they should be monotonically increasing with no NaNs.
reduce_dims: Optionally specify which dimensions to reduce when
calculating the Brier score. All other dimensions will be preserved.
preserve_dims: Optionally specify which dimensions to preserve when
calculating the score. All other dimensions will be reduced. As a
special case, 'all' will allow all dimensions to be preserved. In
this case, the result will be in the same shape/dimensionality
as the forecast, and the errors will be the absolute error at each
point (i.e. single-value comparison against observed), and the
forecast and observed dimensions must match precisely.
weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude)
when calculating the mean score across specified dimensions via ``reduce_dims`` or ``preserve_dims``.
fair_correction: Whether or not to apply the fair Brier score correction. Default is True.
event_threshold_operator: The mode to use to define an event relative to a supplied threshold.
Default is ``operator.ge``, which means that an event occurs
if the observation is greater than or equal to the threshold.
Similarly, an event will have been considered to be forecast by an ensemble member
if the forecast is greater than or equal to the threshold.
Passing in ``operator.lt`` will give the same results as it is mathematically
equivalent in this case. Alternatively, you can provide
``operator.gt`` which means that an event occurs if the observation
is strictly greater than the threshold. Again, an event for an ensemble member
will have been considered to be forecast if the forecast is greater than the threshold.
Using``operator.le`` will give the same results as ``operator.gt`` as it is mathematically
equivalent in this case.
threshold_dim: The name of the threshold dimension in the output. Default is 'threshold'.
It must not exist as a dimension in the forecast or observation data.
Returns:
The Brier score for the ensemble forecast.
Raises:
ValueError: if values in ``event_thresholds`` are not monotonically increasing.
ValueError: if ``fcst`` contains the dimension provided in ``threshold_dim``.
ValueError: if ``obs`` contains the dimension provided in ``threshold_dim``.
ValueError: if ``weights`` contains the dimension provided in ``threshold_dim``.
ValueError: if ``ensemble_member_dim`` is not in ``fcst`` dimensions.
References:
- Ferro, C. A. T. (2013). Fair scores for ensemble forecasts. Quarterly
Journal of the Royal Meteorological Society, 140(683), 1917–1923.
https://doi.org/10.1002/qj.2270
See Also:
- :py:func:`scores.probability.brier_score`
Examples:
Calculate the Brier score for an ensemble forecast for a single threshold:
>>> import numpy as np
>>> import xarray as xr
>>> from scores.probability import brier_score_for_ensemble
>>> fcst = 10 * xr.DataArray(np.random.rand(10, 10), dims=['time', 'ensemble'])
>>> obs = 10 * xr.DataArray(np.random.rand(10), dims=['time'])
>>> brier_score_for_ensemble(fcst, obs, ensemble_member_dim='ensemble', thresholds=0.5)
Calculate the Brier score for an ensemble forecast for multiple thresholds:
>>> thresholds = [0.1, 5, 9]
>>> brier_score_for_ensemble(fcst, obs, ensemble_member_dim='ensemble', thresholds=thresholds)
"""
if event_threshold_operator not in [operator.ge, operator.gt, operator.le, operator.lt]:
raise ValueError("event_threshold_operator must be one of operator.ge, operator.gt, operator.le, operator.lt")
if threshold_dim in fcst.dims:
raise ValueError("threshold_dim is not allowed to be the same as a dim in fcst")
if threshold_dim in obs.dims:
raise ValueError("threshold_dim is not allowed to be the same as a dim in obs")

weights_dims = None
if weights is not None:
if threshold_dim in weights.dims:
raise ValueError("threshold_dim is not allowed to be the same as a dim in weights")
weights_dims = weights.dims

dims_for_mean = gather_dimensions(
fcst.dims,
obs.dims,
weights_dims=weights_dims,
reduce_dims=reduce_dims,
preserve_dims=preserve_dims,
score_specific_fcst_dims=ensemble_member_dim,
)
if isinstance(event_thresholds, (float, int)):
event_thresholds = [event_thresholds]
thresholds_xr = xr.DataArray(event_thresholds, dims=[threshold_dim], coords={threshold_dim: event_thresholds})

# calculate i term in equation
member_event_count = event_threshold_operator(fcst, thresholds_xr).sum(dim=ensemble_member_dim)
# calculate m term in equation
total_member_count = fcst.notnull().sum(dim=ensemble_member_dim)

binary_obs = binary_discretise(obs, event_thresholds, event_threshold_operator)

result = (member_event_count / total_member_count - binary_obs) ** 2
if fair_correction:
fair_corr = (member_event_count * (total_member_count - member_event_count)) / (
total_member_count**2 * (total_member_count - 1)
)
# For coordinates where the ensemble member count is 1, the fair correction will
# have a NaN value. In this situation, we set the fair correction to 0 at these coordinates.
# This may be important if you have a lagged ensemble where at least one time step
# has only a single ensemble member
fair_correction = fair_corr.fillna(0)
result -= fair_correction

# apply weights and take means across specified dims
result = apply_weights(result, weights=weights).mean(dim=dims_for_mean)

return result
Loading

0 comments on commit b35e928

Please sign in to comment.