[FEAT] CRPS, MSSE and Energy Score metrics #134

Merged · 7 commits · Dec 16, 2022
14 changes: 13 additions & 1 deletion hierarchicalforecast/_modidx.py
@@ -20,7 +20,19 @@
'hierarchicalforecast.evaluation.HierarchicalEvaluation.__init__': ( 'evaluation.html#hierarchicalevaluation.__init__',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.HierarchicalEvaluation.evaluate': ( 'evaluation.html#hierarchicalevaluation.evaluate',
'hierarchicalforecast/evaluation.py')},
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation._metric_protections': ( 'evaluation.html#_metric_protections',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.energy_score': ( 'evaluation.html#energy_score',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.mqloss': ( 'evaluation.html#mqloss',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.mse': ( 'evaluation.html#mse',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.msse': ( 'evaluation.html#msse',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.scaled_crps': ( 'evaluation.html#scaled_crps',
'hierarchicalforecast/evaluation.py')},
'hierarchicalforecast.methods': { 'hierarchicalforecast.methods.BottomUp': ( 'methods.html#bottomup',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.BottomUp._get_PW_matrices': ( 'methods.html#bottomup._get_pw_matrices',
9 changes: 3 additions & 6 deletions hierarchicalforecast/core.py
@@ -38,7 +38,7 @@ def _build_fn_name(fn) -> str:
def _reverse_engineer_sigmah(Y_hat_df, y_hat, model_name, uids):
"""
This function assumes that the model creates prediction intervals
under a normality assumption with the following equation:
under a normality with the following equation:
$\hat{y}_{t+h} + c \hat{\sigma}_{h}$

In the future, we might deprecate this function in favor of a
@@ -93,8 +93,8 @@ def reconcile(self,
intervals_method: str = 'normality'):
"""Hierarchical Reconciliation Method.

The `reconcile` method is analogous to the SKLearn `fit` method; it applies different
reconciliation methods instantiated in the `reconcilers` list.
The `reconcile` method is analogous to the SKLearn `fit_predict` method; it
applies different reconciliation methods instantiated in the `reconcilers` list.

Most reconciliation methods can be described by the following convenient
linear algebra notation:
@@ -207,9 +207,6 @@ def reconcile(self,
end = time.time()
self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)

end = time.time()
self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)

if self.insample and has_fitted:
del reconciler_args['y_hat_insample']
gc.collect()
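For intuition, a hypothetical sketch of the inversion `_reverse_engineer_sigmah` describes: the function body is truncated in this diff, so the names `y_hi` and the `norm.ppf` mapping below are illustrative assumptions based only on the quoted equation $\hat{y}_{t+h} + c \hat{\sigma}_{h}$.

```python
import numpy as np
from scipy.stats import norm

# Hypothetical: recover sigmah from a normal prediction interval's upper
# bound, where c is the z-score associated with the `hi-{level}` quantile.
level = 90                         # e.g. a 90% central prediction interval
z = norm.ppf(0.5 + level / 200)    # c for the upper bound, ~1.645

y_hat = np.array([100.0, 110.0])   # point forecasts
y_hi  = np.array([110.0, 125.0])   # model's `hi-90` interval bound

sigmah = (y_hi - y_hat) / z        # invert y_hi = y_hat + c * sigmah
```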
230 changes: 225 additions & 5 deletions hierarchicalforecast/evaluation.py
@@ -1,17 +1,235 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/evaluation.ipynb.

# %% auto 0
__all__ = ['HierarchicalEvaluation']
__all__ = ['msse', 'scaled_crps', 'energy_score', 'HierarchicalEvaluation']

# %% ../nbs/evaluation.ipynb 2
# %% ../nbs/evaluation.ipynb 3
from inspect import signature
from typing import Callable, Dict, List, Optional
from typing import Callable, Dict, List, Optional, Union

import numpy as np
import pandas as pd

# %% ../nbs/evaluation.ipynb 5
class HierarchicalEvaluation:
# %% ../nbs/evaluation.ipynb 6
def _metric_protections(y: np.ndarray, y_hat: np.ndarray,
weights: Optional[np.ndarray]) -> None:
if not ((weights is None) or (np.sum(weights) > 0)):
raise Exception('Sum of `weights` cannot be 0')
if not ((weights is None) or (weights.shape == y.shape)):
raise Exception(
f'Wrong weight dimension weights.shape {weights.shape}, y.shape {y.shape}')

def mse(y: np.ndarray, y_hat: np.ndarray,
weights: Optional[np.ndarray] = None,
axis: Optional[int] = None) -> Union[float, np.ndarray]:
"""Mean Squared Error

Calculates the Mean Squared Error between
`y` and `y_hat`. MSE measures the prediction
accuracy of a forecasting method by calculating the
squared deviation between the prediction and the true
value at a given time, and averages these deviations
over the length of the series.

$$ \mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \hat{y}_{\\tau})^{2} $$

**Parameters:**<br>
`y`: numpy array, Actual values.<br>
`y_hat`: numpy array, Predicted values.<br>
`weights`: numpy array, optional weights for the weighted average.<br>
`axis`: optional int, axis along which to average.<br>

**Returns:**<br>
`mse`: numpy array, (single value).
"""
_metric_protections(y, y_hat, weights)

delta_y = np.square(y - y_hat)
if weights is not None:
mse = np.average(delta_y[~np.isnan(delta_y)],
weights=weights[~np.isnan(delta_y)],
axis=axis)
else:
mse = np.nanmean(delta_y, axis=axis)
return mse
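
A minimal usage sketch of the `mse` helper added above, with invented toy arrays:

```python
import numpy as np
from hierarchicalforecast.evaluation import mse

y     = np.array([[1.0, 2.0, 3.0],
                  [2.0, 2.0, 2.0]])
y_hat = np.array([[1.5, 2.0, 2.5],
                  [2.0, 1.0, 3.0]])

# Unweighted: mean of squared errors over every entry.
print(mse(y, y_hat))          # (0.25 + 0 + 0.25 + 0 + 1 + 1) / 6 ≈ 0.4167

# Per-series errors: average over the horizon axis only.
print(mse(y, y_hat, axis=1))  # array([0.16666667, 0.66666667])
```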

def mqloss(y: np.ndarray, y_hat: np.ndarray,
quantiles: np.ndarray,
weights: Optional[np.ndarray] = None,
axis: Optional[int] = None) -> Union[float, np.ndarray]:
"""Multi-Quantile Loss

Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.
MQL calculates the average multi-quantile Loss for
a given set of quantiles, based on the absolute
difference between predicted quantiles and observed values.

$$ \mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\hat{y}}^{(q_{1})}_{\\tau}, ... ,\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}^{(q_{i})}_{\\tau}) $$

The limit behavior of MQL makes it possible to measure the accuracy
of a full predictive distribution $\mathbf{\hat{F}}_{\\tau}$ with
the continuous ranked probability score (CRPS). This can be achieved
through a numerical integration technique that discretizes the quantiles
and treats the CRPS integral with a left Riemann approximation, averaging over
uniformly spaced quantiles.

$$ \mathrm{CRPS}(y_{\\tau}, \mathbf{\hat{F}}_{\\tau}) = \int^{1}_{0} \mathrm{QL}(y_{\\tau}, \hat{y}^{(q)}_{\\tau}) dq $$

**Parameters:**<br>
`y`: numpy array, Actual values.<br>
`y_hat`: numpy array, Predicted quantiles, stacked on the trailing axis.<br>
`quantiles`: numpy array of size (`n_quantiles`,), quantiles between 0 and 1 to evaluate.<br>
`weights`: numpy array, optional weights for the weighted average.<br>
`axis`: optional int, axis along which to average.<br>

**Returns:**<br>
`mqloss`: numpy array, (single value).

**References:**<br>
[Roger Koenker and Gilbert Bassett, Jr., "Regression Quantiles".](https://www.jstor.org/stable/1913643)<br>
[James E. Matheson and Robert L. Winkler, "Scoring Rules for Continuous Probability Distributions".](https://www.jstor.org/stable/2629907)
"""
if weights is None: weights = np.ones(y.shape)
if (np.sum(quantiles>1)>0 or np.sum(quantiles<0)>0):
raise Exception('`quantiles` need to be between 0 and 1')

_metric_protections(y, y_hat, weights)
n_q = len(quantiles)

y_rep = np.expand_dims(y, axis=-1)
error = y_hat - y_rep
sq = np.maximum(-error, np.zeros_like(error))
s1_q = np.maximum(error, np.zeros_like(error))
mqloss = (quantiles * sq + (1 - quantiles) * s1_q)

# Match y/weights dimensions and compute weighted average
weights = np.repeat(np.expand_dims(weights, axis=-1), repeats=n_q, axis=-1)
mqloss = np.average(mqloss, weights=weights, axis=axis)

return mqloss
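
To make the CRPS discretization above concrete, a small sketch with toy numbers: evaluating `mqloss` on a uniform quantile grid is the left Riemann approximation of the CRPS integral.

```python
import numpy as np
from hierarchicalforecast.evaluation import mqloss

rng = np.random.default_rng(0)
quantiles = np.linspace(0.01, 0.99, 99)   # uniformly spaced grid on (0, 1)

y = np.array([[10.0, 12.0]])              # observations: (n_series, horizon)

# Empirical quantile forecasts from a predictive sample, stacked on the
# trailing axis to get shape (n_series, horizon, n_quantiles).
sample = 11.0 + 2.0 * rng.standard_normal((1, 2, 5000))
y_hat = np.moveaxis(np.quantile(sample, quantiles, axis=-1), 0, -1)

# Averaging QL over the grid approximates the CRPS integral (up to the
# conventional factor of 2, which `scaled_crps` below applies).
print(mqloss(y=y, y_hat=y_hat, quantiles=quantiles))
```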

# %% ../nbs/evaluation.ipynb 8
def msse(y, y_hat, y_train, mask=None):
"""Mean Squared Scaled Error

Computes the Mean Squared Scaled Error (MSSE), as proposed by Hyndman & Koehler (2006)
as an alternative to percentage errors, to avoid measure instability.

$$ \mathrm{MSSE}(\\mathbf{y}, \\mathbf{\hat{y}}, \\mathbf{\hat{y}}^{naive1}) =
\\frac{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}})}{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}}^{naive1})} $$

**Parameters:**<br>
`y`: numpy array, Actual values of size (`n_series`, `horizon`).<br>
`y_hat`: numpy array, Predicted values of size (`n_series`, `horizon`).<br>
`y_train`: numpy array, Insample values of size (`n_series`, `n_train`), used for the naive benchmark.<br>
`mask`: numpy array, Specifies date stamps per series to consider in loss.<br>

**Returns:**<br>
`loss`: float.

**References:**<br>
- [Hyndman, R. J and Koehler, A. B. (2006).
"Another look at measures of forecast accuracy",
International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker.
"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures".
Submitted to the International Journal of Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)
"""
if mask is None:
mask = np.ones_like(y)
n_series, horizon = y.shape

eps = np.finfo(float).eps
y_naive = np.repeat(y_train[:,[-1]], horizon, axis=1)
norm = mse(y=y, y_hat=y_naive)
loss = mse(y=y, y_hat=y_hat, weights=mask)
loss = loss / (norm + eps)
return loss
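
A toy call of `msse`, showing how the naive benchmark is built from `y_train` (values invented for illustration):

```python
import numpy as np
from hierarchicalforecast.evaluation import msse

y_train = np.array([[1.0, 2.0, 3.0],
                    [5.0, 5.0, 5.0]])   # insample: (n_series, n_train)
y       = np.array([[4.0, 5.0],
                    [5.0, 6.0]])        # actuals:  (n_series, horizon)
y_hat   = np.array([[3.5, 4.5],
                    [5.0, 5.0]])        # forecast: (n_series, horizon)

# The naive1 benchmark repeats the last insample value over the horizon:
# [[3, 3], [5, 5]]. Here MSE(y, y_hat) = 0.375 and MSE(y, naive1) = 1.5,
# so values below 1.0 mean "better than naive".
print(msse(y=y, y_hat=y_hat, y_train=y_train))  # ≈ 0.25
```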

# %% ../nbs/evaluation.ipynb 11
def scaled_crps(y, y_hat, quantiles):
"""Scaled Continues Ranked Probability Score

Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021),
to measure the accuracy of predicted quantiles `y_hat` compared to the observation `y`.

This metric averages percentual weighted absolute deviations as
defined by the quantile losses.

$$ \mathrm{sCRPS}(\hat{F}_{\\tau}, \mathbf{y}_{\\tau}) = 2 \sum_{i}
\int^{1}_{0}
\\frac{\mathrm{QL}(\hat{F}_{i,\\tau}, y_{i,\\tau})_{q}}{\sum_{i} | y_{i,\\tau} |} dq $$

where $\hat{F}_{\\tau}$ is an estimated multivariate distribution, and $y_{i,\\tau}$
are its realizations.

**Parameters:**<br>
`y`: numpy array, Actual values of size (`n_series`, `horizon`).<br>
`y_hat`: numpy array, Predicted quantiles of size (`n_series`, `horizon`, `n_quantiles`).<br>
`quantiles`: numpy array,(`n_quantiles`). Quantiles to estimate from the distribution of y.<br>

**Returns:**<br>
`loss`: float.

**References:**<br>
- [Gneiting, Tilmann. (2011). \"Quantiles as optimal point forecasts\".
International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207010000063)
- [Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, Zhi Chen, Anil Gaba, Ilia Tsetlin, Robert L. Winkler. (2022).
\"The M5 uncertainty competition: Results, findings and conclusions\".
International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207021001722)
- [Syama Sundar Rangapuram, Lucien D Werner, Konstantinos Benidis, Pedro Mercado, Jan Gasthaus, Tim Januschowski. (2021).
\"End-to-End Learning of Coherent Probabilistic Forecasts for Hierarchical Time Series\".
Proceedings of the 38th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v139/rangapuram21a.html)
"""
eps = np.finfo(float).eps
norm = np.sum(np.abs(y))
loss = mqloss(y=y, y_hat=y_hat, quantiles=quantiles)
loss = 2 * loss * np.sum(np.ones(y.shape)) / (norm + eps)
return loss
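
Continuing the `mqloss` sketch, `scaled_crps` normalizes the same quantile losses by the aggregate absolute scale of `y`, making scores comparable across series of different magnitudes (inputs again invented):

```python
import numpy as np
from hierarchicalforecast.evaluation import scaled_crps

rng = np.random.default_rng(1)
quantiles = np.linspace(0.01, 0.99, 99)

y = 10.0 + np.abs(rng.standard_normal((3, 4)))             # (n_series, horizon)
sample = y[:, :, None] + rng.standard_normal((3, 4, 5000))
y_hat = np.moveaxis(np.quantile(sample, quantiles, axis=-1), 0, -1)

# 2 * mean(QL) * y.size / sum(|y|): a scale-free probabilistic score.
print(scaled_crps(y=y, y_hat=y_hat, quantiles=quantiles))
```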

# %% ../nbs/evaluation.ipynb 14
def energy_score(y, y_sample1, y_sample2, beta=2):
"""Energy Score

Calculates Gneiting's Energy Score sample approximation for
`y` and independent multivariate samples `y_sample1` and `y_sample2`.
The Energy Score generalizes the CRPS (`beta`=1) to the multivariate setting.

$$ \mathrm{ES}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}')
= \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{y}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}||^{\\beta} \\right]
- \\frac{1}{2} \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{\hat{y}}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}'||^{\\beta} \\right]
\quad \\beta \in (0,2]$$

where $\\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}'$ are independent samples drawn from $\hat{P}$.

**Parameters:**<br>
`y`: numpy array, Actual values of size (`n_series`, `horizon`).<br>
`y_sample1`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).<br>
`y_sample2`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).<br>
`beta`: float in (0,2], defines the energy score's power for the euclidean metric.<br>

**Returns:**<br>
`loss`: float.

**References:**<br>
- [Gneiting, Tilmann, and Adrian E. Raftery. (2007).
\"Strictly proper scoring rules, prediction and estimation\".
Journal of the American Statistical Association.](https://sites.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf)
- [Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, Rob J. Hyndman. (2022).
\"Probabilistic forecast reconciliation: Properties, evaluation and score optimisation\".
European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087)
"""
if beta <= 0 or beta > 2:
raise Exception("`beta` needs to be in the interval (0, 2].")

dif1 = (y_sample1 - y_sample2)
dif2 = (y[:,:,None] - y_sample1)

term1 = np.linalg.norm(dif1, axis=0) ** beta
term2 = np.linalg.norm(dif2, axis=0) ** beta

loss = np.mean(term2 - 0.5 * term1)
return loss
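
And a matching sketch for `energy_score`, which requires two independent batches of samples from the same predictive distribution (toy data):

```python
import numpy as np
from hierarchicalforecast.evaluation import energy_score

rng = np.random.default_rng(2)
n_series, horizon, n_samples = 5, 3, 1000

y = rng.standard_normal((n_series, horizon))

# Two independent sample batches from the same predictive distribution,
# shaped (n_series, horizon, n_samples) as the docstring specifies.
y_sample1 = rng.standard_normal((n_series, horizon, n_samples))
y_sample2 = rng.standard_normal((n_series, horizon, n_samples))

# beta=1 is the CRPS-like case mentioned above.
print(energy_score(y, y_sample1, y_sample2, beta=1))
```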

# %% ../nbs/evaluation.ipynb 17
class HierarchicalEvaluation:
"""Hierarchical Evaluation Class.

You can use your own metrics to evaluate the performance of each level in the structure.
@@ -23,6 +241,8 @@ class HierarchicalEvaluation:

**Parameters:**<br>
`evaluators`: functions with arguments `y`, `y_hat` (numpy arrays).<br>

**References:**<br>
"""
def __init__(self,
evaluators: List[Callable]):
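Only the `__init__` signature is visible in this diff, so the following is a hedged sketch of plugging the new metrics into `HierarchicalEvaluation`; the `functools.partial` adapter and the toy `y_train` are assumptions, grounded only in the documented contract that evaluators take `y` and `y_hat`:

```python
from functools import partial

import numpy as np
from hierarchicalforecast.evaluation import HierarchicalEvaluation, msse

y_train = np.random.rand(10, 24)  # hypothetical insample values

# `msse` also needs `y_train`, so a partial turns it into a (y, y_hat)
# evaluator that matches the documented interface.
evaluator = HierarchicalEvaluation(evaluators=[partial(msse, y_train=y_train)])
```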
2 changes: 0 additions & 2 deletions hierarchicalforecast/methods.py
@@ -108,9 +108,7 @@ def sample(self,
if self.sampler is None:
raise Exception("This model instance does not have sampler. Call fit with `intervals_method`.")

# [samples, N, H] -> [N, H, samples]
samples = self.sampler.get_samples(num_samples=num_samples)
samples = samples.transpose((1, 2, 0))
return samples

# %% ../nbs/methods.ipynb 10
20 changes: 13 additions & 7 deletions hierarchicalforecast/probabilistic_methods.py
@@ -82,6 +82,9 @@ def get_samples(self, num_samples: int=None):
partial_samples = state.multivariate_normal(mean=self.SP @ self.y_hat[:,t],
cov=self.cov_rec[t], size=num_samples)
samples[:,:,t] = partial_samples

# [samples, N, H] -> [N, H, samples]
samples = samples.transpose((1, 2, 0))
return samples

def get_prediction_levels(self, res, level):
@@ -168,16 +171,20 @@ def get_samples(self, num_samples: int=None):
SP = self.S @ self.P
samples = np.apply_along_axis(lambda path: np.matmul(SP, path),
axis=1, arr=samples)
return np.stack(samples)
samples = np.stack(samples)

# [samples, N, H] -> [N, H, samples]
samples = samples.transpose((1, 2, 0))
return samples

def get_prediction_levels(self, res, level):
""" Adds reconciled forecast levels to results dictionary """
samples = self.get_samples(num_samples=self.num_samples)
for lv in level:
min_q = (100 - lv) / 200
max_q = min_q + lv / 100
res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)
res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)
res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)
res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)
return res

# %% ../nbs/probabilistic_methods.ipynb 14
@@ -382,15 +389,14 @@ def get_samples(self, num_samples: int=None):
)

rec_samples[parent_idxs,:,:] = parent_samples

return np.transpose(rec_samples, (2, 0, 1))
return rec_samples

def get_prediction_levels(self, res, level):
""" Adds reconciled forecast levels to results dictionary """
samples = self.get_samples(num_samples=self.num_samples)
for lv in level:
min_q = (100 - lv) / 200
max_q = min_q + lv / 100
res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)
res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)
res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)
res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)
return res
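
The changes above standardize every sampler on a `[N, H, samples]` layout, which is why the quantile extraction in `get_prediction_levels` moves from `axis=0` to `axis=2`. A small numpy illustration of the equivalence:

```python
import numpy as np

rng = np.random.default_rng(3)
num_samples, N, H = 1000, 4, 6

old_layout = rng.standard_normal((num_samples, N, H))  # [samples, N, H]
new_layout = old_layout.transpose((1, 2, 0))           # [N, H, samples]

lv = 80
min_q = (100 - lv) / 200      # 0.10: lower bound of the 80% interval
max_q = min_q + lv / 100      # 0.90: upper bound

lo_old = np.quantile(old_layout, min_q, axis=0)  # quantile over the sample axis
lo_new = np.quantile(new_layout, min_q, axis=2)  # same values, new layout
hi_old = np.quantile(old_layout, max_q, axis=0)
hi_new = np.quantile(new_layout, max_q, axis=2)
assert np.allclose(lo_old, lo_new)               # both are (N, H)
assert np.allclose(hi_old, hi_new)
```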
9 changes: 3 additions & 6 deletions nbs/core.ipynb
@@ -131,7 +131,7 @@
"def _reverse_engineer_sigmah(Y_hat_df, y_hat, model_name, uids):\n",
" \"\"\"\n",
" This function assumes that the model creates prediction intervals\n",
" under a normality assumption with the following the Equation:\n",
" under a normality with the following the Equation:\n",
" $\\hat{y}_{t+h} + c \\hat{sigma}_{h}$\n",
"\n",
" In the future, we might deprecate this function in favor of a \n",
@@ -193,8 +193,8 @@
" intervals_method: str = 'normality'):\n",
" \"\"\"Hierarchical Reconciliation Method.\n",
"\n",
" The `reconcile` method is analogous to SKLearn `fit` method, it applies different \n",
" reconciliation methods instantiated in the `reconcilers` list. \n",
" The `reconcile` method is analogous to SKLearn `fit_predict` method, it \n",
" applies different reconciliation methods instantiated in the `reconcilers` list. \n",
"\n",
" Most reconciliation methods can be described by the following convenient \n",
" linear algebra notation:\n",
@@ -307,9 +307,6 @@
" end = time.time()\n",
" self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)\n",
"\n",
" end = time.time()\n",
" self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)\n",
"\n",
" if self.insample and has_fitted:\n",
" del reconciler_args['y_hat_insample']\n",
" gc.collect()\n",