diff --git a/hierarchicalforecast/_modidx.py b/hierarchicalforecast/_modidx.py
index 9c31fd68..3b2958f5 100644
--- a/hierarchicalforecast/_modidx.py
+++ b/hierarchicalforecast/_modidx.py
@@ -20,7 +20,19 @@
'hierarchicalforecast.evaluation.HierarchicalEvaluation.__init__': ( 'evaluation.html#hierarchicalevaluation.__init__',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.HierarchicalEvaluation.evaluate': ( 'evaluation.html#hierarchicalevaluation.evaluate',
- 'hierarchicalforecast/evaluation.py')},
+ 'hierarchicalforecast/evaluation.py'),
+ 'hierarchicalforecast.evaluation._metric_protections': ( 'evaluation.html#_metric_protections',
+ 'hierarchicalforecast/evaluation.py'),
+ 'hierarchicalforecast.evaluation.energy_score': ( 'evaluation.html#energy_score',
+ 'hierarchicalforecast/evaluation.py'),
+ 'hierarchicalforecast.evaluation.mqloss': ( 'evaluation.html#mqloss',
+ 'hierarchicalforecast/evaluation.py'),
+ 'hierarchicalforecast.evaluation.mse': ( 'evaluation.html#mse',
+ 'hierarchicalforecast/evaluation.py'),
+ 'hierarchicalforecast.evaluation.msse': ( 'evaluation.html#msse',
+ 'hierarchicalforecast/evaluation.py'),
+ 'hierarchicalforecast.evaluation.scaled_crps': ( 'evaluation.html#scaled_crps',
+ 'hierarchicalforecast/evaluation.py')},
'hierarchicalforecast.methods': { 'hierarchicalforecast.methods.BottomUp': ( 'methods.html#bottomup',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.BottomUp._get_PW_matrices': ( 'methods.html#bottomup._get_pw_matrices',
diff --git a/hierarchicalforecast/core.py b/hierarchicalforecast/core.py
index bf58938a..fa804235 100644
--- a/hierarchicalforecast/core.py
+++ b/hierarchicalforecast/core.py
@@ -38,7 +38,7 @@ def _build_fn_name(fn) -> str:
def _reverse_engineer_sigmah(Y_hat_df, y_hat, model_name, uids):
"""
This function assumes that the model creates prediction intervals
- under a normality assumption with the following the Equation:
+ under a normality with the following the Equation:
$\hat{y}_{t+h} + c \hat{sigma}_{h}$
In the future, we might deprecate this function in favor of a
@@ -93,8 +93,8 @@ def reconcile(self,
intervals_method: str = 'normality'):
"""Hierarchical Reconciliation Method.
- The `reconcile` method is analogous to SKLearn `fit` method, it applies different
- reconciliation methods instantiated in the `reconcilers` list.
+ The `reconcile` method is analogous to SKLearn `fit_predict` method, it
+ applies different reconciliation methods instantiated in the `reconcilers` list.
Most reconciliation methods can be described by the following convenient
linear algebra notation:
@@ -207,9 +207,6 @@ def reconcile(self,
end = time.time()
self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)
- end = time.time()
- self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)
-
if self.insample and has_fitted:
del reconciler_args['y_hat_insample']
gc.collect()
diff --git a/hierarchicalforecast/evaluation.py b/hierarchicalforecast/evaluation.py
index 048d995e..296eabe4 100644
--- a/hierarchicalforecast/evaluation.py
+++ b/hierarchicalforecast/evaluation.py
@@ -1,17 +1,235 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/evaluation.ipynb.
# %% auto 0
-__all__ = ['HierarchicalEvaluation']
+__all__ = ['msse', 'scaled_crps', 'energy_score', 'HierarchicalEvaluation']
-# %% ../nbs/evaluation.ipynb 2
+# %% ../nbs/evaluation.ipynb 3
from inspect import signature
-from typing import Callable, Dict, List, Optional
+from typing import Callable, Dict, List, Optional, Union
import numpy as np
import pandas as pd
-# %% ../nbs/evaluation.ipynb 5
-class HierarchicalEvaluation:
+# %% ../nbs/evaluation.ipynb 6
+def _metric_protections(y: np.ndarray, y_hat: np.ndarray,
+ weights: Optional[np.ndarray]) -> None:
+ if not ((weights is None) or (np.sum(weights) > 0)):
+ raise Exception('Sum of `weights` cannot be 0')
+ if not ((weights is None) or (weights.shape == y.shape)):
+ raise Exception(
+ f'Wrong weight dimension weights.shape {weights.shape}, y.shape {y.shape}')
+
+def mse(y: np.ndarray, y_hat: np.ndarray,
+ weights: Optional[np.ndarray] = None,
+ axis: Optional[int] = None) -> Union[float, np.ndarray]:
+ """Mean Squared Error
+
+ Calculates Mean Squared Error between
+ `y` and `y_hat`. MSE measures the relative prediction
+ accuracy of a forecasting method by calculating the
+ squared deviation of the prediction and the true
+ value at a given time, and averages these devations
+ over the length of the series.
+
+ $$ \mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \hat{y}_{\\tau})^{2} $$
+
+ **Parameters:**
+ `y`: numpy array, Actual values.
+ `y_hat`: numpy array, Predicted values.
+ `mask`: numpy array, Specifies date stamps per serie to consider in loss.
+
+ **Returns:**
+ `mse`: numpy array, (single value).
+ """
+ _metric_protections(y, y_hat, weights)
+
+ delta_y = np.square(y - y_hat)
+ if weights is not None:
+ mse = np.average(delta_y[~np.isnan(delta_y)],
+ weights=weights[~np.isnan(delta_y)],
+ axis=axis)
+ else:
+ mse = np.nanmean(delta_y, axis=axis)
+ return mse
+
+def mqloss(y: np.ndarray, y_hat: np.ndarray,
+ quantiles: np.ndarray,
+ weights: Optional[np.ndarray] = None,
+ axis: Optional[int] = None) -> Union[float, np.ndarray]:
+ """Multi-Quantile Loss
+
+ Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.
+ MQL calculates the average multi-quantile Loss for
+ a given set of quantiles, based on the absolute
+ difference between predicted quantiles and observed values.
+
+ $$ \mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\hat{y}}^{(q_{1})}_{\\tau}, ... ,\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}^{(q_{i})}_{\\tau}) $$
+
+ The limit behavior of MQL allows to measure the accuracy
+ of a full predictive distribution $\mathbf{\hat{F}}_{\\tau}$ with
+ the continuous ranked probability score (CRPS). This can be achieved
+ through a numerical integration technique, that discretizes the quantiles
+ and treats the CRPS integral with a left Riemann approximation, averaging over
+ uniformly distanced quantiles.
+
+ $$ \mathrm{CRPS}(y_{\\tau}, \mathbf{\hat{F}}_{\\tau}) = \int^{1}_{0} \mathrm{QL}(y_{\\tau}, \hat{y}^{(q)}_{\\tau}) dq $$
+
+ **Parameters:**
+ `y`: numpy array, Actual values.
+ `y_hat`: numpy array, Predicted values.
+ `quantiles`: numpy array. Quantiles between 0 and 1, to perform evaluation upon size (n_quantiles).
+ `mask`: numpy array, Specifies date stamps per serie to consider in loss.
+
+ **Returns:**
+ `mqloss`: numpy array, (single value).
+
+ **References:**
+ [Roger Koenker and Gilbert Bassett, Jr., "Regression Quantiles".](https://www.jstor.org/stable/1913643)
+ [James E. Matheson and Robert L. Winkler, "Scoring Rules for Continuous Probability Distributions".](https://www.jstor.org/stable/2629907)
+ """
+ if weights is None: weights = np.ones(y.shape)
+ if (np.sum(quantiles>1)>0 or np.sum(quantiles<0)>0):
+ raise Exception('`quantiles` need to be between 0 and 1')
+
+ _metric_protections(y, y_hat, weights)
+ n_q = len(quantiles)
+
+ y_rep = np.expand_dims(y, axis=-1)
+ error = y_hat - y_rep
+ sq = np.maximum(-error, np.zeros_like(error))
+ s1_q = np.maximum(error, np.zeros_like(error))
+ mqloss = (quantiles * sq + (1 - quantiles) * s1_q)
+
+ # Match y/weights dimensions and compute weighted average
+ weights = np.repeat(np.expand_dims(weights, axis=-1), repeats=n_q, axis=-1)
+ mqloss = np.average(mqloss, weights=weights, axis=axis)
+
+ return mqloss
+
+# %% ../nbs/evaluation.ipynb 8
+def msse(y, y_hat, y_train, mask=None):
+ """Mean Squared Scaled Error
+
+ Computes Mean squared scaled error (MSSE), as proposed by Hyndman & Koehler (2006)
+ as an alternative to percentage errors, to avoid measure unstability.
+
+ $$ \mathrm{MSSE}(\\mathbf{y}, \\mathbf{\hat{y}}, \\mathbf{\hat{y}}^{naive1}) =
+ \\frac{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}})}{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}}^{naive1})} $$
+
+ **Parameters:**
+ `y`: numpy array, Actual values of size (`n_series`, `horizon`).
+ `y_hat`: numpy array, Predicted values (`n_series`, `horizon`).
+ `mask`: numpy array, Specifies date stamps per serie to consider in loss.
+
+ **Returns:**
+ `loss`: float.
+
+ **References:**
+ - [Hyndman, R. J and Koehler, A. B. (2006).
+ "Another look at measures of forecast accuracy",
+ International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
+ - [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker.
+ "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures.
+ Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)
+ """
+ if mask is None:
+ mask = np.ones_like(y)
+ n_series, horizon = y.shape
+
+ eps = np.finfo(float).eps
+ y_naive = np.repeat(y_train[:,[-1]], horizon, axis=1)
+ norm = mse(y=y, y_hat=y_naive)
+ loss = mse(y=y, y_hat=y_hat, weights=mask)
+ loss = loss / (norm + eps)
+ return loss
+
+# %% ../nbs/evaluation.ipynb 11
+def scaled_crps(y, y_hat, quantiles):
+ """Scaled Continues Ranked Probability Score
+
+ Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021),
+ to measure the accuracy of predicted quantiles `y_hat` compared to the observation `y`.
+
+ This metric averages percentual weighted absolute deviations as
+ defined by the quantile losses.
+
+ $$ \mathrm{sCRPS}(\hat{F}_{\\tau}, \mathbf{y}_{\\tau}) = 2 \sum_{i}
+ \int^{1}_{0}
+ \\frac{\mathrm{QL}(\hat{F}_{i,\\tau}, y_{i,\\tau})_{q}}{\sum_{i} | y_{i,\\tau} |} dq $$
+
+ where $\hat{F}_{\\tau}$ is the an estimated multivariate distribution, and $y_{i,\\tau}$
+ are its realizations.
+
+ **Parameters:**
+ `y`: numpy array, Actual values of size (`n_series`, `horizon`).
+ `y_hat`: numpy array, Predicted quantiles of size (`n_series`, `horizon`, `n_quantiles`).
+ `quantiles`: numpy array,(`n_quantiles`). Quantiles to estimate from the distribution of y.
+
+ **Returns:**
+ `loss`: float.
+
+ **References:**
+ - [Gneiting, Tilmann. (2011). \"Quantiles as optimal point forecasts\".
+ International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207010000063)
+ - [Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, Zhi Chen, Anil Gaba, Ilia Tsetlin, Robert L. Winkler. (2022).
+ \"The M5 uncertainty competition: Results, findings and conclusions\".
+ International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207021001722)
+ - [Syama Sundar Rangapuram, Lucien D Werner, Konstantinos Benidis, Pedro Mercado, Jan Gasthaus, Tim Januschowski. (2021).
+ \"End-to-End Learning of Coherent Probabilistic Forecasts for Hierarchical Time Series\".
+ Proceedings of the 38th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v139/rangapuram21a.html)
+ """
+ eps = np.finfo(float).eps
+ norm = np.sum(np.abs(y))
+ loss = mqloss(y=y, y_hat=y_hat, quantiles=quantiles)
+ loss = 2 * loss * np.sum(np.ones(y.shape)) / (norm + eps)
+ return loss
+
+# %% ../nbs/evaluation.ipynb 14
+def energy_score(y, y_sample1, y_sample2, beta=2):
+ """Energy Score
+
+ Calculates Gneiting's Energy Score sample approximation for
+ `y` and independent multivariate samples `y_sample1` and `y_sample2`.
+ The Energy Score generalizes the CRPS (`beta`=1) in the multivariate setting.
+
+ $$ \mathrm{ES}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}')
+ = \\frac{1}{2} \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{\hat{y}}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}'||^{\\beta} \\right]
+ - \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{y}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}||^{\\beta} \\right]
+ \quad \\beta \in (0,2]$$
+
+ where $\\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}'$ are independent samples drawn from $\hat{P}$.
+
+ **Parameters:**
+ `y`: numpy array, Actual values of size (`n_series`, `horizon`).
+ `y_sample1`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).
+ `y_sample2`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).
+ `beta`: float in (0,2], defines the energy score's power for the euclidean metric.
+
+ **Returns:**
+ `loss`: float.
+
+ **References:**
+ - [Gneiting, Tilmann, and Adrian E. Raftery. (2007).
+ \"Strictly proper scoring rules, prediction and estimation\".
+ Journal of the American Statistical Association.](https://sites.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf)
+ - [Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, Rob J. Hyndman. (2022).
+ \"Probabilistic forecast reconciliation: Properties, evaluation and score optimisation\".
+ European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087)
+ """
+ if beta>2 or beta<0:
+ raise Exception("beta needs to be between 0 and 2.")
+
+ dif1 = (y_sample1 - y_sample2)
+ dif2 = (y[:,:,None] - y_sample1)
+
+ term1 = np.linalg.norm(dif1, axis=0) ** beta
+ term2 = np.linalg.norm(dif2, axis=0) ** beta
+
+ loss = np.mean(term2 - 0.5 * term1)
+ return loss
+
+# %% ../nbs/evaluation.ipynb 17
+class HierarchicalEvaluation:
"""Hierarchical Evaluation Class.
You can use your own metrics to evaluate the performance of each level in the structure.
@@ -23,6 +241,8 @@ class HierarchicalEvaluation:
**Parameters:**
`evaluators`: functions with arguments `y`, `y_hat` (numpy arrays).
+
+ **References:**
"""
def __init__(self,
evaluators: List[Callable]):
diff --git a/hierarchicalforecast/methods.py b/hierarchicalforecast/methods.py
index 4f4f1538..1a327b2c 100644
--- a/hierarchicalforecast/methods.py
+++ b/hierarchicalforecast/methods.py
@@ -108,9 +108,7 @@ def sample(self,
if self.sampler is None:
raise Exception("This model instance does not have sampler. Call fit with `intervals_method`.")
- # [samples, N, H] -> [N, H, samples]
samples = self.sampler.get_samples(num_samples=num_samples)
- samples = samples.transpose((1, 2, 0))
return samples
# %% ../nbs/methods.ipynb 10
diff --git a/hierarchicalforecast/probabilistic_methods.py b/hierarchicalforecast/probabilistic_methods.py
index 5b089dd1..2c4f59ed 100644
--- a/hierarchicalforecast/probabilistic_methods.py
+++ b/hierarchicalforecast/probabilistic_methods.py
@@ -82,6 +82,9 @@ def get_samples(self, num_samples: int=None):
partial_samples = state.multivariate_normal(mean=self.SP @ self.y_hat[:,t],
cov=self.cov_rec[t], size=num_samples)
samples[:,:,t] = partial_samples
+
+ # [samples, N, H] -> [N, H, samples]
+ samples = samples.transpose((1, 2, 0))
return samples
def get_prediction_levels(self, res, level):
@@ -168,7 +171,11 @@ def get_samples(self, num_samples: int=None):
SP = self.S @ self.P
samples = np.apply_along_axis(lambda path: np.matmul(SP, path),
axis=1, arr=samples)
- return np.stack(samples)
+ samples = np.stack(samples)
+
+ # [samples, N, H] -> [N, H, samples]
+ samples = samples.transpose((1, 2, 0))
+ return samples
def get_prediction_levels(self, res, level):
""" Adds reconciled forecast levels to results dictionary """
@@ -176,8 +183,8 @@ def get_prediction_levels(self, res, level):
for lv in level:
min_q = (100 - lv) / 200
max_q = min_q + lv / 100
- res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)
- res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)
+ res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)
+ res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)
return res
# %% ../nbs/probabilistic_methods.ipynb 14
@@ -382,8 +389,7 @@ def get_samples(self, num_samples: int=None):
)
rec_samples[parent_idxs,:,:] = parent_samples
-
- return np.transpose(rec_samples, (2, 0, 1))
+ return rec_samples
def get_prediction_levels(self, res, level):
""" Adds reconciled forecast levels to results dictionary """
@@ -391,6 +397,6 @@ def get_prediction_levels(self, res, level):
for lv in level:
min_q = (100 - lv) / 200
max_q = min_q + lv / 100
- res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)
- res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)
+ res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)
+ res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)
return res
diff --git a/nbs/core.ipynb b/nbs/core.ipynb
index 215de23e..6ae0dbd8 100644
--- a/nbs/core.ipynb
+++ b/nbs/core.ipynb
@@ -131,7 +131,7 @@
"def _reverse_engineer_sigmah(Y_hat_df, y_hat, model_name, uids):\n",
" \"\"\"\n",
" This function assumes that the model creates prediction intervals\n",
- " under a normality assumption with the following the Equation:\n",
+ " under a normality with the following the Equation:\n",
" $\\hat{y}_{t+h} + c \\hat{sigma}_{h}$\n",
"\n",
" In the future, we might deprecate this function in favor of a \n",
@@ -193,8 +193,8 @@
" intervals_method: str = 'normality'):\n",
" \"\"\"Hierarchical Reconciliation Method.\n",
"\n",
- " The `reconcile` method is analogous to SKLearn `fit` method, it applies different \n",
- " reconciliation methods instantiated in the `reconcilers` list. \n",
+ " The `reconcile` method is analogous to SKLearn `fit_predict` method, it \n",
+ " applies different reconciliation methods instantiated in the `reconcilers` list. \n",
"\n",
" Most reconciliation methods can be described by the following convenient \n",
" linear algebra notation:\n",
@@ -307,9 +307,6 @@
" end = time.time()\n",
" self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)\n",
"\n",
- " end = time.time()\n",
- " self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)\n",
- "\n",
" if self.insample and has_fitted:\n",
" del reconciler_args['y_hat_insample']\n",
" gc.collect()\n",
diff --git a/nbs/evaluation.ipynb b/nbs/evaluation.ipynb
index 5a4b4c36..17c13f44 100644
--- a/nbs/evaluation.ipynb
+++ b/nbs/evaluation.ipynb
@@ -6,18 +6,25 @@
"metadata": {},
"outputs": [],
"source": [
- "#| default_exp evaluation\n",
- "# - [Anastasios Panagiotelis et. al. \"Probabilistic Forecast Reconciliation: Properties, Evaluation and Score Optimisation\"](https://www.monash.edu/business/ebs/research/publications/ebs/wp26-2020.pdf)"
+ "#| default_exp evaluation"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Hierarchical Evaluation \n",
+ "# Hierarchical Evaluation"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To assist the evaluation of hierarchical forecasting systems, we make available accuracy metrics along with the `HierarchicalEvaluation` module that facilitates the measurement of prediction's accuracy through the hierarchy levels. \n",
"\n",
- "> To assist on the evaluation of hierarchical systems we make available the `HierarchicalEvaluation` module\n",
- "that facilitates the measurement of prediction's accuracy through the levels of a hierarchy.
"
+ "The available metrics include point and probabilistic multivariate scoring rules that were used in previous hierarchical forecasting studies."
]
},
{
@@ -28,7 +35,7 @@
"source": [
"#| export\n",
"from inspect import signature\n",
- "from typing import Callable, Dict, List, Optional\n",
+ "from typing import Callable, Dict, List, Optional, Union\n",
"\n",
"import numpy as np\n",
"import pandas as pd"
@@ -45,6 +52,309 @@
"from nbdev.showdoc import add_docs, show_doc"
]
},
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Accuracy Measurements "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exporti\n",
+ "def _metric_protections(y: np.ndarray, y_hat: np.ndarray, \n",
+ " weights: Optional[np.ndarray]) -> None:\n",
+ " if not ((weights is None) or (np.sum(weights) > 0)):\n",
+ " raise Exception('Sum of `weights` cannot be 0')\n",
+ " if not ((weights is None) or (weights.shape == y.shape)):\n",
+ " raise Exception(\n",
+ " f'Wrong weight dimension weights.shape {weights.shape}, y.shape {y.shape}')\n",
+ "\n",
+ "def mse(y: np.ndarray, y_hat: np.ndarray, \n",
+ " weights: Optional[np.ndarray] = None,\n",
+ " axis: Optional[int] = None) -> Union[float, np.ndarray]:\n",
+ " \"\"\"Mean Squared Error\n",
+ "\n",
+ " Calculates Mean Squared Error between\n",
+ " `y` and `y_hat`. MSE measures the relative prediction\n",
+ " accuracy of a forecasting method by calculating the \n",
+ " squared deviation of the prediction and the true\n",
+ " value at a given time, and averages these devations\n",
+ " over the length of the series.\n",
+ "\n",
+ " $$ \\mathrm{MSE}(\\\\mathbf{y}_{\\\\tau}, \\\\mathbf{\\hat{y}}_{\\\\tau}) = \\\\frac{1}{H} \\\\sum^{t+H}_{\\\\tau=t+1} (y_{\\\\tau} - \\hat{y}_{\\\\tau})^{2} $$\n",
+ "\n",
+ " **Parameters:**
\n",
+ " `y`: numpy array, Actual values.
\n",
+ " `y_hat`: numpy array, Predicted values.
\n",
+ " `mask`: numpy array, Specifies date stamps per serie to consider in loss.
\n",
+ "\n",
+ " **Returns:**
\n",
+ " `mse`: numpy array, (single value).\n",
+ " \"\"\"\n",
+ " _metric_protections(y, y_hat, weights)\n",
+ "\n",
+ " delta_y = np.square(y - y_hat)\n",
+ " if weights is not None:\n",
+ " mse = np.average(delta_y[~np.isnan(delta_y)],\n",
+ " weights=weights[~np.isnan(delta_y)],\n",
+ " axis=axis)\n",
+ " else:\n",
+ " mse = np.nanmean(delta_y, axis=axis)\n",
+ " return mse\n",
+ "\n",
+ "def mqloss(y: np.ndarray, y_hat: np.ndarray, \n",
+ " quantiles: np.ndarray, \n",
+ " weights: Optional[np.ndarray] = None,\n",
+ " axis: Optional[int] = None) -> Union[float, np.ndarray]:\n",
+ " \"\"\"Multi-Quantile Loss\n",
+ "\n",
+ " Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n",
+ " MQL calculates the average multi-quantile Loss for\n",
+ " a given set of quantiles, based on the absolute \n",
+ " difference between predicted quantiles and observed values.\n",
+ "\n",
+ " $$ \\mathrm{MQL}(\\\\mathbf{y}_{\\\\tau},[\\\\mathbf{\\hat{y}}^{(q_{1})}_{\\\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\\\tau}]) = \\\\frac{1}{n} \\\\sum_{q_{i}} \\mathrm{QL}(\\\\mathbf{y}_{\\\\tau}, \\\\mathbf{\\hat{y}}^{(q_{i})}_{\\\\tau}) $$\n",
+ "\n",
+ " The limit behavior of MQL allows to measure the accuracy \n",
+ " of a full predictive distribution $\\mathbf{\\hat{F}}_{\\\\tau}$ with \n",
+ " the continuous ranked probability score (CRPS). This can be achieved \n",
+ " through a numerical integration technique, that discretizes the quantiles \n",
+ " and treats the CRPS integral with a left Riemann approximation, averaging over \n",
+ " uniformly distanced quantiles. \n",
+ "\n",
+ " $$ \\mathrm{CRPS}(y_{\\\\tau}, \\mathbf{\\hat{F}}_{\\\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\\\tau}, \\hat{y}^{(q)}_{\\\\tau}) dq $$\n",
+ "\n",
+ " **Parameters:**
\n",
+ " `y`: numpy array, Actual values.
\n",
+ " `y_hat`: numpy array, Predicted values.
\n",
+ " `quantiles`: numpy array. Quantiles between 0 and 1, to perform evaluation upon size (n_quantiles).
\n",
+ " `mask`: numpy array, Specifies date stamps per serie to consider in loss.
\n",
+ " \n",
+ " **Returns:**
\n",
+ " `mqloss`: numpy array, (single value).\n",
+ "\n",
+ " **References:**
\n",
+ " [Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n",
+ " [James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)\n",
+ " \"\"\"\n",
+ " if weights is None: weights = np.ones(y.shape)\n",
+ " if (np.sum(quantiles>1)>0 or np.sum(quantiles<0)>0):\n",
+ " raise Exception('`quantiles` need to be between 0 and 1')\n",
+ " \n",
+ " _metric_protections(y, y_hat, weights)\n",
+ " n_q = len(quantiles)\n",
+ " \n",
+ " y_rep = np.expand_dims(y, axis=-1)\n",
+ " error = y_hat - y_rep\n",
+ " sq = np.maximum(-error, np.zeros_like(error))\n",
+ " s1_q = np.maximum(error, np.zeros_like(error))\n",
+ " mqloss = (quantiles * sq + (1 - quantiles) * s1_q)\n",
+ " \n",
+ " # Match y/weights dimensions and compute weighted average\n",
+ " weights = np.repeat(np.expand_dims(weights, axis=-1), repeats=n_q, axis=-1)\n",
+ " mqloss = np.average(mqloss, weights=weights, axis=axis)\n",
+ "\n",
+ " return mqloss"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Mean Squared Scaled Error"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "def msse(y, y_hat, y_train, mask=None):\n",
+ " \"\"\"Mean Squared Scaled Error\n",
+ "\n",
+ " Computes Mean squared scaled error (MSSE), as proposed by Hyndman & Koehler (2006)\n",
+ " as an alternative to percentage errors, to avoid measure unstability.\n",
+ "\n",
+ " $$ \\mathrm{MSSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}, \\\\mathbf{\\hat{y}}^{naive1}) =\n",
+ " \\\\frac{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}^{naive1})} $$\n",
+ "\n",
+ " **Parameters:**
\n",
+ " `y`: numpy array, Actual values of size (`n_series`, `horizon`).
\n",
+ " `y_hat`: numpy array, Predicted values (`n_series`, `horizon`).
\n",
+ " `mask`: numpy array, Specifies date stamps per serie to consider in loss.
\n",
+ "\n",
+ " **Returns:**
\n",
+ " `loss`: float. \n",
+ "\n",
+ " **References:**
\n",
+ " - [Hyndman, R. J and Koehler, A. B. (2006).\n",
+ " \"Another look at measures of forecast accuracy\",\n",
+ " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)\n",
+ " - [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n",
+ " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n",
+ " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)\n",
+ " \"\"\"\n",
+ " if mask is None: \n",
+ " mask = np.ones_like(y)\n",
+ " n_series, horizon = y.shape\n",
+ "\n",
+ " eps = np.finfo(float).eps\n",
+ " y_naive = np.repeat(y_train[:,[-1]], horizon, axis=1)\n",
+ " norm = mse(y=y, y_hat=y_naive)\n",
+ " loss = mse(y=y, y_hat=y_hat, weights=mask)\n",
+ " loss = loss / (norm + eps)\n",
+ " return loss"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "show_doc(msse)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Scaled CRPS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "def scaled_crps(y, y_hat, quantiles):\n",
+ " \"\"\"Scaled Continues Ranked Probability Score\n",
+ "\n",
+ " Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021),\n",
+ " to measure the accuracy of predicted quantiles `y_hat` compared to the observation `y`.\n",
+ "\n",
+ " This metric averages percentual weighted absolute deviations as \n",
+ " defined by the quantile losses.\n",
+ "\n",
+ " $$ \\mathrm{sCRPS}(\\hat{F}_{\\\\tau}, \\mathbf{y}_{\\\\tau}) = 2 \\sum_{i}\n",
+ " \\int^{1}_{0}\n",
+ " \\\\frac{\\mathrm{QL}(\\hat{F}_{i,\\\\tau}, y_{i,\\\\tau})_{q}}{\\sum_{i} | y_{i,\\\\tau} |} dq $$\n",
+ "\n",
+ " where $\\hat{F}_{\\\\tau}$ is the an estimated multivariate distribution, and $y_{i,\\\\tau}$\n",
+ " are its realizations.\n",
+ "\n",
+ " **Parameters:**
\n",
+ " `y`: numpy array, Actual values of size (`n_series`, `horizon`).
\n",
+ " `y_hat`: numpy array, Predicted quantiles of size (`n_series`, `horizon`, `n_quantiles`).
\n",
+ " `quantiles`: numpy array,(`n_quantiles`). Quantiles to estimate from the distribution of y.
\n",
+ "\n",
+ " **Returns:**
\n",
+ " `loss`: float.\n",
+ "\n",
+ " **References:**
\n",
+ " - [Gneiting, Tilmann. (2011). \\\"Quantiles as optimal point forecasts\\\". \n",
+ " International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207010000063)\n",
+ " - [Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, Zhi Chen, Anil Gaba, Ilia Tsetlin, Robert L. Winkler. (2022). \n",
+ " \\\"The M5 uncertainty competition: Results, findings and conclusions\\\". \n",
+ " International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207021001722)\n",
+ " - [Syama Sundar Rangapuram, Lucien D Werner, Konstantinos Benidis, Pedro Mercado, Jan Gasthaus, Tim Januschowski. (2021). \n",
+ " \\\"End-to-End Learning of Coherent Probabilistic Forecasts for Hierarchical Time Series\\\". \n",
+ " Proceedings of the 38th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v139/rangapuram21a.html)\n",
+ " \"\"\"\n",
+ " eps = np.finfo(float).eps\n",
+ " norm = np.sum(np.abs(y))\n",
+ " loss = mqloss(y=y, y_hat=y_hat, quantiles=quantiles)\n",
+ " loss = 2 * loss * np.sum(np.ones(y.shape)) / (norm + eps)\n",
+ " return loss"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "show_doc(scaled_crps)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Energy Score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "def energy_score(y, y_sample1, y_sample2, beta=2):\n",
+ " \"\"\"Energy Score\n",
+ "\n",
+ " Calculates Gneiting's Energy Score sample approximation for\n",
+ " `y` and independent multivariate samples `y_sample1` and `y_sample2`.\n",
+ " The Energy Score generalizes the CRPS (`beta`=1) in the multivariate setting.\n",
+ "\n",
+ " $$ \\mathrm{ES}(\\\\mathbf{y}_{\\\\tau}, \\\\mathbf{\\hat{y}}_{\\\\tau}, \\\\mathbf{\\hat{y}}_{\\\\tau}') \n",
+ " = \\\\frac{1}{2} \\mathbb{E}_{\\hat{P}} \\\\left[ ||\\\\mathbf{\\hat{y}}_{\\\\tau} - \\\\mathbf{\\hat{y}}_{\\\\tau}'||^{\\\\beta} \\\\right]\n",
+ " - \\mathbb{E}_{\\hat{P}} \\\\left[ ||\\\\mathbf{y}_{\\\\tau} - \\\\mathbf{\\hat{y}}_{\\\\tau}||^{\\\\beta} \\\\right] \n",
+ " \\quad \\\\beta \\in (0,2]$$\n",
+ "\n",
+ " where $\\\\mathbf{\\hat{y}}_{\\\\tau}, \\\\mathbf{\\hat{y}}_{\\\\tau}'$ are independent samples drawn from $\\hat{P}$.\n",
+ "\n",
+ " **Parameters:**
\n",
+ " `y`: numpy array, Actual values of size (`n_series`, `horizon`).
\n",
+ " `y_sample1`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).
\n",
+ " `y_sample2`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).
\n",
+ " `beta`: float in (0,2], defines the energy score's power for the euclidean metric.
\n",
+ "\n",
+ " **Returns:**
\n",
+ " `loss`: float.\n",
+ "\n",
+ " **References:**
\n",
+ " - [Gneiting, Tilmann, and Adrian E. Raftery. (2007). \n",
+ " \\\"Strictly proper scoring rules, prediction and estimation\\\". \n",
+ " Journal of the American Statistical Association.](https://sites.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf)\n",
+ " - [Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, Rob J. Hyndman. (2022). \n",
+ " \\\"Probabilistic forecast reconciliation: Properties, evaluation and score optimisation\\\". \n",
+ " European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087) \n",
+ " \"\"\"\n",
+ " if beta>2 or beta<0:\n",
+ " raise Exception(\"beta needs to be between 0 and 2.\")\n",
+ "\n",
+ " dif1 = (y_sample1 - y_sample2)\n",
+ " dif2 = (y[:,:,None] - y_sample1)\n",
+ "\n",
+ " term1 = np.linalg.norm(dif1, axis=0) ** beta\n",
+ " term2 = np.linalg.norm(dif2, axis=0) ** beta\n",
+ "\n",
+ " loss = np.mean(term2 - 0.5 * term1)\n",
+ " return loss"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "show_doc(energy_score)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -59,7 +369,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class HierarchicalEvaluation: \n",
+ "class HierarchicalEvaluation:\n",
" \"\"\"Hierarchical Evaluation Class.\n",
" \n",
" You can use your own metrics to evaluate the performance of each level in the structure.\n",
@@ -71,6 +381,8 @@
"\n",
" **Parameters:**
\n",
" `evaluators`: functions with arguments `y`, `y_hat` (numpy arrays).
\n",
+ "\n",
+ " **References:**
\n",
" \"\"\"\n",
" def __init__(self, \n",
" evaluators: List[Callable]):\n",
@@ -182,6 +494,7 @@
"from hierarchicalforecast.core import HierarchicalReconciliation\n",
"from hierarchicalforecast.methods import BottomUp, MinTrace, ERM\n",
"from hierarchicalforecast.utils import aggregate\n",
+ "\n",
"df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')\n",
"df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1)\n",
"df.insert(0, 'Country', 'Australia')\n",
@@ -283,11 +596,42 @@
" Y_df=hier_grouped_df,\n",
" benchmark='y_model')"
]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# References \n",
+ "\n",
+ "- [Gneiting, Tilmann, and Adrian E. Raftery. (2007). \n",
+ "\\\"Strictly proper scoring rules, prediction and estimation\\\". \n",
+ "Journal of the American Statistical Association.](https://sites.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf)\n",
+ "- [Gneiting, Tilmann. (2011). \\\"Quantiles as optimal point forecasts\\\". \n",
+ "International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207010000063)\n",
+ "- [Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, Zhi Chen, Anil Gaba, Ilia Tsetlin, Robert L. Winkler. (2022). \n",
+ "\\\"The M5 uncertainty competition: Results, findings and conclusions\\\". \n",
+ "International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207021001722)\n",
+ "- [Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, Rob J. Hyndman. (2022). \n",
+ "\\\"Probabilistic forecast reconciliation: Properties, evaluation and score optimisation\\\". \n",
+ "European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087)\n",
+ "- [Syama Sundar Rangapuram, Lucien D Werner, Konstantinos Benidis, Pedro Mercado, Jan Gasthaus, Tim Januschowski. (2021). \n",
+ "\\\"End-to-End Learning of Coherent Probabilistic Forecasts for Hierarchical Time Series\\\". \n",
+ "Proceedings of the 38th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v139/rangapuram21a.html)\n",
+ "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker (2022). \n",
+ "\"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures\". \n",
+ "Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "hierarchicalforecast",
"language": "python",
"name": "python3"
}
diff --git a/nbs/examples/TourismLarge-Evaluation.ipynb b/nbs/examples/TourismLarge-Evaluation.ipynb
new file mode 100644
index 00000000..6d7a17a2
--- /dev/null
+++ b/nbs/examples/TourismLarge-Evaluation.ipynb
@@ -0,0 +1,261 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "9a2ffba7",
+ "metadata": {},
+ "source": [
+ "# • End to End Walkthrough\n",
+ "\n",
+ "> Hierarchical Forecast's reconciliation and evaluation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3ebf6eb9",
+ "metadata": {},
+ "source": [
+ "This notebook offers a step to step guide to create a hierarchical forecasting pipeline.\n",
+ "\n",
+ "In the pipeline we will use `HierarchicalForecast` and `StatsForecast` core class, to create base predictions, reconcile and evaluate them. \n",
+ "\n",
+ "We will use the TourismL dataset that summarizes large Australian national visitor survey.\n",
+ "\n",
+ "Outline\n",
+ "1. Installing Packages\n",
+ "2. Prepare TourismL dataset\n",
+ " - Read and aggregate\n",
+ " - StatsForecast's Base Predictions\n",
+ "3. Reconciliar\n",
+ "4. Evaluar"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d08fca30",
+ "metadata": {},
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "690aea05",
+ "metadata": {},
+ "source": [
+ "## 1. Installing HierarchicalForecast"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "225fd9fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %%capture\n",
+ "# !pip install hierarchicalforecast\n",
+ "# !pip install -U numba statsforecast datasetsforecast"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1624c79f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "from datasetsforecast.hierarchical import HierarchicalData\n",
+ "\n",
+ "from statsforecast.core import StatsForecast\n",
+ "from statsforecast.models import AutoARIMA, Naive\n",
+ "\n",
+ "from hierarchicalforecast.utils import HierarchicalPlot\n",
+ "from hierarchicalforecast.core import HierarchicalReconciliation\n",
+ "from hierarchicalforecast.evaluation import HierarchicalEvaluation\n",
+ "from hierarchicalforecast.methods import BottomUp, MinTrace, ERM"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "58961ccd",
+ "metadata": {},
+ "source": [
+ "## 2. Preparing TourismL Dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8d002ec0",
+ "metadata": {},
+ "source": [
+ "### 2.1 Read and Aggregate"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0045589f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Y_df, S_df, tags = HierarchicalData.load('./data', 'TourismLarge')\n",
+ "Y_df['ds'] = pd.to_datetime(Y_df['ds'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3958dc40",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "hplot = HierarchicalPlot(S=S_df, tags=tags)\n",
+ "hplot.plot_summing_matrix()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "da22dd3c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Y_test_df = Y_df.groupby('unique_id').tail(12)\n",
+ "Y_train_df = Y_df.drop(Y_test_df.index)\n",
+ "\n",
+ "Y_test_df = Y_test_df.set_index('unique_id')\n",
+ "Y_train_df = Y_train_df.set_index('unique_id')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c56af1d9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Y_train_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "98fec0eb",
+ "metadata": {},
+ "source": [
+ "### 2.2 StatsForecast's Base Predictions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "76df4d31",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "if os.path.isfile('Y_hat.csv'):\n",
+ " Y_hat_df = pd.read_csv('Y_hat.csv')\n",
+ " Y_hat_df = Y_hat_df.set_index('unique_id')\n",
+ "else:\n",
+ " fcst = StatsForecast(\n",
+ " df=Y_train_df, \n",
+ " models=[AutoARIMA(season_length=12)],\n",
+ " fallback_model=[Naive()],\n",
+ " freq='M', \n",
+ " n_jobs=-1\n",
+ " )\n",
+ " Y_hat_df = fcst.forecast(h=12, level=[80])\n",
+ " Y_hat_df.to_csv('Y_hat.csv', index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "740a5ea8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Y_hat_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e557adc7",
+ "metadata": {},
+ "source": [
+ "## 3. Reconciliate Predictions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e1982db8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# reconcilers = [\n",
+ "# BottomUp(),\n",
+ "# MinTrace(method='ols'),\n",
+ "# ERM(method='reg_bu', lambda_reg=100)\n",
+ "# ]\n",
+ "# hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n",
+ "# Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df, \n",
+ "# S=S_df, tags=tags)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7109676b",
+ "metadata": {},
+ "source": [
+ "## 4. Evaluation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a524e1c2",
+ "metadata": {},
+ "source": [
+ "For reference we can check the performance when compared to that reported by previous hierarchical forecasting studies from the [PMM-CNN paper](https://arxiv.org/abs/2110.13179)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c0b2c842",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0821f68d",
+ "metadata": {},
+ "source": [
+ "## References"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ffa4a749",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "hierarchicalforecast",
+ "language": "python",
+ "name": "hierarchicalforecast"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/nbs/methods.ipynb b/nbs/methods.ipynb
index daec40a9..b4f4fd56 100644
--- a/nbs/methods.ipynb
+++ b/nbs/methods.ipynb
@@ -180,9 +180,7 @@
" if self.sampler is None:\n",
" raise Exception(\"This model instance does not have sampler. Call fit with `intervals_method`.\")\n",
"\n",
- " # [samples, N, H] -> [N, H, samples]\n",
" samples = self.sampler.get_samples(num_samples=num_samples)\n",
- " samples = samples.transpose((1, 2, 0))\n",
" return samples"
]
},
diff --git a/nbs/probabilistic_methods.ipynb b/nbs/probabilistic_methods.ipynb
index 0a028176..565cad80 100644
--- a/nbs/probabilistic_methods.ipynb
+++ b/nbs/probabilistic_methods.ipynb
@@ -50,7 +50,7 @@
"source": [
"#| hide\n",
"from nbdev.showdoc import add_docs, show_doc\n",
- "from fastcore.test import ExceptionExpected, test_close, test_eq\n",
+ "from fastcore.test import ExceptionExpected, test_close, test_eq, test_fail\n",
"\n",
"from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut, MinTrace, OptimalCombination, ERM"
]
@@ -138,6 +138,9 @@
" partial_samples = state.multivariate_normal(mean=self.SP @ self.y_hat[:,t],\n",
" cov=self.cov_rec[t], size=num_samples)\n",
" samples[:,:,t] = partial_samples\n",
+ "\n",
+ " # [samples, N, H] -> [N, H, samples]\n",
+ " samples = samples.transpose((1, 2, 0))\n",
" return samples\n",
"\n",
" def get_prediction_levels(self, res, level):\n",
@@ -256,7 +259,11 @@
" SP = self.S @ self.P\n",
" samples = np.apply_along_axis(lambda path: np.matmul(SP, path),\n",
" axis=1, arr=samples)\n",
- " return np.stack(samples)\n",
+ " samples = np.stack(samples)\n",
+ "\n",
+ " # [samples, N, H] -> [N, H, samples]\n",
+ " samples = samples.transpose((1, 2, 0))\n",
+ " return samples\n",
"\n",
" def get_prediction_levels(self, res, level):\n",
" \"\"\" Adds reconciled forecast levels to results dictionary \"\"\"\n",
@@ -264,8 +271,8 @@
" for lv in level:\n",
" min_q = (100 - lv) / 200\n",
" max_q = min_q + lv / 100\n",
- " res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)\n",
- " res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)\n",
+ " res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)\n",
+ " res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)\n",
" return res"
]
},
@@ -502,8 +509,7 @@
" )\n",
"\n",
" rec_samples[parent_idxs,:,:] = parent_samples\n",
- "\n",
- " return np.transpose(rec_samples, (2, 0, 1))\n",
+ " return rec_samples\n",
"\n",
" def get_prediction_levels(self, res, level):\n",
" \"\"\" Adds reconciled forecast levels to results dictionary \"\"\"\n",
@@ -511,8 +517,8 @@
" for lv in level:\n",
" min_q = (100 - lv) / 200\n",
" max_q = min_q + lv / 100\n",
- " res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)\n",
- " res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)\n",
+ " res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)\n",
+ " res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)\n",
" return res"
]
},
@@ -541,6 +547,8 @@
"outputs": [],
"source": [
"#| hide\n",
+ "from hierarchicalforecast.evaluation import msse, energy_score, scaled_crps\n",
+ "\n",
"S = np.array([[1., 1., 1., 1.],\n",
" [1., 1., 0., 0.],\n",
" [0., 0., 1., 1.],\n",
@@ -568,7 +576,9 @@
"y_hat_base_insample = S @ y_hat_bottom_insample\n",
"sigma = np.nansum((y_base - y_hat_base_insample) ** 2, axis=1) / (y_base.shape[1] - 1)\n",
"sigma = np.sqrt(sigma)\n",
- "sigmah = sigma[:, None] * np.sqrt(np.vstack([np.arange(1, h + 1) for _ in range(y_base.shape[0])])) "
+ "sigmah = sigma[:, None] * np.sqrt(np.vstack([np.arange(1, h + 1) for _ in range(y_base.shape[0])]))\n",
+ "noise = np.random.normal(scale=sigmah)\n",
+ "y_test = y_hat_base + noise"
]
},
{
@@ -624,6 +634,43 @@
"test_eq(bootstrap_samples.shape, permbu_samples.shape)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# test MSSE's execution\n",
+ "msse(y=y_test, y_hat=y_hat_base, y_train=y_base)\n",
+ "\n",
+ "# test energy score's execution\n",
+ "energy_score(y=y_test,\n",
+ " y_sample1=bootstrap_samples, y_sample2=permbu_samples)\n",
+ "\n",
+ "# test scaled CRPS' execution\n",
+ "quantiles = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])\n",
+ "bootstrap_quantiles = np.quantile(bootstrap_samples, q=quantiles, axis=2)\n",
+ "bootstrap_quantiles = bootstrap_quantiles.transpose((1,2,0)) # [Q,N,H] -> [N,H,Q]\n",
+ "scaled_crps(y=y_test, y_hat=bootstrap_quantiles, quantiles=quantiles)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# test quantile loss protections\n",
+ "quantiles = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.2])\n",
+ "test_fail(\n",
+ " scaled_crps,\n",
+ " contains='between 0 and 1',\n",
+ " args=(y_test, bootstrap_quantiles, quantiles), \n",
+ ")"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},