diff --git a/hierarchicalforecast/_modidx.py b/hierarchicalforecast/_modidx.py index 9c31fd68..3b2958f5 100644 --- a/hierarchicalforecast/_modidx.py +++ b/hierarchicalforecast/_modidx.py @@ -20,7 +20,19 @@ 'hierarchicalforecast.evaluation.HierarchicalEvaluation.__init__': ( 'evaluation.html#hierarchicalevaluation.__init__', 'hierarchicalforecast/evaluation.py'), 'hierarchicalforecast.evaluation.HierarchicalEvaluation.evaluate': ( 'evaluation.html#hierarchicalevaluation.evaluate', - 'hierarchicalforecast/evaluation.py')}, + 'hierarchicalforecast/evaluation.py'), + 'hierarchicalforecast.evaluation._metric_protections': ( 'evaluation.html#_metric_protections', + 'hierarchicalforecast/evaluation.py'), + 'hierarchicalforecast.evaluation.energy_score': ( 'evaluation.html#energy_score', + 'hierarchicalforecast/evaluation.py'), + 'hierarchicalforecast.evaluation.mqloss': ( 'evaluation.html#mqloss', + 'hierarchicalforecast/evaluation.py'), + 'hierarchicalforecast.evaluation.mse': ( 'evaluation.html#mse', + 'hierarchicalforecast/evaluation.py'), + 'hierarchicalforecast.evaluation.msse': ( 'evaluation.html#msse', + 'hierarchicalforecast/evaluation.py'), + 'hierarchicalforecast.evaluation.scaled_crps': ( 'evaluation.html#scaled_crps', + 'hierarchicalforecast/evaluation.py')}, 'hierarchicalforecast.methods': { 'hierarchicalforecast.methods.BottomUp': ( 'methods.html#bottomup', 'hierarchicalforecast/methods.py'), 'hierarchicalforecast.methods.BottomUp._get_PW_matrices': ( 'methods.html#bottomup._get_pw_matrices', diff --git a/hierarchicalforecast/core.py b/hierarchicalforecast/core.py index bf58938a..fa804235 100644 --- a/hierarchicalforecast/core.py +++ b/hierarchicalforecast/core.py @@ -38,7 +38,7 @@ def _build_fn_name(fn) -> str: def _reverse_engineer_sigmah(Y_hat_df, y_hat, model_name, uids): """ This function assumes that the model creates prediction intervals - under a normality assumption with the following the Equation: + under a normality with the following the Equation: $\hat{y}_{t+h} + c \hat{sigma}_{h}$ In the future, we might deprecate this function in favor of a @@ -93,8 +93,8 @@ def reconcile(self, intervals_method: str = 'normality'): """Hierarchical Reconciliation Method. - The `reconcile` method is analogous to SKLearn `fit` method, it applies different - reconciliation methods instantiated in the `reconcilers` list. + The `reconcile` method is analogous to SKLearn `fit_predict` method, it + applies different reconciliation methods instantiated in the `reconcilers` list. Most reconciliation methods can be described by the following convenient linear algebra notation: @@ -207,9 +207,6 @@ def reconcile(self, end = time.time() self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start) - end = time.time() - self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start) - if self.insample and has_fitted: del reconciler_args['y_hat_insample'] gc.collect() diff --git a/hierarchicalforecast/evaluation.py b/hierarchicalforecast/evaluation.py index 048d995e..296eabe4 100644 --- a/hierarchicalforecast/evaluation.py +++ b/hierarchicalforecast/evaluation.py @@ -1,17 +1,235 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/evaluation.ipynb. # %% auto 0 -__all__ = ['HierarchicalEvaluation'] +__all__ = ['msse', 'scaled_crps', 'energy_score', 'HierarchicalEvaluation'] -# %% ../nbs/evaluation.ipynb 2 +# %% ../nbs/evaluation.ipynb 3 from inspect import signature -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional, Union import numpy as np import pandas as pd -# %% ../nbs/evaluation.ipynb 5 -class HierarchicalEvaluation: +# %% ../nbs/evaluation.ipynb 6 +def _metric_protections(y: np.ndarray, y_hat: np.ndarray, + weights: Optional[np.ndarray]) -> None: + if not ((weights is None) or (np.sum(weights) > 0)): + raise Exception('Sum of `weights` cannot be 0') + if not ((weights is None) or (weights.shape == y.shape)): + raise Exception( + f'Wrong weight dimension weights.shape {weights.shape}, y.shape {y.shape}') + +def mse(y: np.ndarray, y_hat: np.ndarray, + weights: Optional[np.ndarray] = None, + axis: Optional[int] = None) -> Union[float, np.ndarray]: + """Mean Squared Error + + Calculates Mean Squared Error between + `y` and `y_hat`. MSE measures the relative prediction + accuracy of a forecasting method by calculating the + squared deviation of the prediction and the true + value at a given time, and averages these devations + over the length of the series. + + $$ \mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \hat{y}_{\\tau})^{2} $$ + + **Parameters:**
+ `y`: numpy array, Actual values.
+ `y_hat`: numpy array, Predicted values.
+ `mask`: numpy array, Specifies date stamps per serie to consider in loss.
+ + **Returns:**
+ `mse`: numpy array, (single value). + """ + _metric_protections(y, y_hat, weights) + + delta_y = np.square(y - y_hat) + if weights is not None: + mse = np.average(delta_y[~np.isnan(delta_y)], + weights=weights[~np.isnan(delta_y)], + axis=axis) + else: + mse = np.nanmean(delta_y, axis=axis) + return mse + +def mqloss(y: np.ndarray, y_hat: np.ndarray, + quantiles: np.ndarray, + weights: Optional[np.ndarray] = None, + axis: Optional[int] = None) -> Union[float, np.ndarray]: + """Multi-Quantile Loss + + Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`. + MQL calculates the average multi-quantile Loss for + a given set of quantiles, based on the absolute + difference between predicted quantiles and observed values. + + $$ \mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\hat{y}}^{(q_{1})}_{\\tau}, ... ,\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}^{(q_{i})}_{\\tau}) $$ + + The limit behavior of MQL allows to measure the accuracy + of a full predictive distribution $\mathbf{\hat{F}}_{\\tau}$ with + the continuous ranked probability score (CRPS). This can be achieved + through a numerical integration technique, that discretizes the quantiles + and treats the CRPS integral with a left Riemann approximation, averaging over + uniformly distanced quantiles. + + $$ \mathrm{CRPS}(y_{\\tau}, \mathbf{\hat{F}}_{\\tau}) = \int^{1}_{0} \mathrm{QL}(y_{\\tau}, \hat{y}^{(q)}_{\\tau}) dq $$ + + **Parameters:**
+ `y`: numpy array, Actual values.
+ `y_hat`: numpy array, Predicted values.
+ `quantiles`: numpy array. Quantiles between 0 and 1, to perform evaluation upon size (n_quantiles).
+ `mask`: numpy array, Specifies date stamps per serie to consider in loss.
+ + **Returns:**
+ `mqloss`: numpy array, (single value). + + **References:**
+ [Roger Koenker and Gilbert Bassett, Jr., "Regression Quantiles".](https://www.jstor.org/stable/1913643)
+ [James E. Matheson and Robert L. Winkler, "Scoring Rules for Continuous Probability Distributions".](https://www.jstor.org/stable/2629907) + """ + if weights is None: weights = np.ones(y.shape) + if (np.sum(quantiles>1)>0 or np.sum(quantiles<0)>0): + raise Exception('`quantiles` need to be between 0 and 1') + + _metric_protections(y, y_hat, weights) + n_q = len(quantiles) + + y_rep = np.expand_dims(y, axis=-1) + error = y_hat - y_rep + sq = np.maximum(-error, np.zeros_like(error)) + s1_q = np.maximum(error, np.zeros_like(error)) + mqloss = (quantiles * sq + (1 - quantiles) * s1_q) + + # Match y/weights dimensions and compute weighted average + weights = np.repeat(np.expand_dims(weights, axis=-1), repeats=n_q, axis=-1) + mqloss = np.average(mqloss, weights=weights, axis=axis) + + return mqloss + +# %% ../nbs/evaluation.ipynb 8 +def msse(y, y_hat, y_train, mask=None): + """Mean Squared Scaled Error + + Computes Mean squared scaled error (MSSE), as proposed by Hyndman & Koehler (2006) + as an alternative to percentage errors, to avoid measure unstability. + + $$ \mathrm{MSSE}(\\mathbf{y}, \\mathbf{\hat{y}}, \\mathbf{\hat{y}}^{naive1}) = + \\frac{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}})}{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}}^{naive1})} $$ + + **Parameters:**
+ `y`: numpy array, Actual values of size (`n_series`, `horizon`).
+ `y_hat`: numpy array, Predicted values (`n_series`, `horizon`).
+ `mask`: numpy array, Specifies date stamps per serie to consider in loss.
+ + **Returns:**
+ `loss`: float. + + **References:**
+ - [Hyndman, R. J and Koehler, A. B. (2006). + "Another look at measures of forecast accuracy", + International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239) + - [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. + "Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. + Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf) + """ + if mask is None: + mask = np.ones_like(y) + n_series, horizon = y.shape + + eps = np.finfo(float).eps + y_naive = np.repeat(y_train[:,[-1]], horizon, axis=1) + norm = mse(y=y, y_hat=y_naive) + loss = mse(y=y, y_hat=y_hat, weights=mask) + loss = loss / (norm + eps) + return loss + +# %% ../nbs/evaluation.ipynb 11 +def scaled_crps(y, y_hat, quantiles): + """Scaled Continues Ranked Probability Score + + Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021), + to measure the accuracy of predicted quantiles `y_hat` compared to the observation `y`. + + This metric averages percentual weighted absolute deviations as + defined by the quantile losses. + + $$ \mathrm{sCRPS}(\hat{F}_{\\tau}, \mathbf{y}_{\\tau}) = 2 \sum_{i} + \int^{1}_{0} + \\frac{\mathrm{QL}(\hat{F}_{i,\\tau}, y_{i,\\tau})_{q}}{\sum_{i} | y_{i,\\tau} |} dq $$ + + where $\hat{F}_{\\tau}$ is the an estimated multivariate distribution, and $y_{i,\\tau}$ + are its realizations. + + **Parameters:**
+ `y`: numpy array, Actual values of size (`n_series`, `horizon`).
+ `y_hat`: numpy array, Predicted quantiles of size (`n_series`, `horizon`, `n_quantiles`).
+ `quantiles`: numpy array,(`n_quantiles`). Quantiles to estimate from the distribution of y.
+ + **Returns:**
+ `loss`: float. + + **References:**
+ - [Gneiting, Tilmann. (2011). \"Quantiles as optimal point forecasts\". + International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207010000063) + - [Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, Zhi Chen, Anil Gaba, Ilia Tsetlin, Robert L. Winkler. (2022). + \"The M5 uncertainty competition: Results, findings and conclusions\". + International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207021001722) + - [Syama Sundar Rangapuram, Lucien D Werner, Konstantinos Benidis, Pedro Mercado, Jan Gasthaus, Tim Januschowski. (2021). + \"End-to-End Learning of Coherent Probabilistic Forecasts for Hierarchical Time Series\". + Proceedings of the 38th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v139/rangapuram21a.html) + """ + eps = np.finfo(float).eps + norm = np.sum(np.abs(y)) + loss = mqloss(y=y, y_hat=y_hat, quantiles=quantiles) + loss = 2 * loss * np.sum(np.ones(y.shape)) / (norm + eps) + return loss + +# %% ../nbs/evaluation.ipynb 14 +def energy_score(y, y_sample1, y_sample2, beta=2): + """Energy Score + + Calculates Gneiting's Energy Score sample approximation for + `y` and independent multivariate samples `y_sample1` and `y_sample2`. + The Energy Score generalizes the CRPS (`beta`=1) in the multivariate setting. + + $$ \mathrm{ES}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}') + = \\frac{1}{2} \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{\hat{y}}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}'||^{\\beta} \\right] + - \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{y}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}||^{\\beta} \\right] + \quad \\beta \in (0,2]$$ + + where $\\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}'$ are independent samples drawn from $\hat{P}$. + + **Parameters:**
+ `y`: numpy array, Actual values of size (`n_series`, `horizon`).
+ `y_sample1`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).
+ `y_sample2`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).
+ `beta`: float in (0,2], defines the energy score's power for the euclidean metric.
+ + **Returns:**
+ `loss`: float. + + **References:**
+ - [Gneiting, Tilmann, and Adrian E. Raftery. (2007). + \"Strictly proper scoring rules, prediction and estimation\". + Journal of the American Statistical Association.](https://sites.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf) + - [Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, Rob J. Hyndman. (2022). + \"Probabilistic forecast reconciliation: Properties, evaluation and score optimisation\". + European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087) + """ + if beta>2 or beta<0: + raise Exception("beta needs to be between 0 and 2.") + + dif1 = (y_sample1 - y_sample2) + dif2 = (y[:,:,None] - y_sample1) + + term1 = np.linalg.norm(dif1, axis=0) ** beta + term2 = np.linalg.norm(dif2, axis=0) ** beta + + loss = np.mean(term2 - 0.5 * term1) + return loss + +# %% ../nbs/evaluation.ipynb 17 +class HierarchicalEvaluation: """Hierarchical Evaluation Class. You can use your own metrics to evaluate the performance of each level in the structure. @@ -23,6 +241,8 @@ class HierarchicalEvaluation: **Parameters:**
`evaluators`: functions with arguments `y`, `y_hat` (numpy arrays).
+ + **References:**
""" def __init__(self, evaluators: List[Callable]): diff --git a/hierarchicalforecast/methods.py b/hierarchicalforecast/methods.py index 4f4f1538..1a327b2c 100644 --- a/hierarchicalforecast/methods.py +++ b/hierarchicalforecast/methods.py @@ -108,9 +108,7 @@ def sample(self, if self.sampler is None: raise Exception("This model instance does not have sampler. Call fit with `intervals_method`.") - # [samples, N, H] -> [N, H, samples] samples = self.sampler.get_samples(num_samples=num_samples) - samples = samples.transpose((1, 2, 0)) return samples # %% ../nbs/methods.ipynb 10 diff --git a/hierarchicalforecast/probabilistic_methods.py b/hierarchicalforecast/probabilistic_methods.py index 5b089dd1..2c4f59ed 100644 --- a/hierarchicalforecast/probabilistic_methods.py +++ b/hierarchicalforecast/probabilistic_methods.py @@ -82,6 +82,9 @@ def get_samples(self, num_samples: int=None): partial_samples = state.multivariate_normal(mean=self.SP @ self.y_hat[:,t], cov=self.cov_rec[t], size=num_samples) samples[:,:,t] = partial_samples + + # [samples, N, H] -> [N, H, samples] + samples = samples.transpose((1, 2, 0)) return samples def get_prediction_levels(self, res, level): @@ -168,7 +171,11 @@ def get_samples(self, num_samples: int=None): SP = self.S @ self.P samples = np.apply_along_axis(lambda path: np.matmul(SP, path), axis=1, arr=samples) - return np.stack(samples) + samples = np.stack(samples) + + # [samples, N, H] -> [N, H, samples] + samples = samples.transpose((1, 2, 0)) + return samples def get_prediction_levels(self, res, level): """ Adds reconciled forecast levels to results dictionary """ @@ -176,8 +183,8 @@ def get_prediction_levels(self, res, level): for lv in level: min_q = (100 - lv) / 200 max_q = min_q + lv / 100 - res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0) - res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0) + res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2) + res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2) return res # %% ../nbs/probabilistic_methods.ipynb 14 @@ -382,8 +389,7 @@ def get_samples(self, num_samples: int=None): ) rec_samples[parent_idxs,:,:] = parent_samples - - return np.transpose(rec_samples, (2, 0, 1)) + return rec_samples def get_prediction_levels(self, res, level): """ Adds reconciled forecast levels to results dictionary """ @@ -391,6 +397,6 @@ def get_prediction_levels(self, res, level): for lv in level: min_q = (100 - lv) / 200 max_q = min_q + lv / 100 - res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0) - res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0) + res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2) + res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2) return res diff --git a/nbs/core.ipynb b/nbs/core.ipynb index 215de23e..6ae0dbd8 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -131,7 +131,7 @@ "def _reverse_engineer_sigmah(Y_hat_df, y_hat, model_name, uids):\n", " \"\"\"\n", " This function assumes that the model creates prediction intervals\n", - " under a normality assumption with the following the Equation:\n", + " under a normality with the following the Equation:\n", " $\\hat{y}_{t+h} + c \\hat{sigma}_{h}$\n", "\n", " In the future, we might deprecate this function in favor of a \n", @@ -193,8 +193,8 @@ " intervals_method: str = 'normality'):\n", " \"\"\"Hierarchical Reconciliation Method.\n", "\n", - " The `reconcile` method is analogous to SKLearn `fit` method, it applies different \n", - " reconciliation methods instantiated in the `reconcilers` list. \n", + " The `reconcile` method is analogous to SKLearn `fit_predict` method, it \n", + " applies different reconciliation methods instantiated in the `reconcilers` list. \n", "\n", " Most reconciliation methods can be described by the following convenient \n", " linear algebra notation:\n", @@ -307,9 +307,6 @@ " end = time.time()\n", " self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)\n", "\n", - " end = time.time()\n", - " self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)\n", - "\n", " if self.insample and has_fitted:\n", " del reconciler_args['y_hat_insample']\n", " gc.collect()\n", diff --git a/nbs/evaluation.ipynb b/nbs/evaluation.ipynb index 5a4b4c36..17c13f44 100644 --- a/nbs/evaluation.ipynb +++ b/nbs/evaluation.ipynb @@ -6,18 +6,25 @@ "metadata": {}, "outputs": [], "source": [ - "#| default_exp evaluation\n", - "# - [Anastasios Panagiotelis et. al. \"Probabilistic Forecast Reconciliation: Properties, Evaluation and Score Optimisation\"](https://www.monash.edu/business/ebs/research/publications/ebs/wp26-2020.pdf)" + "#| default_exp evaluation" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# Hierarchical Evaluation \n", + "# Hierarchical Evaluation" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To assist the evaluation of hierarchical forecasting systems, we make available accuracy metrics along with the `HierarchicalEvaluation` module that facilitates the measurement of prediction's accuracy through the hierarchy levels. \n", "\n", - "> To assist on the evaluation of hierarchical systems we make available the `HierarchicalEvaluation` module\n", - "that facilitates the measurement of prediction's accuracy through the levels of a hierarchy.

" + "The available metrics include point and probabilistic multivariate scoring rules that were used in previous hierarchical forecasting studies." ] }, { @@ -28,7 +35,7 @@ "source": [ "#| export\n", "from inspect import signature\n", - "from typing import Callable, Dict, List, Optional\n", + "from typing import Callable, Dict, List, Optional, Union\n", "\n", "import numpy as np\n", "import pandas as pd" @@ -45,6 +52,309 @@ "from nbdev.showdoc import add_docs, show_doc" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Accuracy Measurements " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "def _metric_protections(y: np.ndarray, y_hat: np.ndarray, \n", + " weights: Optional[np.ndarray]) -> None:\n", + " if not ((weights is None) or (np.sum(weights) > 0)):\n", + " raise Exception('Sum of `weights` cannot be 0')\n", + " if not ((weights is None) or (weights.shape == y.shape)):\n", + " raise Exception(\n", + " f'Wrong weight dimension weights.shape {weights.shape}, y.shape {y.shape}')\n", + "\n", + "def mse(y: np.ndarray, y_hat: np.ndarray, \n", + " weights: Optional[np.ndarray] = None,\n", + " axis: Optional[int] = None) -> Union[float, np.ndarray]:\n", + " \"\"\"Mean Squared Error\n", + "\n", + " Calculates Mean Squared Error between\n", + " `y` and `y_hat`. MSE measures the relative prediction\n", + " accuracy of a forecasting method by calculating the \n", + " squared deviation of the prediction and the true\n", + " value at a given time, and averages these devations\n", + " over the length of the series.\n", + "\n", + " $$ \\mathrm{MSE}(\\\\mathbf{y}_{\\\\tau}, \\\\mathbf{\\hat{y}}_{\\\\tau}) = \\\\frac{1}{H} \\\\sum^{t+H}_{\\\\tau=t+1} (y_{\\\\tau} - \\hat{y}_{\\\\tau})^{2} $$\n", + "\n", + " **Parameters:**
\n", + " `y`: numpy array, Actual values.
\n", + " `y_hat`: numpy array, Predicted values.
\n", + " `mask`: numpy array, Specifies date stamps per serie to consider in loss.
\n", + "\n", + " **Returns:**
\n", + " `mse`: numpy array, (single value).\n", + " \"\"\"\n", + " _metric_protections(y, y_hat, weights)\n", + "\n", + " delta_y = np.square(y - y_hat)\n", + " if weights is not None:\n", + " mse = np.average(delta_y[~np.isnan(delta_y)],\n", + " weights=weights[~np.isnan(delta_y)],\n", + " axis=axis)\n", + " else:\n", + " mse = np.nanmean(delta_y, axis=axis)\n", + " return mse\n", + "\n", + "def mqloss(y: np.ndarray, y_hat: np.ndarray, \n", + " quantiles: np.ndarray, \n", + " weights: Optional[np.ndarray] = None,\n", + " axis: Optional[int] = None) -> Union[float, np.ndarray]:\n", + " \"\"\"Multi-Quantile Loss\n", + "\n", + " Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.\n", + " MQL calculates the average multi-quantile Loss for\n", + " a given set of quantiles, based on the absolute \n", + " difference between predicted quantiles and observed values.\n", + "\n", + " $$ \\mathrm{MQL}(\\\\mathbf{y}_{\\\\tau},[\\\\mathbf{\\hat{y}}^{(q_{1})}_{\\\\tau}, ... ,\\hat{y}^{(q_{n})}_{\\\\tau}]) = \\\\frac{1}{n} \\\\sum_{q_{i}} \\mathrm{QL}(\\\\mathbf{y}_{\\\\tau}, \\\\mathbf{\\hat{y}}^{(q_{i})}_{\\\\tau}) $$\n", + "\n", + " The limit behavior of MQL allows to measure the accuracy \n", + " of a full predictive distribution $\\mathbf{\\hat{F}}_{\\\\tau}$ with \n", + " the continuous ranked probability score (CRPS). This can be achieved \n", + " through a numerical integration technique, that discretizes the quantiles \n", + " and treats the CRPS integral with a left Riemann approximation, averaging over \n", + " uniformly distanced quantiles. \n", + "\n", + " $$ \\mathrm{CRPS}(y_{\\\\tau}, \\mathbf{\\hat{F}}_{\\\\tau}) = \\int^{1}_{0} \\mathrm{QL}(y_{\\\\tau}, \\hat{y}^{(q)}_{\\\\tau}) dq $$\n", + "\n", + " **Parameters:**
\n", + " `y`: numpy array, Actual values.
\n", + " `y_hat`: numpy array, Predicted values.
\n", + " `quantiles`: numpy array. Quantiles between 0 and 1, to perform evaluation upon size (n_quantiles).
\n", + " `mask`: numpy array, Specifies date stamps per serie to consider in loss.
\n", + " \n", + " **Returns:**
\n", + " `mqloss`: numpy array, (single value).\n", + "\n", + " **References:**
\n", + " [Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)
\n", + " [James E. Matheson and Robert L. Winkler, \"Scoring Rules for Continuous Probability Distributions\".](https://www.jstor.org/stable/2629907)\n", + " \"\"\"\n", + " if weights is None: weights = np.ones(y.shape)\n", + " if (np.sum(quantiles>1)>0 or np.sum(quantiles<0)>0):\n", + " raise Exception('`quantiles` need to be between 0 and 1')\n", + " \n", + " _metric_protections(y, y_hat, weights)\n", + " n_q = len(quantiles)\n", + " \n", + " y_rep = np.expand_dims(y, axis=-1)\n", + " error = y_hat - y_rep\n", + " sq = np.maximum(-error, np.zeros_like(error))\n", + " s1_q = np.maximum(error, np.zeros_like(error))\n", + " mqloss = (quantiles * sq + (1 - quantiles) * s1_q)\n", + " \n", + " # Match y/weights dimensions and compute weighted average\n", + " weights = np.repeat(np.expand_dims(weights, axis=-1), repeats=n_q, axis=-1)\n", + " mqloss = np.average(mqloss, weights=weights, axis=axis)\n", + "\n", + " return mqloss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mean Squared Scaled Error" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def msse(y, y_hat, y_train, mask=None):\n", + " \"\"\"Mean Squared Scaled Error\n", + "\n", + " Computes Mean squared scaled error (MSSE), as proposed by Hyndman & Koehler (2006)\n", + " as an alternative to percentage errors, to avoid measure unstability.\n", + "\n", + " $$ \\mathrm{MSSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}, \\\\mathbf{\\hat{y}}^{naive1}) =\n", + " \\\\frac{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}^{naive1})} $$\n", + "\n", + " **Parameters:**
\n", + " `y`: numpy array, Actual values of size (`n_series`, `horizon`).
\n", + " `y_hat`: numpy array, Predicted values (`n_series`, `horizon`).
\n", + " `mask`: numpy array, Specifies date stamps per serie to consider in loss.
\n", + "\n", + " **Returns:**
\n", + " `loss`: float. \n", + "\n", + " **References:**
\n", + " - [Hyndman, R. J and Koehler, A. B. (2006).\n", + " \"Another look at measures of forecast accuracy\",\n", + " International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)\n", + " - [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker. \n", + " \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n", + " Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)\n", + " \"\"\"\n", + " if mask is None: \n", + " mask = np.ones_like(y)\n", + " n_series, horizon = y.shape\n", + "\n", + " eps = np.finfo(float).eps\n", + " y_naive = np.repeat(y_train[:,[-1]], horizon, axis=1)\n", + " norm = mse(y=y, y_hat=y_naive)\n", + " loss = mse(y=y, y_hat=y_hat, weights=mask)\n", + " loss = loss / (norm + eps)\n", + " return loss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(msse)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scaled CRPS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def scaled_crps(y, y_hat, quantiles):\n", + " \"\"\"Scaled Continues Ranked Probability Score\n", + "\n", + " Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021),\n", + " to measure the accuracy of predicted quantiles `y_hat` compared to the observation `y`.\n", + "\n", + " This metric averages percentual weighted absolute deviations as \n", + " defined by the quantile losses.\n", + "\n", + " $$ \\mathrm{sCRPS}(\\hat{F}_{\\\\tau}, \\mathbf{y}_{\\\\tau}) = 2 \\sum_{i}\n", + " \\int^{1}_{0}\n", + " \\\\frac{\\mathrm{QL}(\\hat{F}_{i,\\\\tau}, y_{i,\\\\tau})_{q}}{\\sum_{i} | y_{i,\\\\tau} |} dq $$\n", + "\n", + " where $\\hat{F}_{\\\\tau}$ is the an estimated multivariate distribution, and $y_{i,\\\\tau}$\n", + " are its realizations.\n", + "\n", + " **Parameters:**
\n", + " `y`: numpy array, Actual values of size (`n_series`, `horizon`).
\n", + " `y_hat`: numpy array, Predicted quantiles of size (`n_series`, `horizon`, `n_quantiles`).
\n", + " `quantiles`: numpy array,(`n_quantiles`). Quantiles to estimate from the distribution of y.
\n", + "\n", + " **Returns:**
\n", + " `loss`: float.\n", + "\n", + " **References:**
\n", + " - [Gneiting, Tilmann. (2011). \\\"Quantiles as optimal point forecasts\\\". \n", + " International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207010000063)\n", + " - [Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, Zhi Chen, Anil Gaba, Ilia Tsetlin, Robert L. Winkler. (2022). \n", + " \\\"The M5 uncertainty competition: Results, findings and conclusions\\\". \n", + " International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207021001722)\n", + " - [Syama Sundar Rangapuram, Lucien D Werner, Konstantinos Benidis, Pedro Mercado, Jan Gasthaus, Tim Januschowski. (2021). \n", + " \\\"End-to-End Learning of Coherent Probabilistic Forecasts for Hierarchical Time Series\\\". \n", + " Proceedings of the 38th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v139/rangapuram21a.html)\n", + " \"\"\"\n", + " eps = np.finfo(float).eps\n", + " norm = np.sum(np.abs(y))\n", + " loss = mqloss(y=y, y_hat=y_hat, quantiles=quantiles)\n", + " loss = 2 * loss * np.sum(np.ones(y.shape)) / (norm + eps)\n", + " return loss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(scaled_crps)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Energy Score" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def energy_score(y, y_sample1, y_sample2, beta=2):\n", + " \"\"\"Energy Score\n", + "\n", + " Calculates Gneiting's Energy Score sample approximation for\n", + " `y` and independent multivariate samples `y_sample1` and `y_sample2`.\n", + " The Energy Score generalizes the CRPS (`beta`=1) in the multivariate setting.\n", + "\n", + " $$ \\mathrm{ES}(\\\\mathbf{y}_{\\\\tau}, \\\\mathbf{\\hat{y}}_{\\\\tau}, \\\\mathbf{\\hat{y}}_{\\\\tau}') \n", + " = \\\\frac{1}{2} \\mathbb{E}_{\\hat{P}} \\\\left[ ||\\\\mathbf{\\hat{y}}_{\\\\tau} - \\\\mathbf{\\hat{y}}_{\\\\tau}'||^{\\\\beta} \\\\right]\n", + " - \\mathbb{E}_{\\hat{P}} \\\\left[ ||\\\\mathbf{y}_{\\\\tau} - \\\\mathbf{\\hat{y}}_{\\\\tau}||^{\\\\beta} \\\\right] \n", + " \\quad \\\\beta \\in (0,2]$$\n", + "\n", + " where $\\\\mathbf{\\hat{y}}_{\\\\tau}, \\\\mathbf{\\hat{y}}_{\\\\tau}'$ are independent samples drawn from $\\hat{P}$.\n", + "\n", + " **Parameters:**
\n", + " `y`: numpy array, Actual values of size (`n_series`, `horizon`).
\n", + " `y_sample1`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).
\n", + " `y_sample2`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).
\n", + " `beta`: float in (0,2], defines the energy score's power for the euclidean metric.
\n", + "\n", + " **Returns:**
\n", + " `loss`: float.\n", + "\n", + " **References:**
\n", + " - [Gneiting, Tilmann, and Adrian E. Raftery. (2007). \n", + " \\\"Strictly proper scoring rules, prediction and estimation\\\". \n", + " Journal of the American Statistical Association.](https://sites.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf)\n", + " - [Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, Rob J. Hyndman. (2022). \n", + " \\\"Probabilistic forecast reconciliation: Properties, evaluation and score optimisation\\\". \n", + " European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087) \n", + " \"\"\"\n", + " if beta>2 or beta<0:\n", + " raise Exception(\"beta needs to be between 0 and 2.\")\n", + "\n", + " dif1 = (y_sample1 - y_sample2)\n", + " dif2 = (y[:,:,None] - y_sample1)\n", + "\n", + " term1 = np.linalg.norm(dif1, axis=0) ** beta\n", + " term2 = np.linalg.norm(dif2, axis=0) ** beta\n", + "\n", + " loss = np.mean(term2 - 0.5 * term1)\n", + " return loss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(energy_score)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -59,7 +369,7 @@ "outputs": [], "source": [ "#| export\n", - "class HierarchicalEvaluation: \n", + "class HierarchicalEvaluation:\n", " \"\"\"Hierarchical Evaluation Class.\n", " \n", " You can use your own metrics to evaluate the performance of each level in the structure.\n", @@ -71,6 +381,8 @@ "\n", " **Parameters:**
\n", " `evaluators`: functions with arguments `y`, `y_hat` (numpy arrays).
\n", + "\n", + " **References:**
\n", " \"\"\"\n", " def __init__(self, \n", " evaluators: List[Callable]):\n", @@ -182,6 +494,7 @@ "from hierarchicalforecast.core import HierarchicalReconciliation\n", "from hierarchicalforecast.methods import BottomUp, MinTrace, ERM\n", "from hierarchicalforecast.utils import aggregate\n", + "\n", "df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')\n", "df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1)\n", "df.insert(0, 'Country', 'Australia')\n", @@ -283,11 +596,42 @@ " Y_df=hier_grouped_df,\n", " benchmark='y_model')" ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References \n", + "\n", + "- [Gneiting, Tilmann, and Adrian E. Raftery. (2007). \n", + "\\\"Strictly proper scoring rules, prediction and estimation\\\". \n", + "Journal of the American Statistical Association.](https://sites.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf)\n", + "- [Gneiting, Tilmann. (2011). \\\"Quantiles as optimal point forecasts\\\". \n", + "International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207010000063)\n", + "- [Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, Zhi Chen, Anil Gaba, Ilia Tsetlin, Robert L. Winkler. (2022). \n", + "\\\"The M5 uncertainty competition: Results, findings and conclusions\\\". \n", + "International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207021001722)\n", + "- [Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, Rob J. Hyndman. (2022). \n", + "\\\"Probabilistic forecast reconciliation: Properties, evaluation and score optimisation\\\". \n", + "European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087)\n", + "- [Syama Sundar Rangapuram, Lucien D Werner, Konstantinos Benidis, Pedro Mercado, Jan Gasthaus, Tim Januschowski. (2021). \n", + "\\\"End-to-End Learning of Coherent Probabilistic Forecasts for Hierarchical Time Series\\\". \n", + "Proceedings of the 38th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v139/rangapuram21a.html)\n", + "- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker (2022). \n", + "\"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures\". \n", + "Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "hierarchicalforecast", "language": "python", "name": "python3" } diff --git a/nbs/examples/TourismLarge-Evaluation.ipynb b/nbs/examples/TourismLarge-Evaluation.ipynb new file mode 100644 index 00000000..6d7a17a2 --- /dev/null +++ b/nbs/examples/TourismLarge-Evaluation.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9a2ffba7", + "metadata": {}, + "source": [ + "# • End to End Walkthrough\n", + "\n", + "> Hierarchical Forecast's reconciliation and evaluation." + ] + }, + { + "cell_type": "markdown", + "id": "3ebf6eb9", + "metadata": {}, + "source": [ + "This notebook offers a step to step guide to create a hierarchical forecasting pipeline.\n", + "\n", + "In the pipeline we will use `HierarchicalForecast` and `StatsForecast` core class, to create base predictions, reconcile and evaluate them. \n", + "\n", + "We will use the TourismL dataset that summarizes large Australian national visitor survey.\n", + "\n", + "Outline\n", + "1. Installing Packages\n", + "2. Prepare TourismL dataset\n", + " - Read and aggregate\n", + " - StatsForecast's Base Predictions\n", + "3. Reconciliar\n", + "4. Evaluar" + ] + }, + { + "cell_type": "markdown", + "id": "d08fca30", + "metadata": {}, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "id": "690aea05", + "metadata": {}, + "source": [ + "## 1. Installing HierarchicalForecast" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "225fd9fe", + "metadata": {}, + "outputs": [], + "source": [ + "# %%capture\n", + "# !pip install hierarchicalforecast\n", + "# !pip install -U numba statsforecast datasetsforecast" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1624c79f", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from datasetsforecast.hierarchical import HierarchicalData\n", + "\n", + "from statsforecast.core import StatsForecast\n", + "from statsforecast.models import AutoARIMA, Naive\n", + "\n", + "from hierarchicalforecast.utils import HierarchicalPlot\n", + "from hierarchicalforecast.core import HierarchicalReconciliation\n", + "from hierarchicalforecast.evaluation import HierarchicalEvaluation\n", + "from hierarchicalforecast.methods import BottomUp, MinTrace, ERM" + ] + }, + { + "cell_type": "markdown", + "id": "58961ccd", + "metadata": {}, + "source": [ + "## 2. Preparing TourismL Dataset" + ] + }, + { + "cell_type": "markdown", + "id": "8d002ec0", + "metadata": {}, + "source": [ + "### 2.1 Read and Aggregate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0045589f", + "metadata": {}, + "outputs": [], + "source": [ + "Y_df, S_df, tags = HierarchicalData.load('./data', 'TourismLarge')\n", + "Y_df['ds'] = pd.to_datetime(Y_df['ds'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3958dc40", + "metadata": {}, + "outputs": [], + "source": [ + "hplot = HierarchicalPlot(S=S_df, tags=tags)\n", + "hplot.plot_summing_matrix()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da22dd3c", + "metadata": {}, + "outputs": [], + "source": [ + "Y_test_df = Y_df.groupby('unique_id').tail(12)\n", + "Y_train_df = Y_df.drop(Y_test_df.index)\n", + "\n", + "Y_test_df = Y_test_df.set_index('unique_id')\n", + "Y_train_df = Y_train_df.set_index('unique_id')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c56af1d9", + "metadata": {}, + "outputs": [], + "source": [ + "Y_train_df" + ] + }, + { + "cell_type": "markdown", + "id": "98fec0eb", + "metadata": {}, + "source": [ + "### 2.2 StatsForecast's Base Predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76df4d31", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "if os.path.isfile('Y_hat.csv'):\n", + " Y_hat_df = pd.read_csv('Y_hat.csv')\n", + " Y_hat_df = Y_hat_df.set_index('unique_id')\n", + "else:\n", + " fcst = StatsForecast(\n", + " df=Y_train_df, \n", + " models=[AutoARIMA(season_length=12)],\n", + " fallback_model=[Naive()],\n", + " freq='M', \n", + " n_jobs=-1\n", + " )\n", + " Y_hat_df = fcst.forecast(h=12, level=[80])\n", + " Y_hat_df.to_csv('Y_hat.csv', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "740a5ea8", + "metadata": {}, + "outputs": [], + "source": [ + "Y_hat_df" + ] + }, + { + "cell_type": "markdown", + "id": "e557adc7", + "metadata": {}, + "source": [ + "## 3. Reconciliate Predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1982db8", + "metadata": {}, + "outputs": [], + "source": [ + "# reconcilers = [\n", + "# BottomUp(),\n", + "# MinTrace(method='ols'),\n", + "# ERM(method='reg_bu', lambda_reg=100)\n", + "# ]\n", + "# hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n", + "# Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df, \n", + "# S=S_df, tags=tags)" + ] + }, + { + "cell_type": "markdown", + "id": "7109676b", + "metadata": {}, + "source": [ + "## 4. Evaluation" + ] + }, + { + "cell_type": "markdown", + "id": "a524e1c2", + "metadata": {}, + "source": [ + "For reference we can check the performance when compared to that reported by previous hierarchical forecasting studies from the [PMM-CNN paper](https://arxiv.org/abs/2110.13179)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0b2c842", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "0821f68d", + "metadata": {}, + "source": [ + "## References" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffa4a749", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hierarchicalforecast", + "language": "python", + "name": "hierarchicalforecast" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nbs/methods.ipynb b/nbs/methods.ipynb index daec40a9..b4f4fd56 100644 --- a/nbs/methods.ipynb +++ b/nbs/methods.ipynb @@ -180,9 +180,7 @@ " if self.sampler is None:\n", " raise Exception(\"This model instance does not have sampler. Call fit with `intervals_method`.\")\n", "\n", - " # [samples, N, H] -> [N, H, samples]\n", " samples = self.sampler.get_samples(num_samples=num_samples)\n", - " samples = samples.transpose((1, 2, 0))\n", " return samples" ] }, diff --git a/nbs/probabilistic_methods.ipynb b/nbs/probabilistic_methods.ipynb index 0a028176..565cad80 100644 --- a/nbs/probabilistic_methods.ipynb +++ b/nbs/probabilistic_methods.ipynb @@ -50,7 +50,7 @@ "source": [ "#| hide\n", "from nbdev.showdoc import add_docs, show_doc\n", - "from fastcore.test import ExceptionExpected, test_close, test_eq\n", + "from fastcore.test import ExceptionExpected, test_close, test_eq, test_fail\n", "\n", "from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut, MinTrace, OptimalCombination, ERM" ] @@ -138,6 +138,9 @@ " partial_samples = state.multivariate_normal(mean=self.SP @ self.y_hat[:,t],\n", " cov=self.cov_rec[t], size=num_samples)\n", " samples[:,:,t] = partial_samples\n", + "\n", + " # [samples, N, H] -> [N, H, samples]\n", + " samples = samples.transpose((1, 2, 0))\n", " return samples\n", "\n", " def get_prediction_levels(self, res, level):\n", @@ -256,7 +259,11 @@ " SP = self.S @ self.P\n", " samples = np.apply_along_axis(lambda path: np.matmul(SP, path),\n", " axis=1, arr=samples)\n", - " return np.stack(samples)\n", + " samples = np.stack(samples)\n", + "\n", + " # [samples, N, H] -> [N, H, samples]\n", + " samples = samples.transpose((1, 2, 0))\n", + " return samples\n", "\n", " def get_prediction_levels(self, res, level):\n", " \"\"\" Adds reconciled forecast levels to results dictionary \"\"\"\n", @@ -264,8 +271,8 @@ " for lv in level:\n", " min_q = (100 - lv) / 200\n", " max_q = min_q + lv / 100\n", - " res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)\n", - " res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)\n", + " res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)\n", + " res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)\n", " return res" ] }, @@ -502,8 +509,7 @@ " )\n", "\n", " rec_samples[parent_idxs,:,:] = parent_samples\n", - "\n", - " return np.transpose(rec_samples, (2, 0, 1))\n", + " return rec_samples\n", "\n", " def get_prediction_levels(self, res, level):\n", " \"\"\" Adds reconciled forecast levels to results dictionary \"\"\"\n", @@ -511,8 +517,8 @@ " for lv in level:\n", " min_q = (100 - lv) / 200\n", " max_q = min_q + lv / 100\n", - " res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)\n", - " res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)\n", + " res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)\n", + " res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)\n", " return res" ] }, @@ -541,6 +547,8 @@ "outputs": [], "source": [ "#| hide\n", + "from hierarchicalforecast.evaluation import msse, energy_score, scaled_crps\n", + "\n", "S = np.array([[1., 1., 1., 1.],\n", " [1., 1., 0., 0.],\n", " [0., 0., 1., 1.],\n", @@ -568,7 +576,9 @@ "y_hat_base_insample = S @ y_hat_bottom_insample\n", "sigma = np.nansum((y_base - y_hat_base_insample) ** 2, axis=1) / (y_base.shape[1] - 1)\n", "sigma = np.sqrt(sigma)\n", - "sigmah = sigma[:, None] * np.sqrt(np.vstack([np.arange(1, h + 1) for _ in range(y_base.shape[0])])) " + "sigmah = sigma[:, None] * np.sqrt(np.vstack([np.arange(1, h + 1) for _ in range(y_base.shape[0])]))\n", + "noise = np.random.normal(scale=sigmah)\n", + "y_test = y_hat_base + noise" ] }, { @@ -624,6 +634,43 @@ "test_eq(bootstrap_samples.shape, permbu_samples.shape)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# test MSSE's execution\n", + "msse(y=y_test, y_hat=y_hat_base, y_train=y_base)\n", + "\n", + "# test energy score's execution\n", + "energy_score(y=y_test,\n", + " y_sample1=bootstrap_samples, y_sample2=permbu_samples)\n", + "\n", + "# test scaled CRPS' execution\n", + "quantiles = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])\n", + "bootstrap_quantiles = np.quantile(bootstrap_samples, q=quantiles, axis=2)\n", + "bootstrap_quantiles = bootstrap_quantiles.transpose((1,2,0)) # [Q,N,H] -> [N,H,Q]\n", + "scaled_crps(y=y_test, y_hat=bootstrap_quantiles, quantiles=quantiles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# test quantile loss protections\n", + "quantiles = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.2])\n", + "test_fail(\n", + " scaled_crps,\n", + " contains='between 0 and 1',\n", + " args=(y_test, bootstrap_quantiles, quantiles), \n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {},