[FEAT] CRPS, MSSE and Energy Score metrics #134

Merged · 7 commits · Dec 16, 2022
14 changes: 13 additions & 1 deletion hierarchicalforecast/_modidx.py
@@ -20,7 +20,19 @@
'hierarchicalforecast.evaluation.HierarchicalEvaluation.__init__': ( 'evaluation.html#hierarchicalevaluation.__init__',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.HierarchicalEvaluation.evaluate': ( 'evaluation.html#hierarchicalevaluation.evaluate',
'hierarchicalforecast/evaluation.py')},
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation._metric_protections': ( 'evaluation.html#_metric_protections',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.energy_score': ( 'evaluation.html#energy_score',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.mqloss': ( 'evaluation.html#mqloss',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.mse': ( 'evaluation.html#mse',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.msse': ( 'evaluation.html#msse',
'hierarchicalforecast/evaluation.py'),
'hierarchicalforecast.evaluation.scaled_crps': ( 'evaluation.html#scaled_crps',
'hierarchicalforecast/evaluation.py')},
'hierarchicalforecast.methods': { 'hierarchicalforecast.methods.BottomUp': ( 'methods.html#bottomup',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.BottomUp._get_PW_matrices': ( 'methods.html#bottomup._get_pw_matrices',
9 changes: 3 additions & 6 deletions hierarchicalforecast/core.py
@@ -38,7 +38,7 @@ def _build_fn_name(fn) -> str:
def _reverse_engineer_sigmah(Y_hat_df, y_hat, model_name, uids):
"""
This function assumes that the model creates prediction intervals
under a normality assumption with the following equation:
under a normality with the following equation:
$\hat{y}_{t+h} + c \hat{\sigma}_{h}$

In the future, we might deprecate this function in favor of a
@@ -93,8 +93,8 @@ def reconcile(self,
intervals_method: str = 'normality'):
"""Hierarchical Reconciliation Method.

The `reconcile` method is analogous to the SKLearn `fit` method; it applies different
reconciliation methods instantiated in the `reconcilers` list.
The `reconcile` method is analogous to the SKLearn `fit_predict` method; it
applies different reconciliation methods instantiated in the `reconcilers` list.

Most reconciliation methods can be described by the following convenient
linear algebra notation:
@@ -207,9 +207,6 @@ def reconcile(self,
end = time.time()
self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)

end = time.time()
self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)

if self.insample and has_fitted:
del reconciler_args['y_hat_insample']
gc.collect()
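For intuition, a hypothetical sketch of the inversion `_reverse_engineer_sigmah` describes: the function body is truncated in this diff, so the names `y_hi` and the `norm.ppf` mapping below are illustrative assumptions based only on the quoted equation $\hat{y}_{t+h} + c \hat{\sigma}_{h}$.

```python
import numpy as np
from scipy.stats import norm

# Hypothetical: recover sigmah from a normal prediction interval's upper
# bound, where c is the z-score associated with the `hi-{level}` quantile.
level = 90                         # e.g. a 90% central prediction interval
z = norm.ppf(0.5 + level / 200)    # c for the upper bound, ~1.645

y_hat = np.array([100.0, 110.0])   # point forecasts
y_hi  = np.array([110.0, 125.0])   # model's `hi-90` interval bound

sigmah = (y_hi - y_hat) / z        # invert y_hi = y_hat + c * sigmah
```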
230 changes: 225 additions & 5 deletions hierarchicalforecast/evaluation.py
@@ -1,17 +1,235 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/evaluation.ipynb.

# %% auto 0
__all__ = ['HierarchicalEvaluation']
__all__ = ['msse', 'scaled_crps', 'energy_score', 'HierarchicalEvaluation']

# %% ../nbs/evaluation.ipynb 2
# %% ../nbs/evaluation.ipynb 3
from inspect import signature
from typing import Callable, Dict, List, Optional
from typing import Callable, Dict, List, Optional, Union

import numpy as np
import pandas as pd

# %% ../nbs/evaluation.ipynb 5
class HierarchicalEvaluation:
# %% ../nbs/evaluation.ipynb 6
def _metric_protections(y: np.ndarray, y_hat: np.ndarray,
weights: Optional[np.ndarray]) -> None:
if not ((weights is None) or (np.sum(weights) > 0)):
raise Exception('Sum of `weights` cannot be 0')
if not ((weights is None) or (weights.shape == y.shape)):
raise Exception(
f'Wrong weight dimension weights.shape {weights.shape}, y.shape {y.shape}')

def mse(y: np.ndarray, y_hat: np.ndarray,
weights: Optional[np.ndarray] = None,
axis: Optional[int] = None) -> Union[float, np.ndarray]:
"""Mean Squared Error

Calculates the Mean Squared Error between
`y` and `y_hat`. MSE measures the prediction
accuracy of a forecasting method by calculating the
squared deviation between the prediction and the true
value at a given time, and averages these deviations
over the length of the series.

$$ \mathrm{MSE}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}) = \\frac{1}{H} \\sum^{t+H}_{\\tau=t+1} (y_{\\tau} - \hat{y}_{\\tau})^{2} $$

**Parameters:**<br>
`y`: numpy array, Actual values.<br>
`y_hat`: numpy array, Predicted values.<br>
`weights`: numpy array, optional weights for the weighted average.<br>
`axis`: optional int, axis along which to average.<br>

**Returns:**<br>
`mse`: numpy array, (single value).
"""
_metric_protections(y, y_hat, weights)

delta_y = np.square(y - y_hat)
if weights is not None:
mse = np.average(delta_y[~np.isnan(delta_y)],
weights=weights[~np.isnan(delta_y)],
axis=axis)
else:
mse = np.nanmean(delta_y, axis=axis)
return mse
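
A minimal usage sketch of the `mse` helper added above, with invented toy arrays:

```python
import numpy as np
from hierarchicalforecast.evaluation import mse

y     = np.array([[1.0, 2.0, 3.0],
                  [2.0, 2.0, 2.0]])
y_hat = np.array([[1.5, 2.0, 2.5],
                  [2.0, 1.0, 3.0]])

# Unweighted: mean of squared errors over every entry.
print(mse(y, y_hat))          # (0.25 + 0 + 0.25 + 0 + 1 + 1) / 6 ≈ 0.4167

# Per-series errors: average over the horizon axis only.
print(mse(y, y_hat, axis=1))  # array([0.16666667, 0.66666667])
```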

def mqloss(y: np.ndarray, y_hat: np.ndarray,
quantiles: np.ndarray,
weights: Optional[np.ndarray] = None,
axis: Optional[int] = None) -> Union[float, np.ndarray]:
"""Multi-Quantile Loss

Calculates the Multi-Quantile loss (MQL) between `y` and `y_hat`.
MQL calculates the average multi-quantile Loss for
a given set of quantiles, based on the absolute
difference between predicted quantiles and observed values.

$$ \mathrm{MQL}(\\mathbf{y}_{\\tau},[\\mathbf{\hat{y}}^{(q_{1})}_{\\tau}, ... ,\hat{y}^{(q_{n})}_{\\tau}]) = \\frac{1}{n} \\sum_{q_{i}} \mathrm{QL}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}^{(q_{i})}_{\\tau}) $$

The limit behavior of MQL makes it possible to measure the accuracy
of a full predictive distribution $\mathbf{\hat{F}}_{\\tau}$ with
the continuous ranked probability score (CRPS). This can be achieved
through a numerical integration technique that discretizes the quantiles
and treats the CRPS integral with a left Riemann approximation, averaging over
uniformly spaced quantiles.

$$ \mathrm{CRPS}(y_{\\tau}, \mathbf{\hat{F}}_{\\tau}) = \int^{1}_{0} \mathrm{QL}(y_{\\tau}, \hat{y}^{(q)}_{\\tau}) dq $$

**Parameters:**<br>
`y`: numpy array, Actual values.<br>
`y_hat`: numpy array, Predicted quantiles, stacked on the trailing axis.<br>
`quantiles`: numpy array of size (`n_quantiles`,), quantiles between 0 and 1 to evaluate.<br>
`weights`: numpy array, optional weights for the weighted average.<br>
`axis`: optional int, axis along which to average.<br>

**Returns:**<br>
`mqloss`: numpy array, (single value).

**References:**<br>
[Roger Koenker and Gilbert Bassett, Jr., "Regression Quantiles".](https://www.jstor.org/stable/1913643)<br>
[James E. Matheson and Robert L. Winkler, "Scoring Rules for Continuous Probability Distributions".](https://www.jstor.org/stable/2629907)
"""
if weights is None: weights = np.ones(y.shape)
if (np.sum(quantiles>1)>0 or np.sum(quantiles<0)>0):
raise Exception('`quantiles` need to be between 0 and 1')

_metric_protections(y, y_hat, weights)
n_q = len(quantiles)

y_rep = np.expand_dims(y, axis=-1)
error = y_hat - y_rep
sq = np.maximum(-error, np.zeros_like(error))
s1_q = np.maximum(error, np.zeros_like(error))
mqloss = (quantiles * sq + (1 - quantiles) * s1_q)

# Match y/weights dimensions and compute weighted average
weights = np.repeat(np.expand_dims(weights, axis=-1), repeats=n_q, axis=-1)
mqloss = np.average(mqloss, weights=weights, axis=axis)

return mqloss
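
To make the CRPS discretization above concrete, a small sketch with toy numbers: evaluating `mqloss` on a uniform quantile grid is the left Riemann approximation of the CRPS integral.

```python
import numpy as np
from hierarchicalforecast.evaluation import mqloss

rng = np.random.default_rng(0)
quantiles = np.linspace(0.01, 0.99, 99)   # uniformly spaced grid on (0, 1)

y = np.array([[10.0, 12.0]])              # observations: (n_series, horizon)

# Empirical quantile forecasts from a predictive sample, stacked on the
# trailing axis to get shape (n_series, horizon, n_quantiles).
sample = 11.0 + 2.0 * rng.standard_normal((1, 2, 5000))
y_hat = np.moveaxis(np.quantile(sample, quantiles, axis=-1), 0, -1)

# Averaging QL over the grid approximates the CRPS integral (up to the
# conventional factor of 2, which `scaled_crps` below applies).
print(mqloss(y=y, y_hat=y_hat, quantiles=quantiles))
```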

# %% ../nbs/evaluation.ipynb 8
def msse(y, y_hat, y_train, mask=None):
"""Mean Squared Scaled Error

Computes the Mean Squared Scaled Error (MSSE), as proposed by Hyndman & Koehler (2006)
as an alternative to percentage errors, to avoid measure instability.

$$ \mathrm{MSSE}(\\mathbf{y}, \\mathbf{\hat{y}}, \\mathbf{\hat{y}}^{naive1}) =
\\frac{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}})}{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}}^{naive1})} $$

**Parameters:**<br>
`y`: numpy array, Actual values of size (`n_series`, `horizon`).<br>
`y_hat`: numpy array, Predicted values of size (`n_series`, `horizon`).<br>
`y_train`: numpy array, Insample values of size (`n_series`, `n_train`), used for the naive benchmark.<br>
`mask`: numpy array, Specifies date stamps per series to consider in loss.<br>

**Returns:**<br>
`loss`: float.

**References:**<br>
- [Hyndman, R. J and Koehler, A. B. (2006).
"Another look at measures of forecast accuracy",
International Journal of Forecasting, Volume 22, Issue 4.](https://www.sciencedirect.com/science/article/pii/S0169207006000239)
- [Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker.
"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures".
Submitted to the International Journal of Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)
"""
if mask is None:
mask = np.ones_like(y)
n_series, horizon = y.shape

eps = np.finfo(float).eps
y_naive = np.repeat(y_train[:,[-1]], horizon, axis=1)
norm = mse(y=y, y_hat=y_naive)
loss = mse(y=y, y_hat=y_hat, weights=mask)
loss = loss / (norm + eps)
return loss
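
A toy call of `msse`, showing how the naive benchmark is built from `y_train` (values invented for illustration):

```python
import numpy as np
from hierarchicalforecast.evaluation import msse

y_train = np.array([[1.0, 2.0, 3.0],
                    [5.0, 5.0, 5.0]])   # insample: (n_series, n_train)
y       = np.array([[4.0, 5.0],
                    [5.0, 6.0]])        # actuals:  (n_series, horizon)
y_hat   = np.array([[3.5, 4.5],
                    [5.0, 5.0]])        # forecast: (n_series, horizon)

# The naive1 benchmark repeats the last insample value over the horizon:
# [[3, 3], [5, 5]]. Here MSE(y, y_hat) = 0.375 and MSE(y, naive1) = 1.5,
# so values below 1.0 mean "better than naive".
print(msse(y=y, y_hat=y_hat, y_train=y_train))  # ≈ 0.25
```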

# %% ../nbs/evaluation.ipynb 11
def scaled_crps(y, y_hat, quantiles):
"""Scaled Continues Ranked Probability Score

Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021),
to measure the accuracy of predicted quantiles `y_hat` compared to the observation `y`.

This metric averages percentual weighted absolute deviations as
defined by the quantile losses.

$$ \mathrm{sCRPS}(\hat{F}_{\\tau}, \mathbf{y}_{\\tau}) = 2 \sum_{i}
\int^{1}_{0}
\\frac{\mathrm{QL}(\hat{F}_{i,\\tau}, y_{i,\\tau})_{q}}{\sum_{i} | y_{i,\\tau} |} dq $$

where $\hat{F}_{\\tau}$ is an estimated multivariate distribution, and $y_{i,\\tau}$
are its realizations.

**Parameters:**<br>
`y`: numpy array, Actual values of size (`n_series`, `horizon`).<br>
`y_hat`: numpy array, Predicted quantiles of size (`n_series`, `horizon`, `n_quantiles`).<br>
`quantiles`: numpy array,(`n_quantiles`). Quantiles to estimate from the distribution of y.<br>

**Returns:**<br>
`loss`: float.

**References:**<br>
- [Gneiting, Tilmann. (2011). \"Quantiles as optimal point forecasts\".
International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207010000063)
- [Spyros Makridakis, Evangelos Spiliotis, Vassilios Assimakopoulos, Zhi Chen, Anil Gaba, Ilia Tsetlin, Robert L. Winkler. (2022).
\"The M5 uncertainty competition: Results, findings and conclusions\".
International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207021001722)
- [Syama Sundar Rangapuram, Lucien D Werner, Konstantinos Benidis, Pedro Mercado, Jan Gasthaus, Tim Januschowski. (2021).
\"End-to-End Learning of Coherent Probabilistic Forecasts for Hierarchical Time Series\".
Proceedings of the 38th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v139/rangapuram21a.html)
"""
eps = np.finfo(float).eps
norm = np.sum(np.abs(y))
loss = mqloss(y=y, y_hat=y_hat, quantiles=quantiles)
loss = 2 * loss * np.sum(np.ones(y.shape)) / (norm + eps)
return loss
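
Continuing the `mqloss` sketch, `scaled_crps` normalizes the same quantile losses by the aggregate absolute scale of `y`, making scores comparable across series of different magnitudes (inputs again invented):

```python
import numpy as np
from hierarchicalforecast.evaluation import scaled_crps

rng = np.random.default_rng(1)
quantiles = np.linspace(0.01, 0.99, 99)

y = 10.0 + np.abs(rng.standard_normal((3, 4)))             # (n_series, horizon)
sample = y[:, :, None] + rng.standard_normal((3, 4, 5000))
y_hat = np.moveaxis(np.quantile(sample, quantiles, axis=-1), 0, -1)

# 2 * mean(QL) * y.size / sum(|y|): a scale-free probabilistic score.
print(scaled_crps(y=y, y_hat=y_hat, quantiles=quantiles))
```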

# %% ../nbs/evaluation.ipynb 14
def energy_score(y, y_sample1, y_sample2, beta=2):
"""Energy Score

Calculates Gneiting's Energy Score sample approximation for
`y` and independent multivariate samples `y_sample1` and `y_sample2`.
The Energy Score generalizes the CRPS (`beta`=1) to the multivariate setting.

$$ \mathrm{ES}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}')
= \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{y}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}||^{\\beta} \\right]
- \\frac{1}{2} \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{\hat{y}}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}'||^{\\beta} \\right]
\quad \\beta \in (0,2]$$

where $\\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}'$ are independent samples drawn from $\hat{P}$.

**Parameters:**<br>
`y`: numpy array, Actual values of size (`n_series`, `horizon`).<br>
`y_sample1`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).<br>
`y_sample2`: numpy array, predictive distribution sample of size (`n_series`, `horizon`, `n_samples`).<br>
`beta`: float in (0,2], defines the energy score's power for the euclidean metric.<br>

**Returns:**<br>
`loss`: float.

**References:**<br>
- [Gneiting, Tilmann, and Adrian E. Raftery. (2007).
\"Strictly proper scoring rules, prediction and estimation\".
Journal of the American Statistical Association.](https://sites.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf)
- [Anastasios Panagiotelis, Puwasala Gamakumara, George Athanasopoulos, Rob J. Hyndman. (2022).
\"Probabilistic forecast reconciliation: Properties, evaluation and score optimisation\".
European Journal of Operational Research.](https://www.sciencedirect.com/science/article/pii/S0377221722006087)
"""
if beta <= 0 or beta > 2:
raise Exception("`beta` needs to be in the interval (0, 2].")

dif1 = (y_sample1 - y_sample2)
dif2 = (y[:,:,None] - y_sample1)

term1 = np.linalg.norm(dif1, axis=0) ** beta
term2 = np.linalg.norm(dif2, axis=0) ** beta

loss = np.mean(term2 - 0.5 * term1)
return loss
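
And a matching sketch for `energy_score`, which requires two independent batches of samples from the same predictive distribution (toy data):

```python
import numpy as np
from hierarchicalforecast.evaluation import energy_score

rng = np.random.default_rng(2)
n_series, horizon, n_samples = 5, 3, 1000

y = rng.standard_normal((n_series, horizon))

# Two independent sample batches from the same predictive distribution,
# shaped (n_series, horizon, n_samples) as the docstring specifies.
y_sample1 = rng.standard_normal((n_series, horizon, n_samples))
y_sample2 = rng.standard_normal((n_series, horizon, n_samples))

# beta=1 is the CRPS-like case mentioned above.
print(energy_score(y, y_sample1, y_sample2, beta=1))
```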

# %% ../nbs/evaluation.ipynb 17
class HierarchicalEvaluation:
"""Hierarchical Evaluation Class.

You can use your own metrics to evaluate the performance of each level in the structure.
@@ -23,6 +241,8 @@ class HierarchicalEvaluation:

**Parameters:**<br>
`evaluators`: functions with arguments `y`, `y_hat` (numpy arrays).<br>

**References:**<br>
"""
def __init__(self,
evaluators: List[Callable]):
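Only the `__init__` signature is visible in this diff, so the following is a hedged sketch of plugging the new metrics into `HierarchicalEvaluation`; the `functools.partial` adapter and the toy `y_train` are assumptions, grounded only in the documented contract that evaluators take `y` and `y_hat`:

```python
from functools import partial

import numpy as np
from hierarchicalforecast.evaluation import HierarchicalEvaluation, msse

y_train = np.random.rand(10, 24)  # hypothetical insample values

# `msse` also needs `y_train`, so a partial turns it into a (y, y_hat)
# evaluator that matches the documented interface.
evaluator = HierarchicalEvaluation(evaluators=[partial(msse, y_train=y_train)])
```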
2 changes: 0 additions & 2 deletions hierarchicalforecast/methods.py
@@ -108,9 +108,7 @@ def sample(self,
if self.sampler is None:
raise Exception("This model instance does not have sampler. Call fit with `intervals_method`.")

# [samples, N, H] -> [N, H, samples]
samples = self.sampler.get_samples(num_samples=num_samples)
samples = samples.transpose((1, 2, 0))
return samples

# %% ../nbs/methods.ipynb 10
20 changes: 13 additions & 7 deletions hierarchicalforecast/probabilistic_methods.py
@@ -82,6 +82,9 @@ def get_samples(self, num_samples: int=None):
partial_samples = state.multivariate_normal(mean=self.SP @ self.y_hat[:,t],
cov=self.cov_rec[t], size=num_samples)
samples[:,:,t] = partial_samples

# [samples, N, H] -> [N, H, samples]
samples = samples.transpose((1, 2, 0))
return samples

def get_prediction_levels(self, res, level):
@@ -168,16 +171,20 @@ def get_samples(self, num_samples: int=None):
SP = self.S @ self.P
samples = np.apply_along_axis(lambda path: np.matmul(SP, path),
axis=1, arr=samples)
return np.stack(samples)
samples = np.stack(samples)

# [samples, N, H] -> [N, H, samples]
samples = samples.transpose((1, 2, 0))
return samples

def get_prediction_levels(self, res, level):
""" Adds reconciled forecast levels to results dictionary """
samples = self.get_samples(num_samples=self.num_samples)
for lv in level:
min_q = (100 - lv) / 200
max_q = min_q + lv / 100
res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)
res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)
res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)
res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)
return res

# %% ../nbs/probabilistic_methods.ipynb 14
@@ -382,15 +389,14 @@ def get_samples(self, num_samples: int=None):
)

rec_samples[parent_idxs,:,:] = parent_samples

return np.transpose(rec_samples, (2, 0, 1))
return rec_samples

def get_prediction_levels(self, res, level):
""" Adds reconciled forecast levels to results dictionary """
samples = self.get_samples(num_samples=self.num_samples)
for lv in level:
min_q = (100 - lv) / 200
max_q = min_q + lv / 100
res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=0)
res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=0)
res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=2)
res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=2)
return res
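
The changes above standardize every sampler on a `[N, H, samples]` layout, which is why the quantile extraction in `get_prediction_levels` moves from `axis=0` to `axis=2`. A small numpy illustration of the equivalence:

```python
import numpy as np

rng = np.random.default_rng(3)
num_samples, N, H = 1000, 4, 6

old_layout = rng.standard_normal((num_samples, N, H))  # [samples, N, H]
new_layout = old_layout.transpose((1, 2, 0))           # [N, H, samples]

lv = 80
min_q = (100 - lv) / 200      # 0.10: lower bound of the 80% interval
max_q = min_q + lv / 100      # 0.90: upper bound

lo_old = np.quantile(old_layout, min_q, axis=0)  # quantile over the sample axis
lo_new = np.quantile(new_layout, min_q, axis=2)  # same values, new layout
hi_old = np.quantile(old_layout, max_q, axis=0)
hi_new = np.quantile(new_layout, max_q, axis=2)
assert np.allclose(lo_old, lo_new)               # both are (N, H)
assert np.allclose(hi_old, hi_new)
```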
9 changes: 3 additions & 6 deletions nbs/core.ipynb
@@ -131,7 +131,7 @@
"def _reverse_engineer_sigmah(Y_hat_df, y_hat, model_name, uids):\n",
" \"\"\"\n",
" This function assumes that the model creates prediction intervals\n",
" under a normality assumption with the following the Equation:\n",
" under a normality with the following the Equation:\n",
" $\\hat{y}_{t+h} + c \\hat{sigma}_{h}$\n",
"\n",
" In the future, we might deprecate this function in favor of a \n",
@@ -193,8 +193,8 @@
" intervals_method: str = 'normality'):\n",
" \"\"\"Hierarchical Reconciliation Method.\n",
"\n",
" The `reconcile` method is analogous to SKLearn `fit` method, it applies different \n",
" reconciliation methods instantiated in the `reconcilers` list. \n",
" The `reconcile` method is analogous to SKLearn `fit_predict` method, it \n",
" applies different reconciliation methods instantiated in the `reconcilers` list. \n",
"\n",
" Most reconciliation methods can be described by the following convenient \n",
" linear algebra notation:\n",
@@ -307,9 +307,6 @@
" end = time.time()\n",
" self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)\n",
"\n",
" end = time.time()\n",
" self.execution_times[f'{model_name}/{reconcile_fn_name}'] = (end - start)\n",
"\n",
" if self.insample and has_fitted:\n",
" del reconciler_args['y_hat_insample']\n",
" gc.collect()\n",