pymc-labs · juanitorduz · May 8, 2024 · May 2, 2024 · May 2, 2024 · May 2, 2024
diff --git a/docs/source/notebooks/general/other_nuts_samplers.ipynb b/docs/source/notebooks/general/other_nuts_samplers.ipynb
@@ -5,6 +5,7 @@
    "id": "51e3591e",
    "metadata": {},
    "source": [
+    "(other_nuts_samplers)=\n",
     "# Other NUTS Samplers\n",
     "\n",
     "In this notebook we show how to fit a CLV model with other NUTS samplers. These alternative samplers can be significantly faster and also sample on the GPU.\n",

diff --git a/docs/source/notebooks/mmm/mmm_example.ipynb b/docs/source/notebooks/mmm/mmm_example.ipynb
diff --git a/pymc_marketing/mmm/base.py b/pymc_marketing/mmm/base.py
@@ -40,6 +40,7 @@
 from pymc_marketing.mmm.budget_optimizer import budget_allocator
 from pymc_marketing.mmm.transformers import michaelis_menten
 from pymc_marketing.mmm.utils import (
+    apply_sklearn_transformer_across_dim,
     estimate_menten_parameters,
     estimate_sigmoid_parameters,
     find_sigmoid_inflection_point,
@@ -337,6 +338,19 @@
     def plot_posterior_predictive(
         self, original_scale: bool = False, ax: plt.Axes = None, **plt_kwargs: Any
     ) -> plt.Figure:
+        """Plot posterior predictive distribution from the model fit.
+
+        Parameters
+        ----------
+        original_scale : bool, optional
+            Whether to plot in the original scale.
+        ax : plt.Axes, optional
+            Matplotlib axis object.
+
+        Returns
+        -------
+        plt.Figure
+        """
         posterior_predictive_data: Dataset = self.posterior_predictive
         likelihood_hdi_94: DataArray = az.hdi(
             ary=posterior_predictive_data, hdi_prob=0.94
@@ -394,7 +408,9 @@
                 np.asarray(posterior_predictive_data.date),
                 target_to_plot,
                 color="black",
+                label="Observed",
             )
+            ax.legend()
             ax.set(
                 title="Posterior Predictive Check",
                 xlabel="date",
@@ -404,6 +420,97 @@
             raise RuntimeError("The model hasn't been fit yet, call .fit() first")
         return fig
 
+    def plot_errors(
+        self, original_scale: bool = False, ax: plt.Axes = None, **plt_kwargs: Any
+    ) -> plt.Figure:
+        """Plot the model errors, i.e. the true target minus the posterior predictive.
+
+        Parameters
+        ----------
+        original_scale : bool, optional
+            Whether to plot in the original scale.
+        ax : plt.Axes, optional
+            Matplotlib axis object. A new figure is created when omitted.
+        **plt_kwargs : Any
+            Keyword arguments passed to ``plt.subplots`` when ``ax`` is None.
+
+        Returns
+        -------
+        plt.Figure
+
+        Raises
+        ------
+        RuntimeError
+            If ``self.X`` or ``self.y`` is None.
+        ValueError
+            If the target and the posterior predictive dates differ in length.
+        """
+        posterior_predictive_data: Dataset = self.posterior_predictive
+
+        # Fail before touching ``self.y`` or creating a figure.
+        if self.X is None or self.y is None:
+            raise RuntimeError("The model hasn't been fit yet, call .fit() first")
+
+        target = np.asarray(
+            transform_1d_array(self.get_target_transformer().transform, self.y)
+        )
+
+        if len(target) != len(posterior_predictive_data.date):
+            raise ValueError(
+                "The length of the target variable doesn't match the length of the date column. "
+                "If you are computing out-of-sample errors, please overwrite `self.y` with the "
+                "corresponding (non-transformed) target variable."
+            )
+
+        # Prepend two axes (presumably chain and draw) to line up with the samples.
+        target_broadcast = np.atleast_1d(target)[np.newaxis, np.newaxis, ...]
+        errors = target_broadcast - posterior_predictive_data
+
+        errors_hdi_94: DataArray = az.hdi(ary=errors, hdi_prob=0.94)[self.output_var]
+        errors_hdi_50: DataArray = az.hdi(ary=errors, hdi_prob=0.50)[self.output_var]
+
+        if original_scale:
+            inverse = self.get_target_transformer().inverse_transform
+            errors = apply_sklearn_transformer_across_dim(
+                data=errors, func=inverse, dim_name="date"
+            )
+            errors_hdi_94 = inverse(Xt=errors_hdi_94)
+            errors_hdi_50 = inverse(Xt=errors_hdi_50)
+
+        if ax is None:
+            fig, ax = plt.subplots(**plt_kwargs)
+        else:
+            fig = ax.figure
+
+        # Raw strings: ``\%`` is an invalid escape in a regular string literal.
+        for hdi, alpha, label in (
+            (errors_hdi_94, 0.2, r"$94\%$ HDI"),
+            (errors_hdi_50, 0.3, r"$50\%$ HDI"),
+        ):
+            ax.fill_between(
+                x=posterior_predictive_data.date,
+                y1=hdi[:, 0],
+                y2=hdi[:, 1],
+                color="C3",
+                alpha=alpha,
+                label=label,
+            )
+
+        ax.plot(
+            posterior_predictive_data.date,
+            errors[self.output_var].mean(dim=("chain", "draw")).to_numpy(),
+            color="C3",
+            label="Errors Mean",
+        )
+        ax.axhline(y=0.0, linestyle="--", color="black", label="zero")
+        ax.legend()
+        ax.set(
+            title="Errors Posterior Distribution",
+            xlabel="date",
+            ylabel="true - predictions",
+        )
+        return fig
+
     def _format_model_contributions(self, var_contribution: str) -> DataArray:
         contributions = az.extract(
             self.fit_result,
@@ -1411,14 +1518,20 @@
         cumulative_contribution = 0
 
         for index, row in dataframe.iterrows():
-            color = "lightblue" if row["contribution"] >= 0 else "salmon"
+            color = "C0" if row["contribution"] >= 0 else "C3"
 
             bar_start = (
                 cumulative_contribution + row["contribution"]
                 if row["contribution"] < 0
                 else cumulative_contribution
             )
-            ax.barh(row["component"], row["contribution"], left=bar_start, color=color)
+            ax.barh(
+                row["component"],
+                row["contribution"],
+                left=bar_start,
+                color=color,
+                alpha=0.5,
+            )
 
             if row["contribution"] > 0:
                 cumulative_contribution += row["contribution"]

diff --git a/tests/mmm/test_plotting.py b/tests/mmm/test_plotting.py
@@ -104,6 +104,9 @@ class ToyMMM(BaseDelayedSaturatedMMM, MaxAbsScaleTarget):
             ("plot_posterior_predictive", {}),
             ("plot_posterior_predictive", {"original_scale": True}),
             ("plot_posterior_predictive", {"ax": plt.subplots()[1]}),
+            ("plot_errors", {}),
+            ("plot_errors", {"original_scale": True}),
+            ("plot_errors", {"ax": plt.subplots()[1]}),
             ("plot_components_contributions", {}),
             ("plot_channel_parameter", {"param_name": "alpha"}),
             ("plot_waterfall_components_decomposition", {"original_scale": True}),