alchemistry · xiki-tempula · Jun 6, 2023 · Jun 3, 2023 · Jun 3, 2023 · Jun 3, 2023
diff --git a/CHANGES b/CHANGES
@@ -23,10 +23,15 @@ Changes
 Enhancements
   - Add a parser to read serialised pandas dataframe (parquet) (issue #316, PR#317).
   - workflow.ABFE allow parquet as input (issue #316, PR#317).
+  - Allow MBAR estimator to use bootstrap to compute error (issue #320, PR#322).
 
 Fixes
   - Fix the case where visualisation.plot_convergence would fail when the final
-   error is NaN (issue #318, PR#319).
+    error is NaN (issue #318, PR#319).
+
+DeprecationWarning
+  - The default MBAR error estimator in workflow.ABFE.estimate will change from
+    analytic to bootstrap (n_bootstraps=50) in 2.2.0 (issue #320, PR#322).
 
 
 06/04/2023 xiki-tempula

diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py
@@ -26,6 +26,10 @@ class MBAR(BaseEstimator, _EstimatorMixOut):
         available via :func:`scipy.optimize.minimize` or
         :func:`scipy.optimize.root`.
 
+    n_bootstraps : int, optional
+        Number of bootstrap samples used to estimate the uncertainty. ``0`` means
+        use analytic error estimation; 50 to 200 is a reasonable range for bootstrap.
+
     verbose : bool, optional
         Set to ``True`` if verbose debug output from :mod:`pymbar` is desired.
 
@@ -59,6 +63,8 @@ class MBAR(BaseEstimator, _EstimatorMixOut):
        `delta_f_`, `d_delta_f_`, `states_` are view of the original object.
     .. versionchanged:: 2.0.0
         default value for `method` was changed from "hybr" to "robust"
+    .. versionchanged:: 2.1.0
+        `n_bootstraps` option added.
     """
 
     def __init__(
@@ -67,13 +73,15 @@ def __init__(
         relative_tolerance=1.0e-7,
         initial_f_k=None,
         method="robust",
+        n_bootstraps=0,
         verbose=False,
     ):
         self.maximum_iterations = maximum_iterations
         self.relative_tolerance = relative_tolerance
         self.initial_f_k = initial_f_k
         self.method = method
         self.verbose = verbose
+        self.n_bootstraps = n_bootstraps
 
         # handle for pymbar.MBAR object
         self._mbar = None
@@ -108,8 +116,15 @@ def fit(self, u_nk):
             verbose=self.verbose,
             initial_f_k=self.initial_f_k,
             solver_protocol=self.method,
+            n_bootstraps=self.n_bootstraps,
+        )
+        if self.n_bootstraps == 0:
+            uncertainty_method = None
+        else:
+            uncertainty_method = "bootstrap"
+        out = self._mbar.compute_free_energy_differences(
+            return_theta=True, uncertainty_method=uncertainty_method
         )
-        out = self._mbar.compute_free_energy_differences(return_theta=True)
         self._delta_f_ = pd.DataFrame(
             out["Delta_f"], columns=self._states_, index=self._states_
         )

diff --git a/src/alchemlyb/tests/test_fep_estimators.py b/src/alchemlyb/tests/test_fep_estimators.py
@@ -134,3 +134,19 @@ def test_states_(self, estimator):
         _estimator = estimator()
         with pytest.raises(AttributeError):
             _estimator.states_ = 1
+
+
+def test_bootstrap(gmx_benzene_Coulomb_u_nk):
+    u_nk = alchemlyb.concat(gmx_benzene_Coulomb_u_nk)
+    mbar = MBAR(n_bootstraps=2)  # non-zero: bootstrap error estimate
+    mbar.fit(u_nk)
+    mbar_bootstrap_mean = mbar.delta_f_.loc[0.00, 1.00]
+    mbar_bootstrap_err = mbar.d_delta_f_.loc[0.00, 1.00]
+
+    mbar = MBAR()  # default n_bootstraps=0: analytic error estimate
+    mbar.fit(u_nk)
+    mbar_mean = mbar.delta_f_.loc[0.00, 1.00]
+    mbar_err = mbar.d_delta_f_.loc[0.00, 1.00]
+
+    assert mbar_bootstrap_mean == mbar_mean  # estimate itself is unchanged
+    assert mbar_bootstrap_err != mbar_err  # only the uncertainty differs
diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py
@@ -240,6 +240,17 @@ def test_single_estimator_mbar(self, workflow, monkeypatch):
         summary = workflow.generate_result()
         assert np.isclose(summary["MBAR"]["Stages"]["TOTAL"], 21.645742066696315, 0.1)
 
+    def test_mbar_n_bootstraps(self, workflow, monkeypatch):
+        monkeypatch.setattr(workflow, "estimator", dict())  # start with no estimators
+        workflow.estimate(estimators="MBAR", n_bootstraps=2)  # kwargs go to MBAR()
+        summary = workflow.generate_result()
+        bootstrap_error = summary["MBAR_Error"]["Stages"]["TOTAL"]
+        monkeypatch.setattr(workflow, "estimator", dict())  # reset before 2nd run
+        workflow.estimate(estimators="MBAR", n_bootstraps=0)  # 0 = analytic error
+        summary = workflow.generate_result()
+        non_bootstrap_error = summary["MBAR_Error"]["Stages"]["TOTAL"]
+        assert bootstrap_error != non_bootstrap_error  # errors should differ
+
     def test_single_estimator_ti(self, workflow, monkeypatch):
         monkeypatch.setattr(workflow, "estimator", dict())
         monkeypatch.setattr(workflow, "summary", None)

diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py
@@ -403,6 +403,9 @@ def estimate(self, estimators=("MBAR", "BAR", "TI"), **kwargs):
             'MBAR']. Note that the estimators are in their original form where
             no unit conversion has been attempted.
 
+        .. versionchanged:: 2.1.0
+            DeprecationWarning for using analytic error for the MBAR estimator.
+
         """
         # Make estimators into a tuple
         if isinstance(estimators, str):
@@ -436,6 +439,10 @@ def estimate(self, estimators=("MBAR", "BAR", "TI"), **kwargs):
         for estimator in estimators:
             if estimator == "MBAR":
                 logger.info("Run MBAR estimator.")
+                warnings.warn(
+                    "From 2.2.0, n_bootstraps=50 will be the default for estimating MBAR error.",
+                    DeprecationWarning,
+                )
                 self.estimator[estimator] = MBAR(**kwargs).fit(u_nk)
             elif estimator == "BAR":
                 logger.info("Run BAR estimator.")