Add SGNHT #515

Merged · 8 commits · May 25, 2023

Changes from 2 commits
2 changes: 2 additions & 0 deletions blackjax/__init__.py
@@ -21,6 +21,7 @@
rmh,
sghmc,
sgld,
sgnht,
tempered_smc,
window_adaptation,
)
@@ -42,6 +43,7 @@
"ghmc",
"sgld", # stochastic gradient mcmc
"sghmc",
"sgnht",
"csgld",
"window_adaptation", # mcmc adaptation
"meads_adaptation",
79 changes: 79 additions & 0 deletions blackjax/kernels.py
@@ -41,6 +41,7 @@
"sghmc",
"meanfield_vi",
"csgld",
"sgnht",
"elliptical_slice",
"meads_adaptation",
"tempered_smc",
@@ -733,6 +734,84 @@ def step_fn(
return MCMCSamplingAlgorithm(init_fn, step_fn) # type: ignore[arg-type]


class sgnht:
"""Implements the (basic) user interface for the SGNHT kernel.

The general sgnht kernel (:meth:`blackjax.sgmcmc.sgnht.kernel`, alias
`blackjax.sgnht.kernel`) can be cumbersome to manipulate. Since most users
only need to specify the kernel parameters at initialization time, we
provide a helper function that specializes the general kernel.

Example
-------

To initialize an SGNHT kernel one needs to specify a gradient estimator
function; the step size, which may follow any schedule, is passed directly to
the ``step`` function at each sampling step. Here, for `data_size` data samples:

.. code::

grad_estimator = blackjax.sgmcmc.gradients.grad_estimator(logprior_fn, loglikelihood_fn, data_size)

We can now initialize the sgnht kernel and the state.

.. code::

sgnht = blackjax.sgnht(grad_estimator)
state = sgnht.init(position, rng_key)

Assuming we have an iterator `batches` that yields batches of data, we can
perform one step:

.. code::

step_size = 1e-3
minibatch = next(batches)
new_state = sgnht.step(rng_key, state, minibatch, step_size)

Kernels are not jit-compiled by default, so you will need to do it manually:

.. code::

step = jax.jit(sgnht.step)
new_state = step(rng_key, state, minibatch, step_size)

Parameters
----------
grad_estimator
A function that takes a position and a batch of data, and returns an estimate
of the gradient of the log-density at this position.

Returns
-------
An ``MCMCSamplingAlgorithm``.

"""

init = staticmethod(sgmcmc.sgnht.init)
kernel = staticmethod(sgmcmc.sgnht.kernel)

def __new__( # type: ignore[misc]
cls,
grad_estimator: Callable,
) -> MCMCSamplingAlgorithm:
step = cls.kernel()

def init_fn(position: PyTree, rng_key: PRNGKey):
return cls.init(rng_key, position)

def step_fn(rng_key: PRNGKey, state, minibatch: PyTree, step_size: float):
return step(
rng_key,
state,
grad_estimator,
minibatch,
step_size,
)

return MCMCSamplingAlgorithm(init_fn, step_fn) # type: ignore[arg-type]


# -----------------------------------------------------------------------------
# ADAPTATION
# -----------------------------------------------------------------------------
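Putting the docstring's snippets together, a minimal end-to-end sampling loop with the new interface could look like the sketch below; `logprior_fn`, `loglikelihood_fn`, `data_size`, the initial `position` and the `batches` iterator are assumed to be supplied by the user.

.. code::

    import jax

    import blackjax

    # Assumed user-supplied: logprior_fn, loglikelihood_fn, data_size,
    # an initial `position` PyTree, and an iterator `batches` of minibatches.
    grad_estimator = blackjax.sgmcmc.gradients.grad_estimator(
        logprior_fn, loglikelihood_fn, data_size
    )
    sgnht = blackjax.sgnht(grad_estimator)

    rng_key = jax.random.PRNGKey(0)
    init_key, rng_key = jax.random.split(rng_key)
    state = sgnht.init(position, init_key)

    # kernels are not jit-compiled by default
    step = jax.jit(sgnht.step)

    step_size = 1e-3
    samples = []
    for _ in range(1000):
        rng_key, step_key = jax.random.split(rng_key)
        state = step(step_key, state, next(batches), step_size)
        samples.append(state.position)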
4 changes: 2 additions & 2 deletions blackjax/sgmcmc/__init__.py
@@ -1,4 +1,4 @@
from . import csgld, sghmc, sgld
from . import csgld, sghmc, sgld, sgnht
from .gradients import grad_estimator, logdensity_estimator

__all__ = ["grad_estimator", "logdensity_estimator", "csgld", "sgld", "sghmc"]
__all__ = ["grad_estimator", "logdensity_estimator", "csgld", "sgld", "sghmc", "sgnht"]
53 changes: 48 additions & 5 deletions blackjax/sgmcmc/diffusions.py
@@ -12,11 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Solvers for Langevin diffusions."""
import operator

import jax
import jax.numpy as jnp

from blackjax.types import PRNGKey, PyTree
from blackjax.util import generate_gaussian_noise
from blackjax.util import generate_gaussian_noise, pytree_size

__all__ = ["overdamped_langevin", "sghmc"]

@@ -51,7 +53,8 @@ def one_step(


def sghmc(alpha: float = 0.01, beta: float = 0):
"""Solver for the diffusion equation of the SGHMC algorithm :cite:p:`chen2014stochastic`.
"""Euler solver for the diffusion equation of the SGHMC algorithm :cite:p:`chen2014stochastic`,
with parameters alpha and beta scaled according to :cite:p:`ma2015complete`.

This algorithm was ported from :cite:p:`coullon2022sgmcmcjax`.

@@ -66,11 +69,13 @@ def one_step(
temperature: float = 1.0,
):
noise = generate_gaussian_noise(rng_key, position)
position = jax.tree_util.tree_map(lambda x, p: x + p, position, momentum)
position = jax.tree_util.tree_map(
lambda x, p: x + step_size * p, position, momentum
)
momentum = jax.tree_util.tree_map(
lambda p, g, n: (1.0 - alpha) * p
lambda p, g, n: (1.0 - alpha * step_size) * p
+ step_size * g
+ jnp.sqrt(2 * step_size * (alpha - beta) * temperature) * n,
+ jnp.sqrt(step_size * (2 * alpha - step_size * beta) * temperature) * n,
momentum,
logdensity_grad,
noise,
@@ -79,3 +84,41 @@
return position, momentum

return one_step


def sgnht(alpha: float = 0.01, beta: float = 0):
"""Euler solver for the diffusion equation of the SGNHT algorithm :cite:p:`ding2014bayesian`.

This algorithm was ported from :cite:p:`coullon2022sgmcmcjax`.

"""

def one_step(
rng_key: PRNGKey,
position: PyTree,
momentum: PyTree,
xi: float,
logdensity_grad: PyTree,
step_size: float,
temperature: float = 1.0,
):
noise = generate_gaussian_noise(rng_key, position)
position = jax.tree_util.tree_map(
lambda x, p: x + step_size * p, position, momentum
)
momentum = jax.tree_util.tree_map(
lambda p, g, n: (1.0 - xi * step_size) * p
+ step_size * g
+ jnp.sqrt(step_size * (2 * alpha - step_size * beta) * temperature) * n,
momentum,
logdensity_grad,
noise,
)
momentum_dot = jax.tree_util.tree_reduce(
operator.add, jax.tree_util.tree_map(lambda x: jnp.sum(x * x), momentum)
)
d = pytree_size(momentum)
xi = xi + step_size * (momentum_dot / d - 1)
return position, momentum, xi

return one_step
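For reference, writing h for the step size, T for the temperature, α and β for the solver parameters, and d for the total dimension of the momentum, the update implemented by the sgnht one_step above is the following Euler discretization (a transcription of the code, not a quotation from :cite:p:`ding2014bayesian`):

.. math::

    \theta_{t+1} &= \theta_t + h\, p_t \\
    p_{t+1} &= (1 - \xi_t h)\, p_t + h\, \widehat{\nabla \log \pi}(\theta_t)
        + \sqrt{h\,(2\alpha - h\beta)\, T}\, \eta_t,
        \qquad \eta_t \sim \mathcal{N}(0, I) \\
    \xi_{t+1} &= \xi_t + h \left( \tfrac{1}{d}\, \|p_{t+1}\|^2 - 1 \right)

where \widehat{\nabla \log \pi} is the stochastic gradient estimate passed in as logdensity_grad.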
2 changes: 1 addition & 1 deletion blackjax/sgmcmc/sghmc.py
@@ -45,7 +45,7 @@ def body_fn(state, rng_key):
)
return ((position, momentum), position)

momentum = generate_gaussian_noise(rng_key, position, 0, jnp.sqrt(step_size))
momentum = generate_gaussian_noise(rng_key, position)
keys = jax.random.split(rng_key, num_integration_steps)
(position, momentum), _ = jax.lax.scan(body_fn, (position, momentum), keys)

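The sghmc.py change above follows from the rescaled solver in diffusions.py: the step-size factors now appear explicitly inside the position and momentum updates, so the momentum refreshed at the start of each trajectory is drawn from a standard Gaussian instead of one with scale sqrt(step_size). In the same notation as before, each inner step of the trajectory now reads:

.. math::

    \theta_{t+1} &= \theta_t + h\, p_t \\
    p_{t+1} &= (1 - \alpha h)\, p_t + h\, \widehat{\nabla \log \pi}(\theta_t)
        + \sqrt{h\,(2\alpha - h\beta)\, T}\, \eta_t,
        \qquad \eta_t \sim \mathcal{N}(0, I)

with p_0 \sim \mathcal{N}(0, I) at the start of each trajectory.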
66 changes: 66 additions & 0 deletions blackjax/sgmcmc/sgnht.py
@@ -0,0 +1,66 @@
# Copyright 2020- The Blackjax Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Public API for the Stochastic gradient Nosé-Hoover Thermostat kernel."""
from typing import Callable, NamedTuple

import blackjax.sgmcmc.diffusions as diffusions
from blackjax.types import PRNGKey, PyTree
from blackjax.util import generate_gaussian_noise

__all__ = ["SGNHTState", "init", "kernel"]


class SGNHTState(NamedTuple):
r"""State of the SGNHT algorithm.

Parameters
----------
position
Current position in the sample space.
momentum
Current momentum in the sample space.
xi
Scalar thermostat controlling kinetic energy.

"""
position: PyTree
momentum: PyTree
xi: float


def init(rng_key: PRNGKey, position: PyTree, alpha: float = 0.01):
momentum = generate_gaussian_noise(rng_key, position)
return SGNHTState(position, momentum, alpha)


def kernel(alpha: float = 0.01, beta: float = 0) -> Callable:
"""Stochastic gradient Nosé-Hoover Thermostat (SGNHT) algorithm."""
integrator = diffusions.sgnht(alpha, beta)

def one_step(
rng_key: PRNGKey,
state: SGNHTState,
grad_estimator: Callable,
minibatch: PyTree,
step_size: float,
temperature: float = 1.0,
) -> SGNHTState:
position, momentum, xi = state
logdensity_grad = grad_estimator(position, minibatch)
position, momentum, xi = integrator(
rng_key, position, momentum, xi, logdensity_grad, step_size, temperature
)
return SGNHTState(position, momentum, xi)

return one_step
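For completeness, here is a sketch of driving the low-level kernel directly, without the blackjax.sgnht wrapper; grad_estimator, position and minibatch are assumed to be defined as in the high-level example above.

.. code::

    import jax

    import blackjax

    step = blackjax.sgmcmc.sgnht.kernel(alpha=0.01, beta=0.0)

    rng_key = jax.random.PRNGKey(0)
    init_key, step_key = jax.random.split(rng_key)

    # init draws the initial momentum and sets the thermostat xi to alpha;
    # note the low-level init takes (rng_key, position), while the wrapper's
    # init takes (position, rng_key)
    state = blackjax.sgmcmc.sgnht.init(init_key, position)
    new_state = step(step_key, state, grad_estimator, minibatch, step_size=1e-3)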
16 changes: 16 additions & 0 deletions docs/refs.bib
@@ -327,3 +327,19 @@ @book{gelman2014bayesian
year={2014},
publisher={Chapman and Hall/CRC}
}

@article{ding2014bayesian,
title={Bayesian sampling using stochastic gradient thermostats},
author={Ding, Nan and Fang, Youhan and Babbush, Ryan and Chen, Changyou and Skeel, Robert D. and Neven, Hartmut},
journal={Advances in Neural Information Processing Systems},
volume={27},
year={2014}
}

@article{ma2015complete,
title={A complete recipe for stochastic gradient MCMC},
author={Ma, Yi-An and Chen, Tianqi and Fox, Emily},
journal={Advances in Neural Information Processing Systems},
volume={28},
year={2015}
}
40 changes: 40 additions & 0 deletions tests/mcmc/test_sampling.py
@@ -333,6 +333,46 @@ def test_linear_regression_sghmc_cv(self):
data_batch = X_data[:100, :]
_ = sghmc(rng_key, init_position, data_batch, 1e-3)

def test_linear_regression_sgnht(self):
rng_key, data_key = jax.random.split(self.key, 2)

data_size = 1000
X_data = jax.random.normal(data_key, shape=(data_size, 5))

grad_fn = blackjax.sgmcmc.grad_estimator(
self.logprior_fn, self.loglikelihood_fn, data_size
)
sgnht = blackjax.sgnht(grad_fn)

_, rng_key = jax.random.split(rng_key)
init_position = 1.0
data_batch = X_data[:100, :]
init_state = sgnht.init(init_position, self.key)
_ = sgnht.step(rng_key, init_state, data_batch, 1e-3)

def test_linear_regression_sgnht_cv(self):
rng_key, data_key = jax.random.split(self.key, 2)

data_size = 1000
X_data = jax.random.normal(data_key, shape=(data_size, 5))

centering_position = 1.0
grad_fn = blackjax.sgmcmc.grad_estimator(
self.logprior_fn, self.loglikelihood_fn, data_size
)
cv_grad_fn = blackjax.sgmcmc.gradients.control_variates(
grad_fn, centering_position, X_data
)

sgnht = blackjax.sgnht(cv_grad_fn)

_, rng_key = jax.random.split(rng_key)
init_position = 1.0
data_batch = X_data[:100, :]
init_state = sgnht.init(init_position, self.key)
_ = sgnht.step(rng_key, init_state, data_batch, 1e-3)


class LatentGaussianTest(chex.TestCase):
"""Test sampling of a linear regression model."""