tskit-dev · hyanwong · Jan 21, 2025 · Dec 1, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,9 +5,13 @@
 **Features**
 
 - An `allow_unary` flag (``False by default``) has been added to all methods.
+
 - A `set_metadata` flag has been added so that node and mutation metadata can be
   omitted, saved (default), or overwritten even if this requires changing the schema.
 
+- An environment variable `TSDATE_ENABLE_NUMBA_CACHE` can be set to cache JIT
+  compiled code, speeding up loading time (useful when testing).
+
 **Documentation**
 
 - Various fixes in documentation, including documenting returned fits.

diff --git a/docs/installation.md b/docs/installation.md
@@ -35,3 +35,24 @@ or
 
 Alternatively, the {ref}`Python API <sec_python_api>` allows more fine-grained control
 of the inference process.
+
+(sec_installation_testing)=
+
+## Testing
+
+Unit tests can be run from a clone of the
+[GitHub repository](https://github.com/tskit-dev/tsdate) by running pytest
+at the top level of the repository:
+
+    $ python -m pytest
+
+_Tsdate_ makes extensive use of [numba](https://numba.pydata.org)'s
+"just in time" (jit) compilation to speed up time-consuming numerical functions.
+Because of the need to compile these functions, loading the tsdate package can take
+tens of seconds. To speed up loading time, you can set the environment variable
+
+    TSDATE_ENABLE_NUMBA_CACHE=1
+
+The compiled code is not cached by default as it can be problematic when
+e.g. running the same installation on different CPU types in a cluster,
+and can occasionally lead to unexpected crashes.
diff --git a/tests/exact_moments.py b/tests/exact_moments.py
@@ -11,7 +11,7 @@
 import numpy as np
 import scipy
 from scipy.special import betaln
-from scipy.special import gammaln
+from math import lgamma
 
 
 def moments(a_i, b_i, a_j, b_j, y_ij, mu_ij):
@@ -33,7 +33,7 @@ def moments(a_i, b_i, a_j, b_j, y_ij, mu_ij):
     s2 = s1 * (a + 1) * (b + 1) / (c + 1)
     d1 = s1 * exp(f1 - f0)
     d2 = s2 * exp(f2 - f0)
-    logl = f0 + betaln(y_ij + 1, a) + gammaln(b) - b * log(t)
+    logl = f0 + betaln(y_ij + 1, a) + lgamma(b) - b * log(t)
     mn_j = d1 / t
     sq_j = d2 / t**2
     va_j = sq_j - mn_j**2
@@ -56,7 +56,7 @@ def rootward_moments(t_j, a_i, b_i, y_ij, mu_ij):
     b = s + 1
     z = t_j * r
     if t_j == 0.0:
-        logl = gammaln(s) - s * log(r)
+        logl = lgamma(s) - s * log(r)
         mn_i = s / r
         va_i = s / r**2
         return logl, mn_i, va_i
@@ -65,7 +65,7 @@ def rootward_moments(t_j, a_i, b_i, y_ij, mu_ij):
     f2 = float(mpmath.log(mpmath.hyperu(a + 2, b + 2, z)))
     d0 = -a * exp(f1 - f0)
     d1 = -(a + 1) * exp(f2 - f1)
-    logl = f0 - b_i * t_j + (b - 1) * log(t_j) + gammaln(a)
+    logl = f0 - b_i * t_j + (b - 1) * log(t_j) + lgamma(a)
     mn_i = t_j * (1 - d0)
     va_i = t_j**2 * d0 * (d1 - d0)
     return logl, mn_i, va_i
@@ -112,7 +112,7 @@ def unphased_moments(a_i, b_i, a_j, b_j, y_ij, mu_ij):
     s2 = s1 * (a + 1) * (b + 1) / (c + 1)
     d1 = s1 * exp(f1 - f0)
     d2 = s2 * exp(f2 - f0)
-    logl = f0 + betaln(a_j, a_i) + gammaln(b) - b * log(t)
+    logl = f0 + betaln(a_j, a_i) + lgamma(b) - b * log(t)
     mn_j = d1 / t
     sq_j = d2 / t**2
     va_j = sq_j - mn_j**2
@@ -130,7 +130,7 @@ def twin_moments(a_i, b_i, y_ij, mu_ij):
     """
     s = a_i + y_ij
     r = b_i + 2 * mu_ij
-    logl = log(2) * y_ij + gammaln(s) - log(r) * s
+    logl = log(2) * y_ij + lgamma(s) - log(r) * s
     mn_i = s / r
     va_i = s / r**2
     return logl, mn_i, va_i
@@ -151,7 +151,7 @@ def sideways_moments(t_i, a_j, b_j, y_ij, mu_ij):
     f2 = float(mpmath.log(mpmath.hyperu(a + 2, b + 2, z)))
     d0 = -a * exp(f1 - f0)
     d1 = -(a + 1) * exp(f2 - f1)
-    logl = f0 - mu_ij * t_i + (b - 1) * log(t_i) + gammaln(a)
+    logl = f0 - mu_ij * t_i + (b - 1) * log(t_i) + lgamma(a)
     mn_j = -t_i * d0
     va_j = t_i**2 * d0 * (d1 - d0)
     return logl, mn_j, va_j

diff --git a/tests/test_hypergeo.py b/tests/test_hypergeo.py
@@ -24,6 +24,8 @@
 Test cases for numba-fied hypergeometric functions
 """
 
+from math import lgamma
+
 import mpmath
 import numdifftools as nd
 import numpy as np
@@ -38,8 +40,8 @@ class TestPolygamma:
     Test numba-fied gamma functions
     """
 
-    def test_gammaln(self, x):
-        assert np.isclose(hypergeo._gammaln(x), float(mpmath.re(mpmath.loggamma(x))))
+    def test_lgamma(self, x):
+        assert np.isclose(lgamma(x), float(mpmath.re(mpmath.loggamma(x))))
 
     def test_digamma(self, x):
         assert np.isclose(hypergeo._digamma(x), float(mpmath.psi(0, x)))
@@ -120,6 +122,7 @@ def _2f1_validate(a_i, b_i, a_j, b_j, y, mu, offset=1.0):
         val = mpmath.re(mpmath.hyp2f1(A, B, C, z, maxterms=1e7))
         return val / offset
 
+    @pytest.mark.skip(reason="_hyp2f1_unity now an inner function for numba")
     def test_2f1(self, pars):
         a_i, b_i, a_j, b_j, y, mu = pars
         A = a_j

diff --git a/tsdate/accelerate.py b/tsdate/accelerate.py
@@ -0,0 +1,27 @@
+import os
+from typing import Callable
+
+from numba import jit
+
+# By default we disable the numba cache. See e.g.
+# https://github.com/sgkit-dev/sgkit/blob/main/sgkit/accelerate.py
+_ENABLE_CACHE = os.environ.get("TSDATE_ENABLE_NUMBA_CACHE", "0")
+
+try:
+    CACHE_NUMBA = {"0": False, "1": True}[_ENABLE_CACHE]
+except KeyError as e:  # pragma: no cover
+    raise KeyError(
+        "Environment variable 'TSDATE_ENABLE_NUMBA_CACHE' must be '0' or '1'"
+    ) from e
+
+
+DEFAULT_NUMBA_ARGS = {
+    "nopython": True,
+    "cache": CACHE_NUMBA,
+}
+
+
+def numba_jit(*args, **kwargs) -> Callable:  # pragma: no cover
+    kwargs_ = DEFAULT_NUMBA_ARGS.copy()
+    kwargs_.update(kwargs)
+    return jit(*args, **kwargs_)