Add first tests module and file indexing system #54

Merged: 24 commits, Jul 26, 2023

Commits (changes shown from 16 of 24 commits)
a259c9a
Add tests of blueice_extended_model
dachengx Jul 22, 2023
905b09f
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 22, 2023
9fc8e43
Add docstrings
dachengx Jul 22, 2023
ef1d713
Add iminuit
dachengx Jul 22, 2023
7feabb4
Set data before get expectation values
dachengx Jul 22, 2023
863ef67
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 25, 2023
33fe8ab
Change import path
dachengx Jul 25, 2023
ecd10ee
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 25, 2023
59a0122
Add a function get_file_path to get file path
dachengx Jul 25, 2023
ca81ac2
Make Parameters deepcopyable
dachengx Jul 25, 2023
82e858d
Happier code style
dachengx Jul 25, 2023
166033e
Move template_folder_list outside the loop
dachengx Jul 25, 2023
e769327
Raise error when initialize StatisticalModel directly
dachengx Jul 25, 2023
3c27ec7
Do not need to be too cautious because 166033e9a3157d40bc41f009d2e919…
dachengx Jul 25, 2023
d3a3723
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 25, 2023
a1654dc
This is why we want to accelerate the unittest
dachengx Jul 25, 2023
37f00ff
Directly call ll of model
dachengx Jul 25, 2023
5273334
some more fitting tests
kdund Jul 25, 2023
264e663
rd drudgery
kdund Jul 25, 2023
59a3902
Merge pull request #59 from XENONnT/knuttest
dachengx Jul 25, 2023
258a15b
Use setUp instead of __init__ of TestCase
dachengx Jul 25, 2023
7da8d53
Remove url_base, add get_template_folder_list function
dachengx Jul 26, 2023
0250340
Add TODO comment on get_expectation_values
dachengx Jul 26, 2023
34d1c44
Minor change
dachengx Jul 26, 2023
3 changes: 3 additions & 0 deletions alea/__init__.py
@@ -13,3 +13,6 @@

from . import simulators
from .simulators import *

from . import template_source
from .template_source import *
17 changes: 16 additions & 1 deletion alea/model.py
@@ -7,6 +7,7 @@
from scipy.optimize import brentq
from iminuit import Minuit
from iminuit.util import make_func_code
from blueice.likelihood import _needs_data
from inference_interface import toydata_to_file

from alea.parameters import Parameters
@@ -58,7 +59,18 @@ def __init__(
confidence_interval_kind: str = "central", # one of central, upper, lower
confidence_interval_threshold: Callable[[float], float] = None,
):
self._data = data
"""Initialize a statistical model"""
if type(self) == StatisticalModel:
raise RuntimeError(
"You cannot instantiate the StatisticalModel class directly, "
"you must use a subclass where the likelihood function and data generation "
"method are implemented")

# following https://github.com/JelleAalbers/blueice/blob/
# 7c10222a13227e78dc7224b1a7e56ff91e4a8043/blueice/likelihood.py#L97
self.is_data_set = False
if data is not None:
self.data = data
self._confidence_level = confidence_level
self._confidence_interval_kind = confidence_interval_kind
self.confidence_interval_threshold = confidence_interval_threshold
@@ -93,6 +105,7 @@ def _generate_data(self, **kwargs):
"You must write a data-generation method (_generate_data) for your statistical model"
" or use a subclass where it is written for you")

@_needs_data
def ll(self, **kwargs) -> float:
"""
Likelihood function, returns the loglikelihood for the given parameters.
@@ -143,6 +156,7 @@ def data(self, data):
representing the data-sets of one or more likelihood terms.
"""
self._data = data
self.is_data_set = True

def store_data(
self, file_name, data_list, data_name_list=None, metadata = None):
@@ -205,6 +219,7 @@ def cost(args):

return cost

@_needs_data
def fit(self, verbose=False, **kwargs) -> Tuple[dict, float]:
"""
Fit the model to the data by maximizing the likelihood
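Taken together, the model.py changes enforce a simple contract: StatisticalModel can no longer be instantiated directly, and ll/fit are wrapped in blueice's _needs_data decorator, which only passes once the data setter has flipped is_data_set. A minimal sketch of that contract, with a stand-in decorator and a hypothetical subclass (not the actual alea or blueice code):

```python
import functools
import numpy as np
from scipy import stats

def needs_data(method):
    """Stand-in for blueice's _needs_data: refuse to run before data is set."""
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        if not getattr(self, "is_data_set", False):
            raise RuntimeError(f"{method.__name__} requires data; set model.data first")
        return method(self, *args, **kwargs)
    return wrapper

class BaseModel:
    def __init__(self, data=None):
        if type(self) == BaseModel:
            raise RuntimeError("Instantiate a subclass that implements the likelihood")
        self.is_data_set = False
        if data is not None:
            self.data = data  # goes through the setter below

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        self._data = data
        self.is_data_set = True  # unlocks @needs_data methods

    @needs_data
    def ll(self, **kwargs):
        return self._ll(**kwargs)

class GaussianModel(BaseModel):
    """Hypothetical subclass: a single Gaussian likelihood term."""
    def _ll(self, mu=0.0, sigma=1.0):
        return float(np.sum(stats.norm.logpdf(self.data, mu, sigma)))

model = GaussianModel(data=[0.1, -0.3, 0.2])
print(model.ll(mu=0.0, sigma=1.0))   # works: data was set in __init__
# GaussianModel().ll()               # would raise: data not set yet
# BaseModel()                        # would raise: must use a subclass
```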
2 changes: 1 addition & 1 deletion alea/model_configs/unbinned_wimp_statistical_model.yaml
@@ -62,7 +62,7 @@ parameter_definition:

likelihood_config:
likelihood_weights: [1, 1, 1]
template_folder: alea/templates
template_folder: []
likelihood_terms:
# SR0
- name: sr0
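The config no longer hard-codes template_folder: alea/templates; an empty list works because _build_ll_from_config in blueice_extended_model.py (next file) now tolerates a missing key, a single path, or a list of paths. A rough sketch of that normalization as a standalone helper (the function name is illustrative, not part of alea):

```python
from typing import List

def normalize_template_folder(likelihood_config: dict) -> List[str]:
    """Accept a missing key, a single folder string, or a list of folders."""
    folder = likelihood_config.get("template_folder", [])
    if isinstance(folder, str):
        return [folder]
    if isinstance(folder, list):
        return folder
    raise ValueError("template_folder must be either a string or a list of strings.")

print(normalize_template_folder({}))                                      # []
print(normalize_template_folder({"template_folder": "alea/templates"}))   # ['alea/templates']
print(normalize_template_folder({"template_folder": ["a", "b"]}))         # ['a', 'b']
```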
45 changes: 27 additions & 18 deletions alea/models/blueice_extended_model.py
@@ -1,16 +1,17 @@
from pydoc import locate # to lookup likelihood class
from typing import List
from copy import deepcopy
from pydoc import locate

import yaml
import numpy as np
import scipy.stats as stats
from blueice.likelihood import LogAncillaryLikelihood, LogLikelihoodSum
from inference_interface import dict_to_structured_array
from inference_interface import dict_to_structured_array, structured_array_to_dict

from alea.model import StatisticalModel
from alea.parameters import Parameters
from alea.simulators import BlueiceDataGenerator
from alea.utils import adapt_likelihood_config_for_blueice
from alea.parameters import Parameters


class BlueiceExtendedModel(StatisticalModel):
@@ -73,22 +74,27 @@ def data(self, data: list):
ll_term.set_data(d)

self._data = data
self.is_data_set = True

def get_expectation_values(self, **kwargs) -> dict:
"""
Return total expectation values (summed over all likelihood terms with the same name)
given a number of named parameters (kwargs)
"""
ret = dict()
# ancillary likelihood does not contribute
for ll in self._likelihood.likelihood_list[:-1]:

ll_pars = list(ll.rate_parameters.keys()) + list(ll.shape_parameters.keys())
# calling ll needs data to be set
self_copy = deepcopy(self)
self_copy.data = self_copy.generate_data()

# ancillary likelihood does not contribute
for ll_term in self_copy._likelihood.likelihood_list[:-1]:
ll_pars = list(ll_term.rate_parameters.keys()) + list(ll_term.shape_parameters.keys())
ll_pars += ["livetime_days"]
call_args = {k: i for k, i in kwargs.items() if k in ll_pars}

mus = ll(full_output=True, **call_args)[1]
for n, mu in zip(ll.source_name_list, mus):
mus = ll_term(full_output=True, **call_args)[1]
for n, mu in zip(ll_term.source_name_list, mus):
ret[n] = ret.get(n, 0) + mu
return ret

@@ -101,16 +107,19 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":
"""
lls = []

if "template_folder" not in likelihood_config:
likelihood_config["template_folder"] = []
if isinstance(likelihood_config["template_folder"], str):
template_folder_list = [likelihood_config["template_folder"]]
elif isinstance(likelihood_config["template_folder"], list):
template_folder_list = likelihood_config["template_folder"]
else:
raise ValueError(
"template_folder must be either a string or a list of strings.")

# Iterate through each likelihood term in the configuration
for config in likelihood_config["likelihood_terms"]:
likelihood_object = locate(config["likelihood_type"])
if isinstance(likelihood_config["template_folder"], str):
template_folder_list = [likelihood_config["template_folder"]]
elif isinstance(likelihood_config["template_folder"], list):
template_folder_list = likelihood_config["template_folder"]
else:
raise ValueError(
"template_folder must be either a string or a list of strings.")

blueice_config = adapt_likelihood_config_for_blueice(
config, template_folder_list)
@@ -134,7 +143,6 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":
ll = likelihood_object(blueice_config)

for source in config["sources"]:

# Set rate parameters
rate_parameters = [
p for p in source["parameters"] if self.parameters[p].ptype == "rate"]
@@ -283,10 +291,11 @@ def set_data(self, d: np.array):
d (np.array): Data of ancillary measurements, stored as numpy array
"""
# This results in shifted constraint terms.
if set(d.keys()) != set(self.parameters.names):
d_dict = structured_array_to_dict(d)
if set(d_dict.keys()) != set(self.parameters.names):
raise ValueError(
"The data dict must contain all parameters as keys in CustomAncillaryLikelihood.")
self.constraint_functions = self._get_constraint_functions(**d)
self.constraint_functions = self._get_constraint_functions(**d_dict)

def ancillary_likelihood_sum(self, evaluate_at: dict) -> float:
"""Return the sum of all constraint terms.
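One detail worth noting from this file: CustomAncillaryLikelihood.set_data now converts the incoming structured array to a plain dict (via structured_array_to_dict from inference_interface) before checking keys and rebuilding the constraint functions. A rough stand-in for that conversion using plain numpy, with hypothetical parameter names (the real helper may differ in detail):

```python
import numpy as np

def as_dict(d: np.ndarray) -> dict:
    """Turn a one-record structured array of ancillary measurements into {name: value}."""
    return {name: d[name].item() for name in d.dtype.names}

# Hypothetical ancillary measurement record
d = np.array(
    [(1.0, 0.2)],
    dtype=[("wimp_rate_multiplier", float), ("er_rate_multiplier", float)],
)
d_dict = as_dict(d)
print(d_dict)  # {'wimp_rate_multiplier': 1.0, 'er_rate_multiplier': 0.2}

# set_data can then compare keys against parameter names and build constraints:
assert set(d_dict.keys()) == {"wimp_rate_multiplier", "er_rate_multiplier"}
```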
11 changes: 6 additions & 5 deletions alea/parameters.py
@@ -274,8 +274,9 @@ def __call__(
values[name] = new_val if new_val is not None else param.nominal_value
if any(i is None for k, i in values.items()):
emptypars = ", ".join([k for k, i in values.items() if i is None])
raise AssertionError("All parameters must be set explicitly, or have a nominal value,"
" encountered for: " + emptypars)
raise AssertionError(
"All parameters must be set explicitly, or have a nominal value,"
" encountered for: " + emptypars)
return values

def __getattr__(self, name: str) -> Parameter:
@@ -291,9 +292,9 @@ def __getattr__(self, name: str) -> Parameter:
Raises:
AttributeError: If the attribute is not found.
"""
if name in self.parameters:
return self.parameters[name]
else:
try:
return super().__getattribute__('parameters')[name]
except KeyError:
raise AttributeError(f"Attribute '{name}' not found.")

def __getitem__(self, name: str) -> Parameter:
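The __getattr__ rewrite pairs with the "Make Parameters deepcopyable" commit: copy.deepcopy reconstructs an object before its instance attributes exist, and at that point a __getattr__ that reads self.parameters calls itself forever, whereas super().__getattribute__('parameters') simply raises and lets the lookup fail cleanly. A toy reproduction of the pitfall (not alea code):

```python
import copy

class Fragile:
    def __init__(self):
        self.parameters = {"x": 1}

    def __getattr__(self, name):
        # deepcopy probes attributes such as __setstate__ on a freshly created,
        # still-empty instance; self.parameters then re-enters __getattr__
        # and the recursion never terminates.
        if name in self.parameters:
            return self.parameters[name]
        raise AttributeError(name)

class Robust:
    def __init__(self):
        self.parameters = {"x": 1}

    def __getattr__(self, name):
        try:
            # Bypass __getattr__ entirely; a missing 'parameters' attribute
            # raises AttributeError instead of recursing.
            return super().__getattribute__("parameters")[name]
        except KeyError:
            raise AttributeError(f"Attribute '{name}' not found.")

print(copy.deepcopy(Robust()).x)   # 1
# copy.deepcopy(Fragile())         # RecursionError during deepcopy
```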
39 changes: 20 additions & 19 deletions alea/template_source.py
@@ -24,6 +24,7 @@ class TemplateSource(blueice.HistogramPdfSource):
:param log10_bins: List of axis numbers.
If True, bin edges on this axis in the root file are log10() of the actual bin edges.
"""

def build_histogram(self):
format_dict = {
k: self.config[k]
@@ -71,7 +72,6 @@ def build_histogram(self):
stop=slice_axis_limits[1])
logging.debug(f"Normalization after slicing: {h.n}.")


if collapse_axis is not None:
if collapse_slices is None:
raise ValueError(
@@ -94,9 +94,8 @@ def build_histogram(self):
self.config['analysis_space']):
expected_bin_edges = np.array(expected_bin_edges)
seen_bin_edges = h.bin_edges[axis_i]
if len(
self.config['analysis_space']
) == 1: # If 1D, hist1d returns bin_edges straight, not as list
# If 1D, hist1d returns bin_edges straight, not as list
if len(self.config['analysis_space']) == 1:
seen_bin_edges = h.bin_edges
logging.debug("axis_i: " + str(axis_i))
logging.debug("expected_bin_edges: " + str(expected_bin_edges))
@@ -175,6 +174,7 @@ class CombinedSource(blueice.HistogramPdfSource):
Must be 1 shorter than histnames, templatenames
:param histogram_parameters: names of parameters that should be put in the hdf5/histogram names,
"""

def build_histogram(self):
weight_names = self.config.get("weight_names")
weights = [
@@ -355,27 +355,28 @@ def simulate(self, n_events):
return ret


def get_json_spectrum(fn):
"""
Translates bbf-style JSON files to spectra.
units are keV and /kev*day*kg
"""
contents = json.load(open(fn, "r"))
logging.debug(contents["description"])
esyst = contents["coordinate_system"][0][1]
ret = interp1d(
np.linspace(*esyst), contents["map"],
bounds_error=False, fill_value=0.)
return ret


class SpectrumTemplateSource(blueice.HistogramPdfSource):
"""
:param spectrum_name: name of bbf json-like spectrum _OR_ function that can be called
templatename #3D histogram (Etrue,S1,S2) to open
:param histname: histogram name
:param named_parameters: list of config settings to pass to .format on histname and filename
"""

@staticmethod
def _get_json_spectrum(fn):
"""
Translates bbf-style JSON files to spectra.
units are keV and /keV*day*kg
"""
contents = json.load(open(fn, "r"))
logging.debug(contents["description"])
esyst = contents["coordinate_system"][0][1]
ret = interp1d(
np.linspace(*esyst), contents["map"],
bounds_error=False, fill_value=0.)
return ret

def build_histogram(self):
logging.debug("building a hist")
format_dict = {
@@ -387,7 +388,7 @@ def build_histogram(self):

spectrum = self.config["spectrum"]
if type(spectrum) is str:
spectrum = get_json_spectrum(spectrum.format(**format_dict))
spectrum = self._get_json_spectrum(spectrum.format(**format_dict))

slice_args = self.config.get("slice_args", {})
if type(slice_args) is dict:
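get_json_spectrum now lives on SpectrumTemplateSource as the staticmethod _get_json_spectrum. Reading its body, the bbf-style file is expected to carry np.linspace arguments for the energy axis in coordinate_system[0][1] and the matching values in map. A small round-trip sketch of that format (the file name and contents are made up for illustration):

```python
import json
import numpy as np
from scipy.interpolate import interp1d

# Hypothetical bbf-style spectrum file, laid out as _get_json_spectrum reads it.
spectrum_json = {
    "description": "toy flat ER spectrum (illustration only)",
    "coordinate_system": [["energy_keV", [0.0, 50.0, 51]]],  # np.linspace arguments
    "map": np.ones(51).tolist(),                             # one value per energy point
}
with open("toy_spectrum.json", "w") as f:
    json.dump(spectrum_json, f)

# Same steps as the staticmethod: linspace the stored axis, interpolate the map,
# and return 0 outside the tabulated range.
contents = json.load(open("toy_spectrum.json"))
energies = np.linspace(*contents["coordinate_system"][0][1])
spectrum = interp1d(energies, contents["map"], bounds_error=False, fill_value=0.0)
print(spectrum(25.0), spectrum(120.0))  # 1.0 inside the range, 0.0 outside
```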