Read poi and expectation directly from output_filename to accelerate NeymanConstructor #108

Merged: 8 commits, Nov 13, 2023
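The gist of the change, as a hedged sketch rather than the exact implementation: instead of re-instantiating a Runner (and with it the full statistical model) just to recompute expectations, NeymanConstructor now reads the poi and its expectation straight from the metadata that `write_output` stores in the toy-MC output file. The file name below is hypothetical; the attribute keys mirror the ones this diff writes.

```python
import json
import h5py

# Sketch: read poi and expectation values straight from toy-MC output metadata,
# avoiding a slow model re-instantiation. "toymc_output.h5" is illustrative.
with h5py.File("toymc_output.h5", "r") as ipt:
    metadata = {key: json.loads(ipt.attrs[key]) for key in ipt.attrs.keys()}

poi = metadata["poi"]                              # e.g. "wimp_rate_multiplier"
expectation_values = metadata["expectation_values"]
source = poi.replace("_rate_multiplier", "")       # same convention as in the diff
poi_expectation = expectation_values.get(source)
```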
alea/model.py (1 addition, 1 deletion)
@@ -255,7 +255,7 @@ def get_expectation_values(self, **parameter_values):
             parameter_values: values of the parameters

         """
-        return NotImplementedError("get_expectation_values is optional to implement")
+        raise NotImplementedError("get_expectation_values is optional to implement")

     @property
     def nominal_expectation_values(self):
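For context on why this one-word change matters: the runner's new `write_output` (below) relies on catching `NotImplementedError`, and a returned exception instance is never caught. A minimal illustration:

```python
def broken():
    # Returns an exception *instance*; nothing is raised, so `except` never fires.
    return NotImplementedError("get_expectation_values is optional to implement")

def fixed():
    raise NotImplementedError("get_expectation_values is optional to implement")

try:
    result = broken()          # no exception is raised here
    print(type(result))        # <class 'NotImplementedError'>
    fixed()                    # this call actually raises
except NotImplementedError:
    print("caught")            # reached only via fixed()
```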
alea/models/blueice_extended_model.py (23 additions, 2 deletions)
@@ -2,6 +2,7 @@
 from typing import List, Dict, Callable, Optional, Union, cast
 from copy import deepcopy
 from pydoc import locate
+import itertools

 import numpy as np
 import scipy.stats as stats
@@ -138,11 +139,31 @@ def get_source_name_list(self, likelihood_name: str) -> list:
         ll_index = self.likelihood_names.index(likelihood_name)
         return self.likelihood_list[ll_index].source_name_list

+    def get_all_source_name(self) -> set:
+        """Return a set of possible source names from all likelihood terms.
+
+        Returns:
+            set: set of source names.
+
+        """
+        source_name = set(
+            itertools.chain.from_iterable([ll.source_name_list for ll in self.likelihood_list[:-1]])
+        )
+        return source_name
+
     @property
     def likelihood_list(self) -> List:
         """Return a list of likelihood terms."""
         return self._likelihood.likelihood_list

+    @property
+    def likelihood_parameters(self) -> List:
+        """Return a list of likelihood parameters."""
+        return self._likelihood.likelihood_parameters
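A quick illustration of what `get_all_source_name` collects, using hypothetical stand-ins for the blueice likelihood terms (the trailing ancillary term is excluded by the `[:-1]` slice):

```python
import itertools

class FakeTerm:
    """Hypothetical stand-in exposing only source_name_list."""
    def __init__(self, names):
        self.source_name_list = names

likelihood_list = [
    FakeTerm(["wimp", "er"]),    # first science-data term
    FakeTerm(["wimp", "cnns"]),  # second term, overlapping sources
    FakeTerm([]),                # ancillary term, skipped by [:-1]
]
source_name = set(
    itertools.chain.from_iterable([ll.source_name_list for ll in likelihood_list[:-1]])
)
print(source_name)  # {'wimp', 'er', 'cnns'} in some order; duplicates collapse
```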

def get_expectation_values(self, per_likelihood_term=False, **kwargs) -> dict:
"""Return total expectation values (summed over all likelihood terms with the same name)
given a number of named parameters (kwargs)
@@ -179,9 +200,9 @@ def get_expectation_values(self, per_likelihood_term=False, **kwargs) -> dict:

         # ancillary likelihood does not contribute
         for ll_term, ll_name, parameter_names, livetime_parameter in zip(
-            self_copy._likelihood.likelihood_list[:-1],
+            self_copy.likelihood_list[:-1],
             self_copy.likelihood_names[:-1],
-            self_copy._likelihood.likelihood_parameters,
+            self_copy.likelihood_parameters,
             self_copy.livetime_parameter_names,
         ):
             ret[ll_name] = {}
alea/runner.py (8 additions, 0 deletions)
@@ -119,6 +119,7 @@ def __init__(
             statistical_model_args = {}
         # nominal_values is a keyword argument
         self.nominal_values = nominal_values if nominal_values else {}
+        # initialize nominal_values only once
         statistical_model_args["nominal_values"] = self.nominal_values
         # likelihood_config is a keyword argument, because not all statistical models need it
         statistical_model_args["likelihood_config"] = likelihood_config
@@ -338,6 +339,13 @@ def write_output(self, results):
metadata["poi"] = self.poi
metadata["common_hypothesis"] = self.common_hypothesis
metadata["generate_values"] = self.generate_values
metadata["nominal_values"] = self.nominal_values
try:
metadata["expectation_values"] = self.model.get_expectation_values(
**self.generate_values
)
except NotImplementedError:
metadata["expectation_values"] = {}

array_metadatas = [{"hypotheses_values": ea} for ea in self._hypotheses_values]
numpy_arrays_and_names = [(r, rn) for r, rn in zip(results, result_names)]
alea/submitters/local.py (74 additions, 21 deletions)
@@ -5,10 +5,12 @@
 import subprocess
 import itertools
 import operator
+from glob import glob
 from functools import reduce
 from copy import deepcopy
 from typing import List, Dict, Any, Optional, Callable, cast

+import h5py
 import numpy as np
 from scipy.interpolate import interp1d, RegularGridInterpolator
 from inference_interface import toyfiles_to_numpy
@@ -156,19 +158,82 @@ def submit(
raise ValueError("confidence_levels" + message)

         # prepare the needed nominal_values and generate_values
-        nominal_values = runner_args["nominal_values"]
-        generate_values = runner_args["generate_values"]
+        nominal_values = deepcopy(runner_args["nominal_values"])
+        generate_values = deepcopy(runner_args["generate_values"])
         needed_kwargs = {
             **nominal_values,
             **generate_values,
         }
-        # read the likelihood ratio
-        output_filename = runner_args["output_filename"]
-        # add a * to the output_filename to read all the files
+        output_filename = runner_args["output_filename"].format(**needed_kwargs)
+        # try to add a * to the output_filename to read all the files
         fpat_split = os.path.splitext(output_filename)
-        output_filename = fpat_split[0] + "*" + fpat_split[1]
-        results = toyfiles_to_numpy(output_filename.format(**needed_kwargs))
+        _output_filename = fpat_split[0] + "_*" + fpat_split[1]
+        if len(sorted(glob(_output_filename))) != 0:
+            output_filename = _output_filename
+        output_filename_list = sorted(glob(output_filename))
+        if len(output_filename_list) == 0:
+            raise ValueError(f"Can not find any output file {output_filename}!")

+        # read metadata including generate_values
+        metadata_list = []
+        for _output_filename in output_filename_list:
+            with h5py.File(_output_filename, "r", libver="latest", swmr=True) as ipt:
+                metadata = dict(
+                    zip(
+                        ipt.attrs.keys(),
+                        [json.loads(ipt.attrs[key]) for key in ipt.attrs.keys()],
+                    )
+                )
+            metadata.pop("date", None)
+            metadata_list.append(metadata)
+        if len(set([deterministic_hash(m) for m in metadata_list])) != 1:
+            raise ValueError(
+                "The metadata are not the same for all "
+                f"the {len(output_filename_list)} outputs!"
+            )
+        metadata = metadata_list[0]
+        if metadata["poi"] != self.poi:
+            raise ValueError(
+                f"The poi in the metadata {metadata['poi']} is not "
+                f"the same as the poi {self.poi}!"
+            )
+
+        # read poi and poi_expectation
+        needed_kwargs = {**metadata["generate_values"], **needed_kwargs}
+        poi_expectation = needed_kwargs.pop("poi_expectation", None)
+        poi_value = needed_kwargs.get(self.poi, None)
+        if poi_value is None:
+            raise ValueError("Can not find the poi value in the generate_values in metadata!")
+        # read expectation_values from metadata
+        expectation_values = metadata["expectation_values"]
+        # check if the poi_expectation is in expectation_values
+        source = self.poi.replace("_rate_multiplier", "")
+        if source not in expectation_values:
+            warnings.warn(
+                f"poi {self.poi} does not correspond to any source in the model, "
+                "so the poi_expectation can not be calculated!"
+            )
+        else:
+            _poi_expectation = expectation_values[source]
+            if poi_expectation is not None:
+                # check if the poi_expectation is consistent
+                if not np.isclose(poi_expectation, _poi_expectation):
+                    raise ValueError(
+                        f"The poi_expectation from model {poi_expectation} is not "
+                        f"the same as the poi_expectation from toymc {_poi_expectation}!"
+                    )
+            else:
+                warnings.warn(
+                    "Can not find the poi_expectation in the generate_values "
+                    f"{generate_values}, so will not check if the poi_expectation "
+                    "is consistent!"
+                )
+            poi_expectation = _poi_expectation
+
+        # read the likelihood ratio
+        results = toyfiles_to_numpy(output_filename)
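The all-files consistency check above hashes each file's metadata; a stand-in built on hashlib shows the idea behind such a check (keys and values are hypothetical, and this is not the actual deterministic_hash implementation):

```python
import json
import hashlib

def metadata_hash(metadata: dict) -> str:
    # Hash a canonical JSON encoding so dicts with identical content compare equal.
    canonical = json.dumps(metadata, sort_keys=True)
    return hashlib.sha256(canonical.encode()).hexdigest()

metadata_list = [
    {"poi": "wimp_rate_multiplier", "generate_values": {"wimp_rate_multiplier": 1.0}},
    {"poi": "wimp_rate_multiplier", "generate_values": {"wimp_rate_multiplier": 1.0}},
]
if len({metadata_hash(m) for m in metadata_list}) != 1:
    raise ValueError("The metadata are not the same for all the outputs!")
```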
Collaborator:

Can we test some of this?

Collaborator:

In general, this method has become pretty lengthy. Do you think it could make sense to break out some functionality into smaller functions that you can then call in submit? This makes it easier to test and to read 😊

dachengx (Collaborator, Author), Nov 13, 2023:

The functions are split. I currently have no idea how to test the NeymanConstructor code, because we would need to save the output files or run the configs when testing. That can be time-consuming.

Collaborator:

You're right. Maybe we can write and load some trivial example or something? But it's not that straightforward, I agree.
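One way to realize the trivial example the reviewers discuss, sketched with h5py only (file name and metadata are hypothetical): write JSON-encoded attributes the way `write_output` does, then read them back the way `submit` now does.

```python
import json
import h5py

metadata_in = {
    "poi": "wimp_rate_multiplier",
    "generate_values": {"wimp_rate_multiplier": 1.0},
}

# Write: store each metadata entry as a JSON string in the HDF5 attributes.
with h5py.File("trivial_toymc.h5", "w") as opt:
    for key, value in metadata_in.items():
        opt.attrs[key] = json.dumps(value)

# Read: recover the dict the same way NeymanConstructor.submit does above.
with h5py.File("trivial_toymc.h5", "r") as ipt:
    metadata_out = {key: json.loads(ipt.attrs[key]) for key in ipt.attrs.keys()}

assert metadata_out == metadata_in
```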

llfree = results[free_name]["ll"]
lltrue = results[true_name]["ll"]
llrs = 2.0 * (llfree - lltrue)
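For reference, llrs here is the usual test statistic q = 2(ln L_free - ln L_true), which is non-negative whenever the free fit truly maximizes the likelihood, hence the warnings below about negative values. A sketch of how a threshold could then be extracted (the percentile form is an assumption, not this code's exact call):

```python
import numpy as np

rng = np.random.default_rng(0)
# Hypothetical toy outcomes: free-fit log-likelihood always >= fixed-poi one.
lltrue = rng.normal(size=2000)
llfree = lltrue + rng.chisquare(1, size=2000) / 2.0
llrs = 2.0 * (llfree - lltrue)

assert (llrs >= 0.0).all()  # negative entries would signal fit failures

# Threshold at a hypothetical 90% confidence level, Neyman-construction style.
q_llr = np.percentile(llrs, 100 * 0.9)
```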
@@ -179,25 +244,13 @@
f"{(llrs < 0.0).sum() / len(llrs):.2f}, "
"the median if negative log likelihood ratios "
f"is {np.median(llrs[llrs < 0.0]):.2e}, "
f"there might be a problem in your fitting.",
f"there might be a problem in your fitting."
)
         if len(llrs) < 1000:
             self.logging.warning(
-                "The number of toys is less than 1000, the threshold might not be accurate!",
+                "The number of toys is less than 1000, the threshold might not be accurate!"
             )

-        # update poi according to poi_expectation
-        runner_args["statistical_model_args"].pop("limit_threshold", None)
-        runner = Runner(**runner_args)
-        expectation_values = runner.model.get_expectation_values(
-            **{**nominal_values, **generate_values}
-        )
-        # in some rare cases the poi is not a rate multiplier
-        # then the poi_expectation is not in the nominal_expectation_values
-        component = self.poi.replace("_rate_multiplier", "")
-        poi_expectation = expectation_values.get(component, None)
-        poi_value = generate_values.pop(self.poi)

+        # make sure poi and poi_expectation are not in the hashed_keys
+        generate_values.pop(self.poi, None)
+        generate_values.pop("poi_expectation", None)
@@ -226,7 +279,7 @@
"threshold": [],
"poi_expectation": [],
}
threshold[threshold_key] = threshold_value
threshold[threshold_key] = deepcopy(threshold_value)
threshold[threshold_key][self.poi].append(poi_value)
threshold[threshold_key]["threshold"].append(q_llr)
threshold[threshold_key]["poi_expectation"].append(poi_expectation)
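Downstream, these per-hypothesis lists feed the limit-threshold interpolation; a hedged sketch of that consumption (interp1d is imported above, but the exact call site lies outside this diff, and the numbers are hypothetical):

```python
import numpy as np
from scipy.interpolate import interp1d

# Hypothetical accumulated threshold entries for one hypothesis key.
threshold_entry = {
    "wimp_rate_multiplier": [0.5, 1.0, 2.0],   # poi values scanned
    "threshold": [2.6, 2.7, 2.9],              # q_llr threshold per poi value
    "poi_expectation": [5.0, 10.0, 20.0],      # expected counts per poi value
}

# Interpolate the threshold as a function of the poi, as a Neyman construction needs.
threshold_func = interp1d(
    threshold_entry["wimp_rate_multiplier"],
    threshold_entry["threshold"],
    bounds_error=False,
    fill_value="extrapolate",
)
print(float(threshold_func(1.5)))  # threshold at an unscanned poi value
```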