Add first tests module and file indexing system #54

Merged: 24 commits, Jul 26, 2023

Commits (changes shown from 16 of 24 commits)
a259c9a
Add tests of blueice_extended_model
dachengx Jul 22, 2023
905b09f
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 22, 2023
9fc8e43
Add docstrings
dachengx Jul 22, 2023
ef1d713
Add iminuit
dachengx Jul 22, 2023
7feabb4
Set data before get expectation values
dachengx Jul 22, 2023
863ef67
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 25, 2023
33fe8ab
Change import path
dachengx Jul 25, 2023
ecd10ee
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 25, 2023
59a0122
Add a function get_file_path to get file path
dachengx Jul 25, 2023
ca81ac2
Make Parameters deepcopyable
dachengx Jul 25, 2023
82e858d
Happier code style
dachengx Jul 25, 2023
166033e
Move template_folder_list outside the loop
dachengx Jul 25, 2023
e769327
Raise error when initialize StatisticalModel directly
dachengx Jul 25, 2023
3c27ec7
Do not need to be too cautious because 166033e9a3157d40bc41f009d2e919…
dachengx Jul 25, 2023
d3a3723
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 25, 2023
a1654dc
This is why we want to accelerate the unittest
dachengx Jul 25, 2023
37f00ff
Directly call ll of model
dachengx Jul 25, 2023
5273334
some more fitting tests
kdund Jul 25, 2023
264e663
rd drudgery
kdund Jul 25, 2023
59a3902
Merge pull request #59 from XENONnT/knuttest
dachengx Jul 25, 2023
258a15b
Use setUp instead of __init__ of TestCase
dachengx Jul 25, 2023
7da8d53
Remove url_base, add get_template_folder_list function
dachengx Jul 26, 2023
0250340
Add TODO comment on get_expectation_values
dachengx Jul 26, 2023
34d1c44
Minor change
dachengx Jul 26, 2023
3 changes: 3 additions & 0 deletions alea/__init__.py
@@ -13,3 +13,6 @@

from . import simulators
from .simulators import *

from . import template_source
from .template_source import *
17 changes: 16 additions & 1 deletion alea/model.py
@@ -7,6 +7,7 @@
from scipy.optimize import brentq
from iminuit import Minuit
from iminuit.util import make_func_code
from blueice.likelihood import _needs_data
from inference_interface import toydata_to_file

from alea.parameters import Parameters
@@ -58,7 +59,18 @@ def __init__(
confidence_interval_kind: str = "central", # one of central, upper, lower
confidence_interval_threshold: Callable[[float], float] = None,
):
self._data = data
"""Initialize a statistical model"""
if type(self) == StatisticalModel:
raise RuntimeError(
"You cannot instantiate the StatisticalModel class directly, "
"you must use a subclass where the likelihood function and data generation "
"method are implemented")

# following https://github.com/JelleAalbers/blueice/blob/
# 7c10222a13227e78dc7224b1a7e56ff91e4a8043/blueice/likelihood.py#L97
self.is_data_set = False
if data is not None:
self.data = data
self._confidence_level = confidence_level
self._confidence_interval_kind = confidence_interval_kind
self.confidence_interval_threshold = confidence_interval_threshold
@@ -93,6 +105,7 @@ def _generate_data(self, **kwargs):
"You must write a data-generation method (_generate_data) for your statistical model"
" or use a subclass where it is written for you")

@_needs_data
def ll(self, **kwargs) -> float:
"""
Likelihood function, returns the loglikelihood for the given parameters.
@@ -143,6 +156,7 @@ def data(self, data):
representing the data-sets of one or more likelihood terms.
"""
self._data = data
self.is_data_set = True

def store_data(
self, file_name, data_list, data_name_list=None, metadata = None):
@@ -205,6 +219,7 @@ def cost(args):

return cost

@_needs_data
def fit(self, verbose=False, **kwargs) -> Tuple[dict, float]:
"""
Fit the model to the data by maximizing the likelihood
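Taken together, the model.py changes enforce a simple contract: StatisticalModel can no longer be instantiated directly, and ll/fit are wrapped in blueice's _needs_data decorator, which only passes once the data setter has flipped is_data_set. A minimal sketch of that contract, with a stand-in decorator and a hypothetical subclass (not the actual alea or blueice code):

```python
import functools
import numpy as np
from scipy import stats

def needs_data(method):
    """Stand-in for blueice's _needs_data: refuse to run before data is set."""
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        if not getattr(self, "is_data_set", False):
            raise RuntimeError(f"{method.__name__} requires data; set model.data first")
        return method(self, *args, **kwargs)
    return wrapper

class BaseModel:
    def __init__(self, data=None):
        if type(self) == BaseModel:
            raise RuntimeError("Instantiate a subclass that implements the likelihood")
        self.is_data_set = False
        if data is not None:
            self.data = data  # goes through the setter below

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        self._data = data
        self.is_data_set = True  # unlocks @needs_data methods

    @needs_data
    def ll(self, **kwargs):
        return self._ll(**kwargs)

class GaussianModel(BaseModel):
    """Hypothetical subclass: a single Gaussian likelihood term."""
    def _ll(self, mu=0.0, sigma=1.0):
        return float(np.sum(stats.norm.logpdf(self.data, mu, sigma)))

model = GaussianModel(data=[0.1, -0.3, 0.2])
print(model.ll(mu=0.0, sigma=1.0))   # works: data was set in __init__
# GaussianModel().ll()               # would raise: data not set yet
# BaseModel()                        # would raise: must use a subclass
```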
2 changes: 1 addition & 1 deletion alea/model_configs/unbinned_wimp_statistical_model.yaml
@@ -62,7 +62,7 @@ parameter_definition:

likelihood_config:
likelihood_weights: [1, 1, 1]
template_folder: alea/templates
template_folder: []
likelihood_terms:
# SR0
- name: sr0
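The config no longer hard-codes template_folder: alea/templates; an empty list works because _build_ll_from_config in blueice_extended_model.py (next file) now tolerates a missing key, a single path, or a list of paths. A rough sketch of that normalization as a standalone helper (the function name is illustrative, not part of alea):

```python
from typing import List

def normalize_template_folder(likelihood_config: dict) -> List[str]:
    """Accept a missing key, a single folder string, or a list of folders."""
    folder = likelihood_config.get("template_folder", [])
    if isinstance(folder, str):
        return [folder]
    if isinstance(folder, list):
        return folder
    raise ValueError("template_folder must be either a string or a list of strings.")

print(normalize_template_folder({}))                                      # []
print(normalize_template_folder({"template_folder": "alea/templates"}))   # ['alea/templates']
print(normalize_template_folder({"template_folder": ["a", "b"]}))         # ['a', 'b']
```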
45 changes: 27 additions & 18 deletions alea/models/blueice_extended_model.py
@@ -1,16 +1,17 @@
from pydoc import locate # to lookup likelihood class
from typing import List
from copy import deepcopy
from pydoc import locate

import yaml
import numpy as np
import scipy.stats as stats
from blueice.likelihood import LogAncillaryLikelihood, LogLikelihoodSum
from inference_interface import dict_to_structured_array
from inference_interface import dict_to_structured_array, structured_array_to_dict

from alea.model import StatisticalModel
from alea.parameters import Parameters
from alea.simulators import BlueiceDataGenerator
from alea.utils import adapt_likelihood_config_for_blueice
from alea.parameters import Parameters


class BlueiceExtendedModel(StatisticalModel):
@@ -73,22 +74,27 @@ def data(self, data: list):
ll_term.set_data(d)

self._data = data
self.is_data_set = True

def get_expectation_values(self, **kwargs) -> dict:
"""
Return total expectation values (summed over all likelihood terms with the same name)
given a number of named parameters (kwargs)
"""
ret = dict()
# ancillary likelihood does not contribute
for ll in self._likelihood.likelihood_list[:-1]:

ll_pars = list(ll.rate_parameters.keys()) + list(ll.shape_parameters.keys())
# calling ll needs data to be set
self_copy = deepcopy(self)
self_copy.data = self_copy.generate_data()

# ancillary likelihood does not contribute
for ll_term in self_copy._likelihood.likelihood_list[:-1]:
ll_pars = list(ll_term.rate_parameters.keys()) + list(ll_term.shape_parameters.keys())
ll_pars += ["livetime_days"]
call_args = {k: i for k, i in kwargs.items() if k in ll_pars}

mus = ll(full_output=True, **call_args)[1]
for n, mu in zip(ll.source_name_list, mus):
mus = ll_term(full_output=True, **call_args)[1]
for n, mu in zip(ll_term.source_name_list, mus):
ret[n] = ret.get(n, 0) + mu
return ret

@@ -101,16 +107,19 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":
"""
lls = []

if "template_folder" not in likelihood_config:
likelihood_config["template_folder"] = []
if isinstance(likelihood_config["template_folder"], str):
template_folder_list = [likelihood_config["template_folder"]]
elif isinstance(likelihood_config["template_folder"], list):
template_folder_list = likelihood_config["template_folder"]
else:
raise ValueError(
"template_folder must be either a string or a list of strings.")

# Iterate through each likelihood term in the configuration
for config in likelihood_config["likelihood_terms"]:
likelihood_object = locate(config["likelihood_type"])
if isinstance(likelihood_config["template_folder"], str):
template_folder_list = [likelihood_config["template_folder"]]
elif isinstance(likelihood_config["template_folder"], list):
template_folder_list = likelihood_config["template_folder"]
else:
raise ValueError(
"template_folder must be either a string or a list of strings.")

blueice_config = adapt_likelihood_config_for_blueice(
config, template_folder_list)
@@ -134,7 +143,6 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":
ll = likelihood_object(blueice_config)

for source in config["sources"]:

# Set rate parameters
rate_parameters = [
p for p in source["parameters"] if self.parameters[p].ptype == "rate"]
@@ -283,10 +291,11 @@ def set_data(self, d: np.array):
d (np.array): Data of ancillary measurements, stored as numpy array
"""
# This results in shifted constraint terms.
if set(d.keys()) != set(self.parameters.names):
d_dict = structured_array_to_dict(d)
if set(d_dict.keys()) != set(self.parameters.names):
raise ValueError(
"The data dict must contain all parameters as keys in CustomAncillaryLikelihood.")
self.constraint_functions = self._get_constraint_functions(**d)
self.constraint_functions = self._get_constraint_functions(**d_dict)

def ancillary_likelihood_sum(self, evaluate_at: dict) -> float:
"""Return the sum of all constraint terms.
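One detail worth noting from this file: CustomAncillaryLikelihood.set_data now converts the incoming structured array to a plain dict (via structured_array_to_dict from inference_interface) before checking keys and rebuilding the constraint functions. A rough stand-in for that conversion using plain numpy, with hypothetical parameter names (the real helper may differ in detail):

```python
import numpy as np

def as_dict(d: np.ndarray) -> dict:
    """Turn a one-record structured array of ancillary measurements into {name: value}."""
    return {name: d[name].item() for name in d.dtype.names}

# Hypothetical ancillary measurement record
d = np.array(
    [(1.0, 0.2)],
    dtype=[("wimp_rate_multiplier", float), ("er_rate_multiplier", float)],
)
d_dict = as_dict(d)
print(d_dict)  # {'wimp_rate_multiplier': 1.0, 'er_rate_multiplier': 0.2}

# set_data can then compare keys against parameter names and build constraints:
assert set(d_dict.keys()) == {"wimp_rate_multiplier", "er_rate_multiplier"}
```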
11 changes: 6 additions & 5 deletions alea/parameters.py
@@ -274,8 +274,9 @@ def __call__(
values[name] = new_val if new_val is not None else param.nominal_value
if any(i is None for k, i in values.items()):
emptypars = ", ".join([k for k, i in values.items() if i is None])
raise AssertionError("All parameters must be set explicitly, or have a nominal value,"
" encountered for: " + emptypars)
raise AssertionError(
"All parameters must be set explicitly, or have a nominal value,"
" encountered for: " + emptypars)
return values

def __getattr__(self, name: str) -> Parameter:
@@ -291,9 +292,9 @@ def __getattr__(self, name: str) -> Parameter:
Raises:
AttributeError: If the attribute is not found.
"""
if name in self.parameters:
return self.parameters[name]
else:
try:
return super().__getattribute__('parameters')[name]
except KeyError:
raise AttributeError(f"Attribute '{name}' not found.")

def __getitem__(self, name: str) -> Parameter:
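The __getattr__ rewrite pairs with the "Make Parameters deepcopyable" commit: copy.deepcopy reconstructs an object before its instance attributes exist, and at that point a __getattr__ that reads self.parameters calls itself forever, whereas super().__getattribute__('parameters') simply raises and lets the lookup fail cleanly. A toy reproduction of the pitfall (not alea code):

```python
import copy

class Fragile:
    def __init__(self):
        self.parameters = {"x": 1}

    def __getattr__(self, name):
        # deepcopy probes attributes such as __setstate__ on a freshly created,
        # still-empty instance; self.parameters then re-enters __getattr__
        # and the recursion never terminates.
        if name in self.parameters:
            return self.parameters[name]
        raise AttributeError(name)

class Robust:
    def __init__(self):
        self.parameters = {"x": 1}

    def __getattr__(self, name):
        try:
            # Bypass __getattr__ entirely; a missing 'parameters' attribute
            # raises AttributeError instead of recursing.
            return super().__getattribute__("parameters")[name]
        except KeyError:
            raise AttributeError(f"Attribute '{name}' not found.")

print(copy.deepcopy(Robust()).x)   # 1
# copy.deepcopy(Fragile())         # RecursionError during deepcopy
```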
39 changes: 20 additions & 19 deletions alea/template_source.py
@@ -24,6 +24,7 @@ class TemplateSource(blueice.HistogramPdfSource):
:param log10_bins: List of axis numbers.
If True, bin edges on this axis in the root file are log10() of the actual bin edges.
"""

def build_histogram(self):
format_dict = {
k: self.config[k]
@@ -71,7 +72,6 @@ def build_histogram(self):
stop=slice_axis_limits[1])
logging.debug(f"Normalization after slicing: {h.n}.")


if collapse_axis is not None:
if collapse_slices is None:
raise ValueError(
@@ -94,9 +94,8 @@ def build_histogram(self):
self.config['analysis_space']):
expected_bin_edges = np.array(expected_bin_edges)
seen_bin_edges = h.bin_edges[axis_i]
if len(
self.config['analysis_space']
) == 1: # If 1D, hist1d returns bin_edges straight, not as list
# If 1D, hist1d returns bin_edges straight, not as list
if len(self.config['analysis_space']) == 1:
seen_bin_edges = h.bin_edges
logging.debug("axis_i: " + str(axis_i))
logging.debug("expected_bin_edges: " + str(expected_bin_edges))
@@ -175,6 +174,7 @@ class CombinedSource(blueice.HistogramPdfSource):
Must be 1 shorter than histnames, templatenames
:param histogram_parameters: names of parameters that should be put in the hdf5/histogram names,
"""

def build_histogram(self):
weight_names = self.config.get("weight_names")
weights = [
@@ -355,27 +355,28 @@ def simulate(self, n_events):
return ret


def get_json_spectrum(fn):
"""
Translates bbf-style JSON files to spectra.
units are keV and /kev*day*kg
"""
contents = json.load(open(fn, "r"))
logging.debug(contents["description"])
esyst = contents["coordinate_system"][0][1]
ret = interp1d(
np.linspace(*esyst), contents["map"],
bounds_error=False, fill_value=0.)
return ret


class SpectrumTemplateSource(blueice.HistogramPdfSource):
"""
:param spectrum_name: name of bbf json-like spectrum _OR_ function that can be called
templatename #3D histogram (Etrue,S1,S2) to open
:param histname: histogram name
:param named_parameters: list of config settings to pass to .format on histname and filename
"""

@staticmethod
def _get_json_spectrum(fn):
"""
Translates bbf-style JSON files to spectra.
units are keV and /keV*day*kg
"""
contents = json.load(open(fn, "r"))
logging.debug(contents["description"])
esyst = contents["coordinate_system"][0][1]
ret = interp1d(
np.linspace(*esyst), contents["map"],
bounds_error=False, fill_value=0.)
return ret

def build_histogram(self):
logging.debug("building a hist")
format_dict = {
@@ -387,7 +388,7 @@ def build_histogram(self):

spectrum = self.config["spectrum"]
if type(spectrum) is str:
spectrum = get_json_spectrum(spectrum.format(**format_dict))
spectrum = self._get_json_spectrum(spectrum.format(**format_dict))

slice_args = self.config.get("slice_args", {})
if type(slice_args) is dict:
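get_json_spectrum now lives on SpectrumTemplateSource as the staticmethod _get_json_spectrum. Reading its body, the bbf-style file is expected to carry np.linspace arguments for the energy axis in coordinate_system[0][1] and the matching values in map. A small round-trip sketch of that format (the file name and contents are made up for illustration):

```python
import json
import numpy as np
from scipy.interpolate import interp1d

# Hypothetical bbf-style spectrum file, laid out as _get_json_spectrum reads it.
spectrum_json = {
    "description": "toy flat ER spectrum (illustration only)",
    "coordinate_system": [["energy_keV", [0.0, 50.0, 51]]],  # np.linspace arguments
    "map": np.ones(51).tolist(),                             # one value per energy point
}
with open("toy_spectrum.json", "w") as f:
    json.dump(spectrum_json, f)

# Same steps as the staticmethod: linspace the stored axis, interpolate the map,
# and return 0 outside the tabulated range.
contents = json.load(open("toy_spectrum.json"))
energies = np.linspace(*contents["coordinate_system"][0][1])
spectrum = interp1d(energies, contents["map"], bounds_error=False, fill_value=0.0)
print(spectrum(25.0), spectrum(120.0))  # 1.0 inside the range, 0.0 outside
```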