Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shape parameters #58

Merged
merged 34 commits into from
Jul 27, 2023
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
cfd15c3
first implementation of shape parameters
hammannr Jul 17, 2023
73b47c5
add shifted er templates
hammannr Jul 17, 2023
51e33cc
fix shape parameter init indent
hammannr Jul 17, 2023
ec68e8f
small fixes that were ovrelooked
hammannr Jul 19, 2023
fd82c52
switch to dict formatted data
hammannr Jul 19, 2023
cfed681
fix kwarg filetering
hammannr Jul 19, 2023
2215cc1
fix nominal rate parameter treatment
hammannr Jul 20, 2023
c0a28a7
Trying to befriend the dog
hammannr Jul 25, 2023
d4257d4
iminuit requirement
hammannr Jul 25, 2023
4495021
first implementation of shape parameters
hammannr Jul 17, 2023
b850f78
add shifted er templates
hammannr Jul 17, 2023
1412c69
fix shape parameter init indent
hammannr Jul 17, 2023
865c739
small fixes that were ovrelooked
hammannr Jul 19, 2023
281ec07
switch to dict formatted data
hammannr Jul 19, 2023
def9195
fix kwarg filetering
hammannr Jul 19, 2023
8b3a457
fix nominal rate parameter treatment
hammannr Jul 20, 2023
a56bdf9
Trying to befriend the dog
hammannr Jul 25, 2023
d33cfa2
iminuit requirement
hammannr Jul 25, 2023
a2ee6b3
Merge branch 'shape_parameters' of github.com:XENONnT/alea into shape…
hammannr Jul 25, 2023
fa0e94f
comply with structured-array data format
hammannr Jul 25, 2023
7a05c1e
re-introduce examples folder
hammannr Jul 25, 2023
889b276
fix init and config
hammannr Jul 25, 2023
4a1183a
fix init issue
hammannr Jul 25, 2023
c2ad202
clean-up inits
hammannr Jul 25, 2023
b44f678
Move MinuitWrap outside
hammannr Jul 25, 2023
b07019b
undo changing folders
hammannr Jul 26, 2023
e337b09
forgot the templates..
hammannr Jul 26, 2023
806235d
Merge branch 'main' into shape_parameters
hammannr Jul 26, 2023
5a225dc
patch for files with placeholders
hammannr Jul 26, 2023
962e258
update folder
hammannr Jul 26, 2023
ba4096a
Find formatted filepath in get_file_path
dachengx Jul 26, 2023
ab7dbdd
Happier code style
dachengx Jul 26, 2023
abb842b
Happier code style
dachengx Jul 26, 2023
d074e6d
Some formatting and small things
hammannr Jul 27, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@ __pycache__
*.pdf
*.ipynb
!notebooks/*.ipynb
!alea/examples/*.ipynb
*.h5
*.hdf5
!templates/*.h5
!templates/*.hdf5
!alea/examples/templates/*.h5
!alea/examples/templates/*.hdf5
!alea/templates/*.h5
!alea/templates/*.hdf5
docs/build
build
debug.py
6 changes: 3 additions & 3 deletions alea/_likelihoods/ll_nt_from_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import numpy as np
import scipy.stats as sps
from alea._plotting import pdf_plotter
from alea.simulators import simulate_interpolated
from alea.simulators import BlueiceDataGenerator
from blueice.inference import bestfit_scipy, one_parameter_interval
from blueice.likelihood import LogLikelihoodSum
from inference_interface import (dict_to_structured_array,
Expand Down Expand Up @@ -272,13 +272,13 @@ def set_toy_reading_mode(self, toydata_mode, toydata_file):
self.toydata_mode = toydata_mode
self.toydata_file = toydata_file
if toydata_mode == "none":
self.rgs = [simulate_interpolated(ll, binned=b) for ll, b in zip(self.lls, self.binned)]
self.rgs = [BlueiceDataGenerator(ll, binned=b) for ll, b in zip(self.lls, self.binned)]
if hasattr(self, "datasets_array"):
del self.datasets_array
if hasattr(self, "toydata_index"):
del self.toydata_index
elif toydata_mode == "write":
self.rgs = [simulate_interpolated(ll, binned=b) for ll, b in zip(self.lls, self.binned)]
self.rgs = [BlueiceDataGenerator(ll, binned=b) for ll, b in zip(self.lls, self.binned)]
self.datasets_array = []
if hasattr(self, "toydata_index"):
del self.toydata_index
Expand Down
41 changes: 19 additions & 22 deletions alea/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,36 +237,18 @@ def fit(self, verbose=False, **kwargs) -> Tuple[dict, float]:

cost = self.make_objective(minus=True, **kwargs)

class MinuitWrap:
"""
Wrapper for functions to be called by Minuit
s_args must be a list of argument names of function f
the names in this list must be the same as the keys of
the dictionary passed to the Minuit call.
"""

def __init__(self, f, s_args):
self.func = f
self.s_args = s_args
self.func_code = make_func_code(s_args)

def __call__(self, *args):
return self.func(args)

# Make the Minuit object
cost.errordef = Minuit.LIKELIHOOD
m = Minuit(
MinuitWrap(cost, s_args=self.parameters.names),
**defaults)
m = Minuit(MinuitWrap(cost, parameters=self.parameters),
**defaults)
m.errordef = Minuit.LIKELIHOOD
fixed_params = [] if fixed_parameters is None else fixed_parameters
fixed_params += self.parameters.not_fittable
for par in fixed_params:
m.fixed[par] = True
for n, l in self.parameters.fit_limits.items():
m.limits[n] = l

# Call migrad to do the actual minimization
m.migrad()
self.minuit_object = m
if verbose:
print(m)
# alert! This gives the _maximum_ likelihood
Expand Down Expand Up @@ -393,3 +375,18 @@ def t(hypothesis):
dl = np.nan

return dl, ul


class MinuitWrap:
"""
Wrapper for functions to be called by Minuit.
Initialized with a function f and a Parameters instance.
"""

def __init__(self, f, parameters: Parameters):
self.func = f
self.s_args = parameters.names
self._parameters = {p.name: p.fit_limits for p in parameters}

def __call__(self, *args):
return self.func(args)
44 changes: 25 additions & 19 deletions alea/model_configs/unbinned_wimp_statistical_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,24 +45,26 @@ parameter_definition:
fit_guess: 1.0
description: Parameter to account for the uncertain signal expectation given a certain cross-section

# er_band_shift:
# nominal_value: 0
# ptype: shape
# uncertainty: scipy.stats.uniform(loc=-1, scale=2)
# relative_uncertainty: false
# fittable: true
# blueice_anchors:
# - -1
# - 0
# - 1
# fit_limits:
# - -1
# - 1
# description: ER band shape parameter (shifts the ER band up and down)
er_band_shift:
nominal_value: 0
ptype: shape
uncertainty: null # scipy.stats.uniform(loc=-1, scale=2)
# relative_uncertainty: false
fittable: true
blueice_anchors:
- -2
- -1
- 0
- 1
- 2
fit_limits:
- -2
- 2
description: ER band shape parameter (shifts the ER band up and down)

likelihood_config:
likelihood_weights: [1, 1, 1]
template_folder: []
template_folder: alea/templates
likelihood_terms:
# SR0
- name: sr0
Expand All @@ -79,8 +81,10 @@ likelihood_config:
histname: er_template # TODO: implement a default histname based on the source name
parameters:
- er_rate_multiplier
# - er_band_shift
template_filename: er_template.h5
- er_band_shift
named_parameters:
- er_band_shift
template_filename: er_template_{er_band_shift}.h5
histogram_scale_factor: 1

- name: wimp
Expand Down Expand Up @@ -108,8 +112,10 @@ likelihood_config:
histname: er_template
parameters:
- er_rate_multiplier
# - er_band_shift
template_filename: er_template.h5
- er_band_shift
named_parameters:
- er_band_shift
template_filename: er_template_{er_band_shift}.h5
histogram_scale_factor: 2

- name: wimp
Expand Down
1 change: 0 additions & 1 deletion alea/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
from .gaussian_model import *

from .blueice_extended_model import *
71 changes: 41 additions & 30 deletions alea/models/blueice_extended_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,24 @@ def from_config(cls, config_file_path: str) -> "BlueiceExtendedModel":
return cls(**config)

@property
def data(self) -> list:
def data(self) -> dict:
"""Returns the data of the statistical model."""
return super().data

@data.setter
def data(self, data: list):
def data(self, data: dict):
"""
Overrides default setter. Will also set the data of the blueice ll.
Data-sets are expected to be in the form of a list of one
or more structured arrays-- representing the data-sets of one or more likelihood terms.
"""
# iterate through all likelihood terms and set the science data in the blueice ll
# last entry in data are the generate_values
for d, ll_term in zip(data[:-1], self._likelihood.likelihood_list):
ll_term.set_data(d)
for i, (dataset_name, d) in enumerate(data.items()):
if dataset_name != "generate_values":
ll_term = self._likelihood.likelihood_list[i]
assert dataset_name == ll_term.pdf_base_config["name"], "Likelihood names do not match."
ll_term.set_data(d)

self._data = data
self.is_data_set = True
Expand All @@ -84,17 +87,18 @@ def get_expectation_values(self, **kwargs) -> dict:
because it calls the ll and requires data to be set.
We should update this function in the future after we stop using blueice.
"""
generate_values = self.parameters(**kwargs) # kwarg or nominal value
ret = dict()

# calling ll need data to be set
self_copy = deepcopy(self)
self_copy.data = self_copy.generate_data()

# ancillary likelihood does not contribute
for ll_term in self_copy._likelihood.likelihood_list[:-1]:
ll_pars = list(ll_term.rate_parameters.keys()) + list(ll_term.shape_parameters.keys())
ll_pars += ["livetime_days"]
call_args = {k: i for k, i in kwargs.items() if k in ll_pars}
# TODO: Make a self.likelihood_temrs dict with the likelihood names as keys and the corresponding likelihood terms as values.
for ll_term, parameter_names in zip(self_copy._likelihood.likelihood_list[:-1],
self_copy._likelihood.likelihood_parameters): # ancillary likelihood does not contribute
call_args = {k: i for k, i in generate_values.items() if k in parameter_names} # WARNING: This silently drops parameters it can't handle!

mus = ll_term(full_output=True, **call_args)[1]
for n, mu in zip(ll_term.source_name_list, mus):
Expand All @@ -121,7 +125,10 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":
blueice_config["livetime_days"] = self.parameters[
blueice_config["livetime_parameter"]].nominal_value
for p in self.parameters:
blueice_config[p.name] = blueice_config.get(p.name, p.nominal_value)
# adding the nominal rate values will screw things up in blueice!
# So here we're just adding the nominal values of all other parameters
if p.ptype != "rate":
blueice_config[p.name] = blueice_config.get(p.name, p.nominal_value)

# add all parameters to extra_dont_hash for each source unless it is used:
for i, source in enumerate(config["sources"]):
Expand Down Expand Up @@ -153,25 +160,28 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":
"Only rate multipliers that end on _rate_multiplier"
" are currently supported.")

# Set shape parameters
shape_parameters = [
p for p in source["parameters"] if self.parameters[p].ptype == "shape"]
if shape_parameters:
# TODO: Implement setting shape parameters
raise NotImplementedError("Shape parameters are not yet supported.")

# Set efficiency parameters
if source.get("apply_efficiency", False):
self._set_efficiency(source, ll)

# Set shape parameters
shape_parameters = [
p for p in source["parameters"] if self.parameters[p].ptype == "shape"]
for p in shape_parameters:
anchors = self.parameters[p].blueice_anchors
if anchors is None:
raise ValueError(f"Shape parameter {p} does not have any anchors.")
ll.add_shape_parameter(p, anchors=anchors, log_prior=None)

ll.prepare()
lls.append(ll)

# Ancillary likelihood
ll = CustomAncillaryLikelihood(self.parameters.with_uncertainty)
lls.append(ll)

return LogLikelihoodSum(lls, likelihood_weights=likelihood_config["likelihood_weights"])
likelihood_weights = likelihood_config.get("likelihood_weights", None)
return LogLikelihoodSum(lls, likelihood_weights=likelihood_weights)

def _build_data_generators(self) -> list:
# last one is AncillaryLikelihood
Expand All @@ -182,20 +192,21 @@ def _build_data_generators(self) -> list:
def _ll(self, **generate_values) -> float:
return self._likelihood(**generate_values)

def _generate_data(self, **generate_values) -> list:
def _generate_data(self, **generate_values) -> dict:
# generate_values are already filtered and filled by the nominal values\
# through the generate_data method in the parent class
science_data = self._generate_science_data(**generate_values)
data = self._generate_science_data(**generate_values)
ancillary_keys = self.parameters.with_uncertainty.names
generate_values_anc = {k: v for k, v in generate_values.items() if k in ancillary_keys}
ancillary_measurements = self._generate_ancillary_measurements(
data["ancillary_likelihood"] = self._generate_ancillary_measurements(
**generate_values_anc)
return science_data + [ancillary_measurements] + [dict_to_structured_array(generate_values)]
data["generate_values"] = dict_to_structured_array(generate_values)
return data

def _generate_science_data(self, **generate_values) -> list:
science_data = [
gen.simulate(**generate_values) for gen in self.data_generators]
return science_data
def _generate_science_data(self, **generate_values) -> dict:
science_data = [gen.simulate(**generate_values)
for gen in self.data_generators]
return dict(zip(self.likelihood_names, science_data))

def _generate_ancillary_measurements(self, **generate_values) -> dict:
ancillary_measurements = {}
Expand Down Expand Up @@ -233,7 +244,7 @@ def _set_efficiency(self, source, ll):
raise ValueError(
f"Efficiency parameters including {efficiency_name:s}"
" must be constrained to be nonnegative")
if ~np.isfinite(limits[1]):
if ~np.isfinite(limits[1]):
raise ValueError(
f"Efficiency parameters including {efficiency_name:s}"
" must be constrained to be finite")
Expand Down Expand Up @@ -265,10 +276,10 @@ def __init__(self, parameters: Parameters):
parameter_list = self.parameters.names

self.constraint_functions = self._get_constraint_functions()
super().__init__(
func=self.ancillary_likelihood_sum,
parameter_list=parameter_list,
config=self.parameters.nominal_values)
super().__init__(func=self.ancillary_likelihood_sum,
parameter_list=parameter_list,
config=self.parameters.nominal_values)
self.pdf_base_config["name"] = "ancillary_likelihood"

@property
def constraint_terms(self) -> dict:
Expand Down
2 changes: 1 addition & 1 deletion alea/simulators.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __init__(self, ll_term):
source_histograms.append(mh.Histdd(bins=bins))

self.ll = ll
logging.info("initing simulator, binned: " + str(binned))
logging.debug("initing simulator, binned: " + str(binned))
self.binned = binned
self.bincs = bincs
self.direction_names = direction_names
Expand Down
8 changes: 4 additions & 4 deletions alea/template_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def build_histogram(self):
slice_axis_limits = sa.get(
"slice_axis_limits", [bes[0], bes[-1]])
if sum_axis:
logging.info(f"Slice and sum over axis {slice_axis} from {slice_axis_limits[0]} to {slice_axis_limits[1]}")
logging.debug(f"Slice and sum over axis {slice_axis} from {slice_axis_limits[0]} to {slice_axis_limits[1]}")
axis_names = h.axis_names_without(slice_axis)
h = h.slicesum(
axis=slice_axis,
Expand All @@ -66,7 +66,7 @@ def build_histogram(self):
h.axis_names = axis_names
else:
logging.debug(f"Normalization before slicing: {h.n}.")
logging.info(f"Slice over axis {slice_axis} from {slice_axis_limits[0]} to {slice_axis_limits[1]}")
logging.debug(f"Slice over axis {slice_axis} from {slice_axis_limits[0]} to {slice_axis_limits[1]}")
h = h.slice(axis=slice_axis,
start=slice_axis_limits[0],
stop=slice_axis_limits[1])
Expand Down Expand Up @@ -125,8 +125,8 @@ def build_histogram(self):
# Shouldn't be in HistogramSource... anyway
self._n_events_histogram = h.similar_blank_histogram()

h *= self.config.get('histogram_multiplier', 1)
logging.debug(f"Multiplying histogram with histogram_multiplier {self.config.get('histogram_multiplier', 1)}. Histogram is now normalised to {h.n}.")
h *= self.config.get('histogram_scale_factor', 1)
logging.debug(f"Multiplying histogram with histogram_scale_factor {self.config.get('histogram_scale_factor', 1)}. Histogram is now normalised to {h.n}.")

# Convert h to density...
if self.config.get('in_events_per_bin'):
Expand Down
Binary file added alea/templates/er_template_-1.h5
Binary file not shown.
Binary file added alea/templates/er_template_-2.h5
Binary file not shown.
Binary file added alea/templates/er_template_0.h5
Binary file not shown.
Binary file added alea/templates/er_template_1.h5
Binary file not shown.
Binary file added alea/templates/er_template_2.h5
Binary file not shown.
Loading