diff --git a/.gitignore b/.gitignore index 8579db61..0cbea2de 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ development_scripts/*hdf __pycache__/ *ipynb !examples/*.ipynb +debug.py diff --git a/alea/blueice_extended_model.py b/alea/blueice_extended_model.py new file mode 100644 index 00000000..03bf7413 --- /dev/null +++ b/alea/blueice_extended_model.py @@ -0,0 +1,298 @@ +from pydoc import locate # to lookup likelihood class +from typing import List + +from alea.statistical_model import StatisticalModel +from alea.simulators import BlueiceDataGenerator +from alea.utils import adapt_likelihood_config_for_blueice +from alea.parameters import Parameters +import yaml +import numpy as np +import scipy.stats as stats +from blueice.likelihood import LogAncillaryLikelihood +from blueice.likelihood import LogLikelihoodSum + + +class BlueiceExtendedModel(StatisticalModel): + """ + A statistical model based on blueice likelihoods. + + This class extends the `StatisticalModel` class and provides methods + for generating data and computing likelihoods based on blueice. + + Args: + parameter_definition (dict): A dictionary defining the model parameters. + likelihood_config (dict): A dictionary defining the likelihood. + """ + + def __init__(self, parameter_definition: dict, likelihood_config: dict): + """Initializes the statistical model. + + Args: + parameter_definition (dict): A dictionary defining the model parameters. + likelihood_config (dict): A dictionary defining the likelihood. + """ + super().__init__(parameter_definition=parameter_definition) + self._likelihood = self._build_ll_from_config(likelihood_config) + self.likelihood_names = [t["name"] for t in likelihood_config["likelihood_terms"]] + self.likelihood_names.append("ancillary_likelihood") + self.data_generators = self._build_data_generators() + + # TODO analysis_space could be inferred from the data (assert that all sources have the same analysis space) + + @classmethod + def from_config(cls, config_file_path: str) -> "BlueiceExtendedModel": + """Initializes the statistical model from a yaml config file. + + Args: + config_file_path (str): Path to the yaml config file. + + Returns: + BlueiceExtendedModel: Statistical model. + """ + with open(config_file_path, "r") as f: + config = yaml.safe_load(f) + return cls(**config) + + @property + def data(self) -> list: + """Returns the data of the statistical model.""" + return super().data + + @data.setter + def data(self, data: list): + """ + Overrides default setter. Will also set the data of the blueice ll. + Data-sets are expected to be in the form of a list of one or more structured arrays-- representing the data-sets of one or more likelihood terms. 
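+        The last entry in the list is the generate_values dict used to produce the data.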
+ """ + # iterate through all likelihood terms and set the science data in the blueice ll + # last entry in data are the generate_values + for d, ll_term in zip(data[:-1], self._likelihood.likelihood_list): + ll_term.set_data(d) + + self._data = data + + def get_expectation_values(self, **kwargs) -> dict: + """ + Return total expectation values (summed over all likelihood terms with the same name) + given a number of named parameters (kwargs) + """ + ret = dict() + for ll in self._likelihood.likelihood_list[:-1]: # ancillary likelihood does not contribute + + ll_pars = list(ll.rate_parameters.keys()) + list(ll.shape_parameters.keys()) + ll_pars += ["livetime_days"] + call_args = {k: i for k, i in kwargs.items() if k in ll_pars} + + mus = ll(full_output=True, **call_args)[1] + for n, mu in zip(ll.source_name_list, mus): + ret[n] = ret.get(n, 0) + mu + return ret + + def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum": + """ + Iterate through all likelihood terms and build blueice ll + + Args: + likelihood_config (dict): A dictionary defining the likelihood. + """ + lls = [] + + # Iterate through each likelihood term in the configuration + for config in likelihood_config["likelihood_terms"]: + likelihood_object = locate(config["likelihood_type"]) + if isinstance(likelihood_config["template_folder"], str): + template_folder_list = [likelihood_config["template_folder"]] + elif isinstance(likelihood_config["template_folder"], list): + template_folder_list = likelihood_config["template_folder"] + else: + raise ValueError( + "template_folder must be either a string or a list of strings.") + + blueice_config = adapt_likelihood_config_for_blueice( + config, template_folder_list) + blueice_config["livetime_days"] = self.parameters[ + blueice_config["livetime_parameter"]].nominal_value + for p in self.parameters: + blueice_config[p.name] = blueice_config.get(p.name, p.nominal_value) + + # add all parameters to extra_dont_hash for each source unless it is used: + for i, source in enumerate(config["sources"]): + parameters_to_ignore: List[str] = [p.name for p in self.parameters if (p.ptype == "shape") + & (p.name not in source["parameters"])] + # no efficiency affects PDF: + parameters_to_ignore += [p.name for p in self.parameters if (p.ptype == "efficiency")] + parameters_to_ignore += source.get("extra_dont_hash_settings", []) + + # ignore all shape parameters known to this model not named specifically in the source: + blueice_config["sources"][i]["extra_dont_hash_settings"] = parameters_to_ignore + + ll = likelihood_object(blueice_config) + + for source in config["sources"]: + + # Set rate parameters + rate_parameters = [ + p for p in source["parameters"] if self.parameters[p].ptype == "rate"] + if len(rate_parameters) != 1: + raise ValueError( + f"Source {source['name']} must have exactly one rate parameter.") + rate_parameter = rate_parameters[0] + if rate_parameter.endswith("_rate_multiplier"): + rate_parameter = rate_parameter.replace("_rate_multiplier", "") + ll.add_rate_parameter(rate_parameter, log_prior=None) + else: + raise NotImplementedError( + "Only rate multipliers that end on _rate_multiplier are currently supported.") + + # Set shape parameters + shape_parameters = [ + p for p in source["parameters"] if self.parameters[p].ptype == "shape"] + if shape_parameters: + # TODO: Implement setting shape parameters + raise NotImplementedError("Shape parameters are not yet supported.") + + # Set efficiency parameters + if source.get("apply_efficiency", False): + 
self._set_efficiency(source, ll) + + ll.prepare() + lls.append(ll) + + # Ancillary likelihood + ll = CustomAncillaryLikelihood(self.parameters.with_uncertainty) + lls.append(ll) + + return LogLikelihoodSum(lls, likelihood_weights=likelihood_config["likelihood_weights"]) + + def _build_data_generators(self) -> list: + # last one is AncillaryLikelihood + # IDEA: Also implement data generator for ancillary ll term. + return [BlueiceDataGenerator(ll_term) for ll_term in self._likelihood.likelihood_list[:-1]] + + def _ll(self, **generate_values) -> float: + return self._likelihood(**generate_values) + + def _generate_data(self, **generate_values) -> list: + # generate_values are already filtered and filled by the nominal values\ + # through the generate_data method in the parent class + science_data = self._generate_science_data(**generate_values) + ancillary_keys = self.parameters.with_uncertainty.names + generate_values_anc = {k: v for k, v in generate_values.items() if k in ancillary_keys} + ancillary_measurements = self._generate_ancillary_measurements( + **generate_values_anc) + return science_data + [ancillary_measurements] + [generate_values] + + def _generate_science_data(self, **generate_values) -> list: + science_data = [gen.simulate(**generate_values) + for gen in self.data_generators] + return science_data + + def _generate_ancillary_measurements(self, **generate_values) -> dict: + ancillary_measurements = {} + anc_ll = self._likelihood.likelihood_list[-1] + ancillary_generators = anc_ll._get_constraint_functions(**generate_values) + for name, gen in ancillary_generators.items(): + parameter_meas = gen.rvs() + # correct parameter_meas if out of bounds + param = self.parameters[name] + if not param.value_in_fit_limits(parameter_meas): + if param.fit_limits[0] is not None and parameter_meas < param.fit_limits[0]: + parameter_meas = param.fit_limits[0] + elif param.fit_limits[1] is not None and parameter_meas > param.fit_limits[1]: + parameter_meas = param.fit_limits[1] + ancillary_measurements[name] = parameter_meas + + return ancillary_measurements + + def _set_efficiency(self, source, ll): + assert "efficiency_name" in source, "Unspecified efficiency_name for source {:s}".format(source["name"]) + efficiency_name = source["efficiency_name"] + assert efficiency_name in source[ + "parameters"], "The efficiency_name for source {:s} is not in its parameter list".format(source["name"]) + efficiency_parameter = self.parameters[efficiency_name] + assert efficiency_parameter.ptype == "efficiency", "The parameter {:s} must" \ + " be an efficiency".format(efficiency_name) + limits = efficiency_parameter.fit_limits + assert 0 <= limits[0], 'Efficiency parameters including {:s} must be' \ + ' constrained to be nonnegative'.format(efficiency_name) + assert np.isfinite(limits[1]), 'Efficiency parameters including {:s} must be' \ + ' constrained to be finite'.format(efficiency_name) + ll.add_shape_parameter(efficiency_name, anchors=(limits[0], limits[1])) + + +class CustomAncillaryLikelihood(LogAncillaryLikelihood): + """ + Custom ancillary likelihood that can be used to add constraint terms + for parameters of the likelihood. + + Args: + parameters (Parameters): Parameters object containing the + parameters to be constrained. + """ + + def __init__(self, parameters: Parameters): + """Initialize the CustomAncillaryLikelihood. + + Args: + parameters (Parameters): Parameters object containing the + parameters to be constrained. 
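+                Each parameter must have a defined uncertainty, since a constraint term is built for every one of them.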
+ """ + self.parameters = parameters + # check that there are no None values in the uncertainties dict + assert set(self.parameters.uncertainties.keys()) == set(self.parameters.names) + parameter_list = self.parameters.names + + self.constraint_functions = self._get_constraint_functions() + super().__init__(func=self.ancillary_likelihood_sum, + parameter_list=parameter_list, + config=self.parameters.nominal_values) + + @property + def constraint_terms(self) -> dict: + """ + Dict of all constraint terms (logpdf of constraint functions) + of the ancillary likelihood. + """ + return {name: func.logpdf for name, func in self.constraint_functions.items()} + + def set_data(self, d: dict): + """ + Set the data of the ancillary likelihood (ancillary measurements). + + Args: + d (dict): Data in this case is a dict of ancillary measurements. + """ + # This results in shifted constraint terms. + assert set(d.keys()) == set(self.parameters.names) + self.constraint_functions = self._get_constraint_functions(**d) + + def ancillary_likelihood_sum(self, evaluate_at: dict) -> float: + """Return the sum of all constraint terms. + + Args: + evaluate_at (dict): Values of the ancillary measurements. + + Returns: + float: Sum of all constraint terms. + """ + evaluated_constraint_terms = [ + term(evaluate_at[name]) for name, term in self.constraint_terms.items() + ] + return np.sum(evaluated_constraint_terms) + + def _get_constraint_functions(self, **generate_values) -> dict: + central_values = self.parameters(**generate_values) + constraint_functions = {} + for name, uncertainty in self.parameters.uncertainties.items(): + param = self.parameters[name] + if param.relative_uncertainty: + uncertainty *= param.nominal_value + if isinstance(uncertainty, float): + func = stats.norm(central_values[name], + uncertainty) + else: + # TODO: Implement str-type uncertainties + NotImplementedError( + "Only float uncertainties are supported at the moment.") + constraint_functions[name] = func + return constraint_functions diff --git a/alea/examples/gaussian_model.py b/alea/examples/gaussian_model.py index 0c39de9d..9b755786 100644 --- a/alea/examples/gaussian_model.py +++ b/alea/examples/gaussian_model.py @@ -7,7 +7,8 @@ class GaussianModel(StatisticalModel): - def __init__(self, parameter_definition: Optional[dict or list] = None): + def __init__(self, parameter_definition: Optional[dict or list] = None, + **kwargs): """ Initialise a model of a gaussian measurement (hatmu), where the model has parameters mu and sigma @@ -15,8 +16,8 @@ def __init__(self, parameter_definition: Optional[dict or list] = None): sigma is fixed in this example. """ if parameter_definition is None: - parameter_definition = ['mu', 'sigma'] - super().__init__(parameter_definition=parameter_definition) + parameter_definition = ["mu", "sigma"] + super().__init__(parameter_definition=parameter_definition, **kwargs) def _ll(self, mu=None, sigma=None): hat_mu = self.data[0]['hat_mu'][0] diff --git a/alea/examples/unbinned_wimp_running.yaml b/alea/examples/unbinned_wimp_running.yaml new file mode 100644 index 00000000..254f73db --- /dev/null +++ b/alea/examples/unbinned_wimp_running.yaml @@ -0,0 +1,54 @@ +# Just a placeholder for now to help thinking about the structure. 
+statistical_model_config: unbinned_wimp_statistical_model.yaml + +poi: wimp_rate_multiplier + +computation: + discovery_power: + parameters_to_zip: {} + parameters_to_vary: + {poi_expectation: "np.linspace(0, 30, 10)", wimp_mass: [10, 50, 200] } + parameters_in_common: + { + hypotheses: ["true", "null", "free"], + output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.hdf5", + n_mc: 5000, + n_batch: 40, + } + toydata_mode: "generate" + + threshold: + parameters_to_zip: {} + parameters_to_vary: { wimp_mass: [10, 50, 200] } + parameters_in_common: + { + hypotheses: ["true", "null", "free"], + output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.hdf5", + n_mc: 5000, + n_batch: 40, + } + limit_threshold: "thresholds.hdf5" + toydata_mode: "generate" + parameters_as_wildcards: ["poi_expectation", "n_mc", "n_batch"] + + sensitivity: + parameters_to_zip: {} + parameters_to_vary: { poi_expectation: [0.], wimp_mass: [10, 50, 200] } + parameters_in_common: + { + hypotheses: ["true", "null", "free"], + output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.hdf5", + n_mc: 5000, + n_batch: 40, + compute_confidence_interval: True, + limit_threshold: "thresholds.hdf5", + } + toydata_mode: "generate" + +midway_path: null +OSG_path: null +OSG_parameters: + request_memory: "8000Mb" + singularity_container: null + +outputfolder: null \ No newline at end of file diff --git a/alea/examples/unbinned_wimp_statistical_model.yaml b/alea/examples/unbinned_wimp_statistical_model.yaml new file mode 100644 index 00000000..58b38c2b --- /dev/null +++ b/alea/examples/unbinned_wimp_statistical_model.yaml @@ -0,0 +1,123 @@ +parameter_definition: + wimp_mass: + nominal_value: 50 + fittable: false + description: WIMP mass in GeV/c^2 + + livetime_sr0: + nominal_value: 0.2 + fittable: false + description: Livetime of SR0 in years + + livetime_sr1: + nominal_value: 1.0 + fittable: false + description: Livetime of SR1 in years + + wimp_rate_multiplier: + nominal_value: 1.0 + ptype: rate + fittable: true + fit_limits: + - 0 + - null + + er_rate_multiplier: + nominal_value: 1.0 + ptype: rate + uncertainty: 0.2 + relative_uncertainty: true + fittable: true + fit_limits: + - 0 + - null + fit_guess: 1.0 + + signal_efficiency: + nominal_value: 1.0 + ptype: efficiency + uncertainty: 0.1 + relative_uncertainty: true + fittable: true + fit_limits: + - 0 + - 10. 
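+      # the upper limit must be finite: _set_efficiency requires efficiency
+      # fit limits to be nonnegative and finite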
+ fit_guess: 1.0 + description: Parameter to account for the uncertain signal expectation given a certain cross-section + + # er_band_shift: + # nominal_value: 0 + # ptype: shape + # uncertainty: scipy.stats.uniform(loc=-1, scale=2) + # relative_uncertainty: false + # fittable: true + # blueice_anchors: + # - -1 + # - 0 + # - 1 + # fit_limits: + # - -1 + # - 1 + # description: ER band shape parameter (shifts the ER band up and down) + +likelihood_config: + likelihood_weights: [1, 1, 1] + template_folder: alea/examples/templates/wimp_templates + likelihood_terms: + # SR0 + - name: sr0 + default_source_class: alea.template_source.TemplateSource + likelihood_type: blueice.likelihood.UnbinnedLogLikelihood + analysis_space: + - "cs1": 'np.arange(0, 102, 2)' + - "cs2": 'np.geomspace(100, 100000, 51)' + in_events_per_bin: true + livetime_parameter: livetime_sr0 + slice_args: {} + sources: + - name: er + histname: er_template # TODO: implement a default histname based on the source name + parameters: + - er_rate_multiplier + # - er_band_shift + template_filename: er_template.h5 + histogram_scale_factor: 1 + + - name: wimp + histname: wimp_template + parameters: + - wimp_rate_multiplier + - wimp_mass + - signal_efficiency + template_filename: wimp50gev_template.h5 + apply_efficiency: True + efficiency_name: signal_efficiency + + # SR1 + - name: sr1 + default_source_class: alea.template_source.TemplateSource + likelihood_type: blueice.likelihood.UnbinnedLogLikelihood + analysis_space: + - "cs1": 'np.arange(0, 102, 2)' + - "cs2": 'np.geomspace(100, 100000, 51)' + in_events_per_bin: true + livetime_parameter: livetime_sr1 + slice_args: {} + sources: + - name: er + histname: er_template + parameters: + - er_rate_multiplier + # - er_band_shift + template_filename: er_template.h5 + histogram_scale_factor: 2 + + - name: wimp + histname: wimp_template + parameters: + - wimp_rate_multiplier + - wimp_mass + - signal_efficiency + template_filename: wimp50gev_template.h5 + apply_efficiency: True + efficiency_name: signal_efficiency \ No newline at end of file diff --git a/alea/parameters.py b/alea/parameters.py index ed59a40c..17448ffa 100644 --- a/alea/parameters.py +++ b/alea/parameters.py @@ -9,7 +9,7 @@ class Parameter: name (str): The name of the parameter. nominal_value (float, optional): The nominal value of the parameter. fittable (bool, optional): Indicates if the parameter is fittable or always fixed. - ptype (str, optional): The type of the parameter. + ptype (str, optional): The ptype of the parameter. uncertainty (float or str, optional): The uncertainty of the parameter. If a string, it can be evaluated as a numpy or scipy function to define non-gaussian constraints. @@ -39,7 +39,7 @@ def __init__( self.name = name self.nominal_value = nominal_value self.fittable = fittable - self.type = ptype + self.ptype = ptype self._uncertainty = uncertainty self.relative_uncertainty = relative_uncertainty self.blueice_anchors = blueice_anchors @@ -120,6 +120,10 @@ class Parameters: def __init__(self): self.parameters: Dict[str, Parameter] = {} + def __iter__(self) -> iter: + """Returns an iterator over the parameters. 
Each iteration returns a Parameter object."""
+        return iter(self.parameters.values())
+
     @classmethod
     def from_config(cls, config: Dict[str, dict]):
         """
@@ -213,6 +217,25 @@ def not_fittable(self) -> List[str]:
         """
         return [name for name, param in self.parameters.items() if not param.fittable]

+    @property
+    def uncertainties(self) -> dict:
+        """
+        Return a dict of name: uncertainty for all parameters whose uncertainty is not None.
+        """
+        return {k: i.uncertainty for k, i in self.parameters.items() if i.uncertainty is not None}
+
+    @property
+    def with_uncertainty(self) -> "Parameters":
+        """
+        Return the parameters that have a non-None uncertainty.
+        The parameters are the same objects as in the original Parameters object, not a copy.
+        """
+        param_dict = {k: i for k, i in self.parameters.items() if i.uncertainty is not None}
+        params = Parameters()
+        for param in param_dict.values():
+            params.add_parameter(param)
+        return params
+
     @property
     def nominal_values(self) -> dict:
         """
diff --git a/alea/simulators.py b/alea/simulators.py
index b517921e..96a1b018 100644
--- a/alea/simulators.py
+++ b/alea/simulators.py
@@ -4,12 +4,29 @@
 from itertools import product
 import scipy.stats as sps
 import logging
-logging.basicConfig( level=logging.INFO)
+import blueice
+logging.basicConfig(level=logging.INFO)

-class simulate_interpolated:
-    def __init__(self, ll_template, binned=False, mute=True):
-        ll = deepcopy(ll_template)
+class BlueiceDataGenerator:
+    """
+    A class for generating data from a blueice likelihood term.
+
+    Args:
+        ll_term (blueice.likelihood.BinnedLogLikelihood
+            or blueice.likelihood.UnbinnedLogLikelihood):
+            A blueice likelihood term.
+    """
+
+    def __init__(self, ll_term):
+        if isinstance(ll_term, blueice.likelihood.BinnedLogLikelihood):
+            binned = True
+        elif isinstance(ll_term, blueice.likelihood.UnbinnedLogLikelihood):
+            binned = False
+        else:
+            raise NotImplementedError(
+                "ll_term must be a blueice BinnedLogLikelihood or UnbinnedLogLikelihood.")
+
+        ll = deepcopy(ll_term)
         bins = []  # bin edges
         bincs = []  # bin centers of each component
         direction_names = []
@@ -20,7 +37,6 @@ def __init__(self, ll_template, binned=False, mute=True):
             bins.append(direction[1])
             binc = 0.5 * (direction[1][1::] + direction[1][0:-1])
             bincs.append(binc)
-            #binc_dict[direction[0]] = binc
             dtype.append((direction[0], float))
             data_length *= len(direction[1]) - 1
             data_lengths.append(len(direction[1]) - 1)
diff --git a/alea/statistical_model.py b/alea/statistical_model.py
index 624eed1a..9e201b63 100644
--- a/alea/statistical_model.py
+++ b/alea/statistical_model.py
@@ -32,16 +32,12 @@ class StatisticalModel:
         _generate_data

     optional to implement:
-        get_mus
-        get_likelihood_term_names
+        get_expectation_values

     Implemented here:
         store_data
         fit
-        get_confidence_interval
-        get_expectations
         get_parameter_list
-        print_config

     Other members:
         _data = None
@@ -98,18 +94,35 @@ def _generate_data(self, **kwargs):
                " or use a subclass where it is written for you")

     def ll(self, **kwargs) -> float:
-        # CAUTION:
-        # This implementation won't allow you to call the likelihood by positional arguments.
+        """
+        Likelihood function; returns the log-likelihood for the given parameters.
+        The parameters are passed as keyword arguments; positional arguments are not possible.
+        If a parameter is not given, its default value is used.
+
+        Returns:
+            float: log-likelihood value
+        """
         parameters = self.parameters(**kwargs)
         return self._ll(**parameters)

     def generate_data(self, **kwargs):
+        """
+        Generate data for the given parameters.
+        The parameters are passed as keyword arguments; positional arguments are not possible.
+        If a parameter is not given, its default value is used.
+
+        Raises:
+            ValueError: If the parameters are not within the fit limits.
+
+        Returns:
+            Generated data.
+        """
         # CAUTION:
         # This implementation won't allow you to call generate_data by positional arguments.
         if not self.parameters.values_in_fit_limits(**kwargs):
             raise ValueError("Values are not within fit limits")
-        parameters = self.parameters(**kwargs)
-        return self._generate_data(**parameters)
+        generate_values = self.parameters(**kwargs)
+        return self._generate_data(**generate_values)

     @property
     def data(self):
@@ -150,39 +163,33 @@ def store_data(
         kw = {'metadata': metadata} if metadata is not None else dict()
         toydata_to_file(file_name, data_list, data_name_list, **kw)

-    def get_expectations(self):
-        return NotImplementedError("get_expectation is optional to implement")
+    def get_expectation_values(self, **parameter_values):
+        raise NotImplementedError("get_expectation_values is optional to implement")

-    def get_likelihood_term_names(self):
+    @property
+    def nominal_expectation_values(self):
         """
-        It may be convenient to partition the likelihood in several terms,
-        you can implement this function to give them names (list of strings)
+        Nominal expectation values for the sources of the likelihood.
+
+        For this to work, you must implement `get_expectation_values`.
         """
-        raise NotImplementedError("get_likelihood_term_names is optional to implement")
+        return self.get_expectation_values()  # no kwargs for nominal

     def get_likelihood_term_from_name(self, likelihood_name):
         """
-        Returns the index of a likelihood term if the likelihood has several names
+        Return the index of the likelihood term with the given name.
         """
-        try:
-            if hasattr(self, 'likelihood_names'):
-                likelihood_names = self.likelihood_names
-            else:
-                likelihood_names = self.get_likelihood_term_names()
-            return {n: i for i, n in enumerate(likelihood_names)}[likelihood_name]
-        except Exception as e:
-            print(e)
-            return None
+        if hasattr(self, "likelihood_names"):
+            likelihood_names = self.likelihood_names
+            return {n: i for i, n in enumerate(likelihood_names)}[likelihood_name]
+        else:
+            raise NotImplementedError("The attribute likelihood_names is not defined.")

     def get_parameter_list(self):
         """
-        Returns a set of all parameters that the generate_data and likelihood accepts
+        Return a list of all parameter names that generate_data and the likelihood accept.
         """
-        return self._parameter_list
-
-    def print_config(self):
-        for k, i in self.config:
-            print(k, i)
+        return self.parameters.names

     def make_objective(self, minus=True, **kwargs):
         sign = -1 if minus else 1
@@ -251,7 +258,7 @@ def __call__(self, *args):
             return m.values.to_dict(), -1 * m.fval

     def _confidence_interval_checks(
-            self, parameter: str,
+            self, poi_name: str,
             parameter_interval_bounds: Tuple[float, float],
             confidence_level: float,
             confidence_interval_kind: str,
@@ -266,9 +273,9 @@
         mask = (confidence_level > 0) and (confidence_level < 1)
         assert mask, "the confidence level must lie between 0 and 1"

-        parameter_of_interest = self.parameters[parameter]
+        parameter_of_interest = self.parameters[poi_name]
         assert parameter_of_interest.fittable, "The parameter of interest must be fittable"
-        assert parameter not in kwargs, "you cannot set the parameter you're constraining"
+        assert poi_name not in kwargs, "you cannot set the parameter you're constraining"

         if parameter_interval_bounds is None:
             parameter_interval_bounds = parameter_of_interest.parameter_interval_bounds
             if parameter_interval_bounds is None:
                 raise ValueError(
                     "You must set parameter_interval_bounds in the parameter config"
                     " or when calling confidence_interval")

-        if
parameter_of_interest.type == "rate": + if parameter_of_interest.ptype == "rate": try: - mu_parameter = self.get_expectations(**kwargs)[parameter_of_interest.name] + if parameter_of_interest.ptype == "rate" and poi_name.endswith("_rate_multiplier"): + source_name = poi_name.replace("_rate_multiplier", "") + else: + source_name = poi_name + mu_parameter = self.get_expectation_values(**kwargs)[source_name] parameter_interval_bounds = ( parameter_interval_bounds[0] / mu_parameter, parameter_interval_bounds[1] / mu_parameter) except NotImplementedError: warnings.warn( - "The statistical model does not have a get_expectations model implemented," + "The statistical model does not have a get_expectation_values model implemented," " confidence interval bounds will be set directly.") pass # no problem, continuing with bounds as set @@ -304,7 +315,7 @@ def _confidence_interval_checks( return confidence_interval_kind, confidence_interval_threshold, parameter_interval_bounds def confidence_interval( - self, parameter: str, + self, poi_name: str, parameter_interval_bounds: Tuple[float, float] = None, confidence_level: float = None, confidence_interval_kind: str = None, @@ -312,7 +323,7 @@ def confidence_interval( """ Uses self.fit to compute confidence intervals for a certain named parameter - parameter: string, name of fittable parameter of the model + poi_name: string, name of fittable parameter of the model parameter_interval_bounds: range in which to search for the confidence interval edges. May be specified as: - setting the property "parameter_interval_bounds" for the parameter @@ -324,7 +335,7 @@ def confidence_interval( """ ci_objects = self._confidence_interval_checks( - parameter, + poi_name, parameter_interval_bounds, confidence_level, confidence_interval_kind, @@ -333,7 +344,7 @@ def confidence_interval( # find best-fit: best_result, best_ll = self.fit(**kwargs) - best_parameter = best_result[parameter] + best_parameter = best_result[poi_name] mask = (parameter_interval_bounds[0] < best_parameter) mask &= (best_parameter < parameter_interval_bounds[1]) assert mask, ("the best-fit is outside your confidence interval" @@ -344,7 +355,7 @@ def confidence_interval( def t(hypothesis): # define the intersection # between the profile-log-likelihood curve and the rejection threshold - kwargs[parameter] = hypothesis + kwargs[poi_name] = hypothesis _, ll = self.fit(**kwargs) # ll is + log-likelihood here ret = 2. * (best_ll - ll) # likelihood curve "right way up" (smiling) # if positive, hypothesis is excluded diff --git a/alea/template_source.py b/alea/template_source.py index f6088699..aee2cef2 100644 --- a/alea/template_source.py +++ b/alea/template_source.py @@ -118,7 +118,7 @@ def build_histogram(self): try: np.testing.assert_almost_equal(seen_bin_edges, expected_bin_edges, - decimal=4) + decimal=2) except AssertionError: warnings.warn( "Axis %d of histogram %s in root file %s has bin edges %s, " diff --git a/alea/utils.py b/alea/utils.py index 7f8587e9..48e69aec 100644 --- a/alea/utils.py +++ b/alea/utils.py @@ -208,7 +208,7 @@ def detect_path(config_data): path = config_data["midway_path"] on_remote = True else: - path = "alea/data" + path = "alea" on_remote = False return path, on_remote @@ -263,6 +263,49 @@ def adapt_inference_object_config(config_data, wimp_mass, cache_dir=None): return inference_object_config +def adapt_likelihood_config_for_blueice(likelihood_config: dict, + template_folder_list: list) -> dict: + """ + Adapt likelihood config to be compatible with blueice. 
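+    Resolves the template folder, evaluates the analysis space, locates the
+    default source class, and sets the full template path for each source.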
+ + Args: + likelihood_config (dict): likelihood config dict + template_folder_list (list): list of possible base folders where + templates are located. If a folder starts with alea/, + the alea folder is used as base. + Ordered by priority. + + Returns: + dict: adapted likelihood config + """ + template_folder = None + for template_folder in template_folder_list: + # if template folder starts with alea: get location of alea + if template_folder.startswith("alea/"): + import alea + alea_dir = os.path.dirname(os.path.abspath(alea.__file__)) + template_folder = os.path.join(alea_dir, template_folder.replace("alea/", "")) + # check if template folder exists + if not os.path.isdir(template_folder): + template_folder = None + else: + break + # raise error if no template folder is found + if template_folder is None: + raise FileNotFoundError("No template folder found. Please provide a valid template folder.") + + likelihood_config["analysis_space"] = get_analysis_space( + likelihood_config["analysis_space"]) + + likelihood_config["default_source_class"] = locate( + likelihood_config["default_source_class"]) + + for source in likelihood_config["sources"]: + source["templatename"] = os.path.join(template_folder, + source["template_filename"]) + return likelihood_config + + class VariationConvenience(object): """This class serves as a convenience wrapper to carry out cartesian products. The same functionality can be achieved with plain alea but this might @@ -725,3 +768,31 @@ def _get_generate_args_from_toyfile(toydata_file): if not all_generate_args_equal: raise ValueError("generate_args in toyfile are not all the same") return structured_array_to_dict(generate_args_list[0]) + + +def get_analysis_space(analysis_space: dict) -> list: + eval_analysis_space = [] + + for element in analysis_space: + for key, value in element.items(): + if value.startswith("np."): + eval_element = (key, eval(value)) + else: + eval_element = (key, + np.fromstring(value, + dtype=float, + sep=" ")) + eval_analysis_space.append(eval_element) + return eval_analysis_space + +def find_file_resource(file: str, possible_locations: list) -> str: + """ + Function to look for a file in a list of folders (the file name may include folder structure too). + Return the complete path of the first location found, + and throw error if no file is found. + """ + for loc in possible_locations: + fname = Path(loc, file) + if fname.is_file(): + return str(fname) + raise FileNotFoundError("{:s} not found in any of {:s}".format(file, ",".join(possible_locations)))
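Review note: a minimal usage sketch of the new `BlueiceExtendedModel` API added in this PR (not part of the diff). It assumes the example templates referenced in `unbinned_wimp_statistical_model.yaml` are available locally; the interval bounds and confidence level passed to `confidence_interval` are illustrative placeholders.

```python
from alea.blueice_extended_model import BlueiceExtendedModel

# build the model from the example config shipped with this PR
model = BlueiceExtendedModel.from_config(
    "alea/examples/unbinned_wimp_statistical_model.yaml")

# toy data: one structured array per likelihood term, followed by the
# ancillary measurements dict and the generate_values dict
data = model.generate_data(wimp_rate_multiplier=1.0)
model.data = data  # also pushes the science data into the blueice terms

# expectation values, summed over likelihood terms (SR0 + SR1)
print(model.get_expectation_values())

# fit all fittable parameters, then profile the POI
best_fit, max_ll = model.fit()
interval = model.confidence_interval(
    "wimp_rate_multiplier",
    parameter_interval_bounds=(0, 5),  # illustrative; normally set in the parameter config
    confidence_level=0.9)
```

The return value of `confidence_interval` is not shown in this diff, so the sketch leaves it unpacked.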