From 9be89b225685d348c0ea28175e09a4f3c0d6a1e9 Mon Sep 17 00:00:00 2001
From: dachengx
Date: Thu, 27 Jul 2023 21:50:21 +0800
Subject: [PATCH 1/4] Recover examples folder, update file indexing, add notebooks folder

---
 alea/__init__.py                              |  2 ++
 alea/examples/__init__.py                     |  1 +
 .../configs}/unbinned_wimp_running.yaml       |  0
 .../unbinned_wimp_statistical_model.yaml      |  0
 alea/{models => examples}/gaussian_model.py   |  0
 alea/{ => examples}/templates/er_template.h5  | Bin
 .../templates/er_template_-1.h5               | Bin
 .../templates/er_template_-2.h5               | Bin
 .../{ => examples}/templates/er_template_0.h5 | Bin
 .../{ => examples}/templates/er_template_1.h5 | Bin
 .../{ => examples}/templates/er_template_2.h5 | Bin
 .../templates/wimp50gev_template.h5           | Bin
 alea/models/__init__.py                       |  1 -
 alea/utils.py                                 |  2 +-
 notebooks/placeholder.ipynb                   | 33 ++++++++++++++++++
 tests/test_gaussian_model.py                  |  2 +-
 16 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 alea/examples/__init__.py
 rename alea/{runner_configs => examples/configs}/unbinned_wimp_running.yaml (100%)
 rename alea/{model_configs => examples/configs}/unbinned_wimp_statistical_model.yaml (100%)
 rename alea/{models => examples}/gaussian_model.py (100%)
 rename alea/{ => examples}/templates/er_template.h5 (100%)
 rename alea/{ => examples}/templates/er_template_-1.h5 (100%)
 rename alea/{ => examples}/templates/er_template_-2.h5 (100%)
 rename alea/{ => examples}/templates/er_template_0.h5 (100%)
 rename alea/{ => examples}/templates/er_template_1.h5 (100%)
 rename alea/{ => examples}/templates/er_template_2.h5 (100%)
 rename alea/{ => examples}/templates/wimp50gev_template.h5 (100%)
 create mode 100644 notebooks/placeholder.ipynb

diff --git a/alea/__init__.py b/alea/__init__.py
index db25e2bf..9ee77721 100644
--- a/alea/__init__.py
+++ b/alea/__init__.py
@@ -11,3 +11,5 @@
 from .simulators import *
 
 from .template_source import *
+
+from .examples import *
diff --git a/alea/examples/__init__.py b/alea/examples/__init__.py
new file mode 100644
index 00000000..807b2117
--- /dev/null
+++ b/alea/examples/__init__.py
@@ -0,0 +1 @@
+from .gaussian_model import *
diff --git a/alea/runner_configs/unbinned_wimp_running.yaml b/alea/examples/configs/unbinned_wimp_running.yaml
similarity index 100%
rename from alea/runner_configs/unbinned_wimp_running.yaml
rename to alea/examples/configs/unbinned_wimp_running.yaml
diff --git a/alea/model_configs/unbinned_wimp_statistical_model.yaml b/alea/examples/configs/unbinned_wimp_statistical_model.yaml
similarity index 100%
rename from alea/model_configs/unbinned_wimp_statistical_model.yaml
rename to alea/examples/configs/unbinned_wimp_statistical_model.yaml
diff --git a/alea/models/gaussian_model.py b/alea/examples/gaussian_model.py
similarity index 100%
rename from alea/models/gaussian_model.py
rename to alea/examples/gaussian_model.py
diff --git a/alea/templates/er_template.h5 b/alea/examples/templates/er_template.h5
similarity index 100%
rename from alea/templates/er_template.h5
rename to alea/examples/templates/er_template.h5
diff --git a/alea/templates/er_template_-1.h5 b/alea/examples/templates/er_template_-1.h5
similarity index 100%
rename from alea/templates/er_template_-1.h5
rename to alea/examples/templates/er_template_-1.h5
diff --git a/alea/templates/er_template_-2.h5 b/alea/examples/templates/er_template_-2.h5
similarity index 100%
rename from alea/templates/er_template_-2.h5
rename to alea/examples/templates/er_template_-2.h5
diff --git a/alea/templates/er_template_0.h5 b/alea/examples/templates/er_template_0.h5
similarity index 100%
rename from alea/templates/er_template_0.h5
rename to alea/examples/templates/er_template_0.h5
diff --git a/alea/templates/er_template_1.h5 b/alea/examples/templates/er_template_1.h5
similarity index 100%
rename from alea/templates/er_template_1.h5
rename to alea/examples/templates/er_template_1.h5
diff --git a/alea/templates/er_template_2.h5 b/alea/examples/templates/er_template_2.h5
similarity index 100%
rename from alea/templates/er_template_2.h5
rename to alea/examples/templates/er_template_2.h5
diff --git a/alea/templates/wimp50gev_template.h5 b/alea/examples/templates/wimp50gev_template.h5
similarity index 100%
rename from alea/templates/wimp50gev_template.h5
rename to alea/examples/templates/wimp50gev_template.h5
diff --git a/alea/models/__init__.py b/alea/models/__init__.py
index 9ddf65fa..7ad49249 100644
--- a/alea/models/__init__.py
+++ b/alea/models/__init__.py
@@ -1,2 +1 @@
-from .gaussian_model import *
 from .blueice_extended_model import *
diff --git a/alea/utils.py b/alea/utils.py
index f3744b38..3d24fe6b 100644
--- a/alea/utils.py
+++ b/alea/utils.py
@@ -72,7 +72,7 @@ def load_yaml(file_name: str):
 
 def _get_abspath(file_name):
     """Get the abspath of the file. Raise FileNotFoundError when not found in any subfolder"""
-    for sub_dir in ('model_configs', 'runner_configs', 'templates'):
+    for sub_dir in ('examples/configs', 'examples/templates'):
         p = os.path.join(_package_path(sub_dir), file_name)
         if glob(formatted_to_asterisked(p)):
             return p
diff --git a/notebooks/placeholder.ipynb b/notebooks/placeholder.ipynb
new file mode 100644
index 00000000..7d04ed15
--- /dev/null
+++ b/notebooks/placeholder.ipynb
@@ -0,0 +1,33 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "76edc00b-f220-47e2-914b-0781c288594d",
+   "metadata": {},
+   "source": [
+    "Please delete this when new meaningful notebooks are added."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tests/test_gaussian_model.py b/tests/test_gaussian_model.py
index bc636233..ea21309e 100644
--- a/tests/test_gaussian_model.py
+++ b/tests/test_gaussian_model.py
@@ -4,7 +4,7 @@
 import scipy.stats as sps
 import inference_interface
 
-from alea.models import GaussianModel
+from alea.examples import GaussianModel
 
 
 class TestGaussianModel(TestCase):
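For orientation, the alea/utils.py hunk above is what makes the relocation work at runtime: bundled configs and templates are now searched under the packaged examples/ folder instead of the former model_configs/, runner_configs/ and templates/ package directories. Below is a minimal standalone sketch of that lookup, assuming alea is installed with its package data; the helper name find_packaged_file is illustrative only and does not use alea's private helpers (_package_path, formatted_to_asterisked), whose bodies are not shown in this patch.

import os
from glob import glob


def find_packaged_file(file_name, package_root):
    # Illustrative stand-in for the updated alea.utils._get_abspath:
    # search the relocated example sub-folders for the requested file.
    for sub_dir in ('examples/configs', 'examples/templates'):
        candidate = os.path.join(package_root, sub_dir, file_name)
        # the real helper also expands "{...}" placeholders to "*" before globbing
        if glob(candidate):
            return candidate
    raise FileNotFoundError(f"{file_name} not found in any examples sub-folder")


# Hypothetical usage: resolve one of the relocated YAML configs.
# import alea
# package_root = os.path.dirname(alea.__file__)
# print(find_packaged_file("unbinned_wimp_statistical_model.yaml", package_root))

Callers that pass bare file names to load_yaml should keep working unchanged, since only the directories being searched move.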
From efda5585ecd9b850747f5c19af2fbdc5bb374c46 Mon Sep 17 00:00:00 2001
From: dachengx
Date: Thu, 27 Jul 2023 09:01:15 -0500
Subject: [PATCH 2/4] Correspondingly update setup.py

---
 setup.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 9273dc2e..3fd339c9 100644
--- a/setup.py
+++ b/setup.py
@@ -29,9 +29,7 @@ def open_requirements(path):
     packages=setuptools.find_packages(),
     package_data={
         'alea': [
-            'model_configs/*',
-            'runner_configs/*',
-            'templates/*',
+            'examples/*',
         ],
     },
     url="https://github.com/XENONnT/alea",

From fa1e0b2b1c6a9a11533ea6f817686c3b87183e5d Mon Sep 17 00:00:00 2001
From: dachengx
Date: Thu, 27 Jul 2023 09:04:11 -0500
Subject: [PATCH 3/4] Change to the correct package_data

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 3fd339c9..ac7f54b5 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,8 @@ def open_requirements(path):
     packages=setuptools.find_packages(),
     package_data={
         'alea': [
-            'examples/*',
+            'examples/configs/*',
+            'examples/templates/*',
         ],
     },
     url="https://github.com/XENONnT/alea",

From 3d1e07a1b7ee87d932e4d26c1a3a541063f1597d Mon Sep 17 00:00:00 2001
From: dachengx
Date: Thu, 27 Jul 2023 09:58:06 -0500
Subject: [PATCH 4/4] Remove legacies

---
 alea/_likelihoods/__init__.py          |    1 -
 alea/_likelihoods/ll_nt_from_config.py |  799 ---------------
 alea/_plotting.py                      |  689 -------------
 alea/_scripts/get_data_from_OSG.py     |  173 ----
 alea/_scripts/run_toymc.py             |    5 -
 alea/_scripts/runpy.sbatch             |   18 -
 alea/_scripts/submission_script.py     |    5 -
 alea/_scripts/unpack_data.sh           |   13 -
 alea/_submission.py                    |  538 ----------
 alea/_toymc_running.py                 | 1276 ------------------------
 alea/_utils.py                         |  748 --------------
 11 files changed, 4265 deletions(-)
 delete mode 100644 alea/_likelihoods/__init__.py
 delete mode 100644 alea/_likelihoods/ll_nt_from_config.py
 delete mode 100644 alea/_plotting.py
 delete mode 100644 alea/_scripts/get_data_from_OSG.py
 delete mode 100755 alea/_scripts/run_toymc.py
 delete mode 100755 alea/_scripts/runpy.sbatch
 delete mode 100644 alea/_scripts/submission_script.py
 delete mode 100644 alea/_scripts/unpack_data.sh
 delete mode 100644 alea/_submission.py
 delete mode 100644 alea/_toymc_running.py
 delete mode 100644 alea/_utils.py

diff --git a/alea/_likelihoods/__init__.py b/alea/_likelihoods/__init__.py
deleted file mode 100644
index 8b137891..00000000
--- a/alea/_likelihoods/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/alea/_likelihoods/ll_nt_from_config.py b/alea/_likelihoods/ll_nt_from_config.py
deleted file mode 100644
index 7a2cb96a..00000000
--- a/alea/_likelihoods/ll_nt_from_config.py
+++ /dev/null
@@ -1,799 +0,0 @@
-import re
-import warnings
-import os
-from copy import deepcopy -from pydoc import locate # to lookup inferenceObject class - -import alea.utils as utils -import h5py -import numpy as np -import scipy.stats as sps -from alea._plotting import pdf_plotter -from alea.simulators import BlueiceDataGenerator -from blueice.inference import bestfit_scipy, one_parameter_interval -from blueice.likelihood import LogLikelihoodSum -from inference_interface import (dict_to_structured_array, - structured_array_to_dict, toydata_from_file, - toydata_to_file) -from scipy.interpolate import interp1d - -import logging -logging.basicConfig(level=logging.INFO, force=True) - -minimize_kwargs = {'method': 'Powell', 'options': {'maxiter': 10000000}} - - -class InferenceObject: - """ - Args: - wimp_mass (int): WIMP mass in GeV. - Overwrites wimp_mass in every likelihood term. - livetime (float): Livetime of the data. Unit depends on templates. - config_path (str): path to yaml-configfile which define the likelihood - generate_args (dict): dictionary which is used to created toyMC data. - confidence_level (float, optional): Confidence Level. Defaults to 0.9. - toydata_file: path to file containing toydata, - defaults to None - toydata_mode: "none", "write" or "read". - - if "write": simulate interpolated ll and write toydata to 'toydata_file' - - if "read": read toydata from 'toydata_file' - - if "none": simulate interpolated ll - - if "pass": not generateing toys - Defaults to "write" - limit_threshold (str, optional): path to file from which the thresholds are read. - If None use asymptotics. - defaults to None - confidence_level (float, optional): confidence level for which sensitivity is computed. - defaults to 0.9 - inference_object_config_overrides (dict, optional): Dict of values that are - overridden in the inference_object_config dict provided by config file - defaults to {} - """ - def __init__(self, - wimp_mass, - livetime, - config_path, - threshold_key=None, - generate_args=None, - toydata_file=None, - toydata_mode="write", - limit_threshold=None, - confidence_level=0.9, - inference_object_config_overrides=None, - cache_dir=None, - **kwargs): - self.wimp_mass = wimp_mass - if isinstance(livetime, float): - self.livetime = [livetime] - else: - self.livetime = livetime - - if inference_object_config_overrides is None: - inference_object_config_overrides = {} - - self.config_path = config_path - self.default_multiplier = None - if generate_args is None: - self.generate_args = {} - elif generate_args == "default": - print("Setting default_multiplier to 1") - self.default_multiplier = 1 - self.generate_args = {} - else: - self.generate_args = generate_args - - if toydata_mode == "read": - generate_args_from_toydata = utils._get_generate_args_from_toyfile(toydata_file) - if not ((self.generate_args == generate_args_from_toydata) or (self.generate_args == {})): - warnings.warn("The generate_args you provided are not the same as the ones in the toydata file." 
- + " Using the parsed generate_args.", stacklevel=2) - else: - self.generate_args = generate_args_from_toydata - - self.inference_object_config_overrides = inference_object_config_overrides - self.limit_threshold = limit_threshold - self.confidence_level = confidence_level - self.toydata_file = toydata_file - self.toydata_mode = toydata_mode - self.kwargs = kwargs - self.threshold_key = threshold_key - self.cache_dir = cache_dir - - self.variables_to_read = dict() - self.variables_to_read["wimp_mass"] = self.wimp_mass - self.variables_to_read["livetime"] = self.livetime - - for idx, livetime in enumerate(self.livetime): - setattr(self, "livetime_{}".format(idx), livetime) - - config_data = utils.read_config(config_path) - self.config_data = config_data - inference_object_config = utils.adapt_inference_object_config(config_data, wimp_mass=wimp_mass, cache_dir=self.cache_dir) - - self.limit_threshold_function = None - - overwrite_items = inference_object_config_overrides.get("likelihoods", []) - if overwrite_items: # check if list is not empty - del inference_object_config_overrides["likelihoods"] - - if self.livetime is not None and len(inference_object_config["likelihoods"]) != len( - self.livetime): - raise Exception( - "You try to set a livetime for a combined likelihood. You need to pass as many livetimes as there are likelihoods." - ) - - for idx, ll_config in enumerate(inference_object_config["likelihoods"]): - ll_config["wimp_mass"] = self.wimp_mass - if self.livetime is not None: - if self.livetime[idx] is not None: - ll_config["livetime_days"] = self.livetime[idx] - if idx < len(overwrite_items): - ll_config.update(overwrite_items[idx]) - inference_object_config.update(inference_object_config_overrides) - self.inference_object_config = inference_object_config - - input_files = [] - for ll_config in self.inference_object_config["likelihoods"]: - for source in ll_config["sources"]: - input_files.append(source["templatename"]) - for uncertainty_name, uncertainty_setting in ll_config["shape_parameters"].items(): - if isinstance(uncertainty_setting, dict) and "path_to_prior" in uncertainty_setting.keys(): - path, _ = utils.detect_path(self.config_data) - input_files.append(os.path.join(path, uncertainty_setting["path_to_prior"])) - self.input_files = input_files - - lls = [] - dataset_names = [] - constraint_term_list = [] # will be filled with keys of likelihood parameters to check if a constraint term is added multiple times to the likelihood. - binned = [] - for ll_config in self.inference_object_config["likelihoods"]: - if ll_config["likelihood_type"] == "blueice.likelihood.BinnedLogLikelihood": - binned.append(True) - elif ll_config["likelihood_type"] == "blueice.likelihood.UnbinnedLogLikelihood": - binned.append(False) - else: - raise NotImplementedError - likelihood_object = locate(ll_config["likelihood_type"]) - ll = likelihood_object(ll_config) - for parameter, relative_uncertainty in ll_config[ - "rate_parameter_uncertainties"].items(): - if relative_uncertainty is not None: - constraint_already_set = parameter in constraint_term_list - if constraint_already_set: - warnings.warn(f"The constraint term for {parameter} rate is added by multiple parts of the likelihood.") - constraint_term_list.append(parameter) - print("Setting rate parameter uncertainty for {} to {}". 
- format(parameter, relative_uncertainty)) - parameter_central_value = self.generate_args.get( - parameter + "_rate_multiplier", - self.default_multiplier) - self.generate_args[ - parameter + - "_rate_multiplier"] = parameter_central_value - if parameter_central_value is None: - print("Unknown parameter", parameter) - print( - "generate_args and rate_parameter_uncertainties seem incompatible" - ) - print("generate_args are:", self.generate_args) - print("rate_parameter_uncertainties are:", - ll_config["rate_parameter_uncertainties"]) - raise Exception - ll.add_rate_parameter(parameter, - log_prior=sps.norm( - 1, parameter_central_value * - relative_uncertainty).logpdf) - else: - ll.add_rate_parameter(parameter) - - # not tested! - for parameter, values in ll_config["shape_parameters"].items(): - dict_to_set = {} - dict_to_set[parameter] = deepcopy(values) - if isinstance(values, dict) and "log_prior" in values.keys(): - constraint_already_set = parameter in constraint_term_list - if constraint_already_set: - warnings.warn(f"The constraint term for {parameter} shape is added by multiple parts of the likelihood.") - constraint_term_list.append(parameter) - # this_log_prior = eval(values["log_prior"]) - # values["log_prior"] = this_log_prior - this_log_prior = sps.norm(loc=values["log_prior"]["loc"], scale=values["log_prior"]["scale"]).logpdf - # values["log_prior"] = this_log_prior - dict_to_set[parameter]["log_prior"] = this_log_prior - elif isinstance(values, dict) and "path_to_prior" in values.keys(): - constraint_already_set = parameter in constraint_term_list - if constraint_already_set: - warnings.warn(f"The constraint term for {parameter} shape is added by multiple parts of the likelihood.") - constraint_term_list.append(parameter) - filename_expander = values.get("filename_expander", {}) - transformed_expander = {} - for string_name, value_to_get in filename_expander.items(): - transformed_expander[string_name] = getattr(self, value_to_get) - - BASEPATH, on_remote = utils.detect_path(self.config_data) - if on_remote: - file_to_open = os.path.join(BASEPATH, values["path_to_prior"].format(**transformed_expander)) - else: - # ensures that file is expected in top-level - file_to_open = os.path.join(BASEPATH, os.path.basename(values["path_to_prior"].format(**transformed_expander))) - log_prior_from_file = utils.get_log_prior_from_file(file_to_open, parameter=parameter) - if isinstance(log_prior_from_file, dict): - # make the function yourself from what is stored in file - values["log_prior"] = {} - values["log_prior"]["loc"] = log_prior_from_file["central_value"] - values["log_prior"]["scale"] = log_prior_from_file["uncertainty"] - dict_to_set[parameter]["log_prior"] = sps.norm(log_prior_from_file["central_value"], log_prior_from_file["uncertainty"]).logpdf - else: - # then the log_prior is a function - values["log_prior"] = log_prior_from_file - dict_to_set[parameter]["log_prior"] = log_prior_from_file - - if "path_to_prior" in dict_to_set[parameter].keys(): - del dict_to_set[parameter]["path_to_prior"] - if "filename_expander" in dict_to_set[parameter].keys(): - del dict_to_set[parameter]["filename_expander"] - - if isinstance(values, list): - ll.add_shape_parameter(parameter, dict_to_set[parameter]) - else: - ll.add_shape_parameter(parameter, **dict_to_set[parameter]) - - ll.prepare() - - dtype = [(key[0], float) for key in ll_config["analysis_space"]] - dummy_data = np.zeros(1, dtype) - for key in ll_config["analysis_space"]: - dummy_data[key[0]] = (key[1][-1] - key[1][0]) / 2 - - # 
The above used to be: might be useful for debugging - # dtype = [("cs1", float), ("logcs2b", float)] - # dummy_data = np.zeros(1, dtype) - # dummy_data["cs1"] = 50. - # dummy_data["logcs2b"] = 2. - ll.set_data(dummy_data) - lls.append(ll) - dataset_names.append(ll_config["dataset_name"]) - dataset_names += inference_object_config["dataset_names"] - self.dataset_names = dataset_names - self.lls = lls - self.binned = binned - self.pdf_plotters = [pdf_plotter(ll) for ll in self.lls] - - self.signal_expectations = None - self.thresholds = None - self.nominal_signal_expectation = None - self.datasets_array = None - self.set_toy_reading_mode(toydata_mode=self.toydata_mode, toydata_file=self.toydata_file) - self.ll = LogLikelihoodSum(self.lls, likelihood_weights=self.kwargs.get("likelihood_weights", None)) - - self.promote_generate_args() - - def set_toy_reading_mode(self, toydata_mode, toydata_file): - self.toydata_mode = toydata_mode - self.toydata_file = toydata_file - if toydata_mode == "none": - self.rgs = [BlueiceDataGenerator(ll, binned=b) for ll, b in zip(self.lls, self.binned)] - if hasattr(self, "datasets_array"): - del self.datasets_array - if hasattr(self, "toydata_index"): - del self.toydata_index - elif toydata_mode == "write": - self.rgs = [BlueiceDataGenerator(ll, binned=b) for ll, b in zip(self.lls, self.binned)] - self.datasets_array = [] - if hasattr(self, "toydata_index"): - del self.toydata_index - elif toydata_mode == "read": - dataset_names = self.load_toydata(toydata_file) - assert self.dataset_names == dataset_names - self.toydata_index = 0 - elif toydata_mode == "pass": - self.toydata_index = 0 - - else: - print("Unknown toydata_mode {toydata_mode}".format( - toydata_mode=self.toydata_mode)) - print("Allowed modes are none, write, read, pass") - raise SystemExit - - def load_toydata(self, toydata_file): - self.datasets_array, dataset_names = toydata_from_file( - toydata_file) - return dataset_names - - def full_ll(self, **kwargs): - for key in kwargs: - if key not in self.get_parameter_list(): - raise Exception("Unknown parameter {}".format(key)) - return self.ll(**kwargs) - - def build_threshold_key(self, threshold_key=None): - if self.limit_threshold is None: - raise Exception( - "You need to set a limit_threshold, a file where the thresholds are stored." - ) - - if threshold_key is None: - with h5py.File(self.limit_threshold, "r") as f: - threshold_key_pattern = f.attrs["threshold_key_pattern"] - - variables = re.findall("\{(.*?)\}", threshold_key_pattern) - variables = [var.split(":")[0] for var in variables] - - for var in variables: - if var not in dir(self): - raise Exception( - "Variable {} is not in the list of variables to read". - format(var)) - threshold_key = threshold_key_pattern.format(**self.__dict__) - threshold_key = threshold_key.replace("\"", "") - else: - # print a warning with the warning module - warnings.warn( - "You are using a custom threshold_key. This is not recommended, as the thresholds and parameters most likely mismatch." - ) - - self.check_if_key_exists(threshold_key) - self.threshold_key = threshold_key - - def check_if_key_exists(self, threshold_key): - with h5py.File(self.limit_threshold, "r") as f: - if threshold_key not in f: - raise Exception( - "Threshold key \n{}\n is not in the list of thresholds in the file {}" - .format(threshold_key, self.limit_threshold) + - "\nAvailable keys are:" + "\n".join(f.keys())) - - def _set_limit_threshold_function(self): - if self.limit_threshold is None: - print("No limit_threshold set. 
Using asymptotics.") - limit_threshold_function = lambda x, dummy: sps.chi2(1).isf(0.1) - self.signal_expectations = None - self.thresholds = None - self.nominal_signal_expectation = None - - return limit_threshold_function - else: - print( - "loading limit_threshold {:s}, confidence level {:.2f}".format( - self.limit_threshold, self.confidence_level)) - - self.signal_expectations, self.thresholds, self.nominal_signal_expectation = utils.read_neyman_threshold_update( - self.limit_threshold, - self.threshold_key, - confidence_level=self.confidence_level) - - print("loaded threshold") - print("signal_expectations", self.signal_expectations) - print("thresholds", self.thresholds) - print("nominal_signal_expectation", - self.nominal_signal_expectation) - ltf = interp1d(self.signal_expectations / - self.nominal_signal_expectation, - self.thresholds, - bounds_error=False, - fill_value=sps.chi2(1).isf(0.1)) - - def limit_threshold_function(x, cl): - return ltf(x) - - return limit_threshold_function - - def assign_data(self, datas): - for data, ll in zip(datas, self.lls): - ll.set_data(data) - - def simulate_and_assign_data(self, generate_args): - logging.debug("simulate_and_assign_data with generate_args", generate_args) - self._check_generate_args(generate_args=generate_args) - datas = [rg.simulate(**generate_args) for rg in self.rgs] - self.assign_data(datas) - return datas - - def simulate_and_assign_measurements(self, generate_args): - self._check_generate_args(generate_args=generate_args) - ancillary_measurements = [] - for ll_config in self.inference_object_config["likelihoods"]: - ret = dict() - # loop over rate parameters - for parameter_name, parameter_uncert in ll_config[ - "rate_parameter_uncertainties"].items(): - if parameter_uncert is not None: - parameter_mean = generate_args.get( - parameter_name + "_rate_multiplier", 1) - parameter_meas = max( - 0, - sps.norm(parameter_mean, - parameter_mean * parameter_uncert).rvs()) - ret[parameter_name + "_rate_multiplier"] = parameter_meas - # loop over shape parameters - for parameter_name, parameter_uncert in ll_config["shape_parameters"].items(): - if "log_prior" in parameter_uncert.keys(): - parameter_mean = generate_args.get(parameter_name) - parameter_meas = sps.norm(parameter_mean, - parameter_uncert["log_prior"]["scale"]).rvs() - ret[parameter_name] = parameter_meas - - ancillary_measurements.append(ret) - self.assign_measurements(ancillary_measurements=ancillary_measurements, - generate_args=generate_args) - return ancillary_measurements - - def assign_measurements(self, ancillary_measurements, generate_args): - if isinstance(ancillary_measurements, dict): - ancillary_measurements = [ancillary_measurements] * len( - self.lls) - for ancillary_measurement, ll_config, ll in zip(ancillary_measurements, self.inference_object_config["likelihoods"], self.lls): - for parameter_name, parameter_uncert in ll_config[ - "rate_parameter_uncertainties"].items(): - if parameter_uncert is not None: - parameter_mean = generate_args.get( - parameter_name + "_rate_multiplier", 1) - parameter_meas = ancillary_measurement[parameter_name + - "_rate_multiplier"] - logging.debug(f"Assign measurement for {parameter_name}") - logging.debug(f"Normal distribution with mean {parameter_meas:.5f} and width {parameter_mean * parameter_uncert:.5f}.") - ll.rate_parameters[parameter_name] = sps.norm( - parameter_meas, - parameter_mean * parameter_uncert).logpdf - - for ancillary_measurement, ll_config, ll in zip(ancillary_measurements, 
self.inference_object_config["likelihoods"], self.lls): - for parameter_name, parameter_uncert in ll_config[ - "shape_parameters"].items(): - if "log_prior" in parameter_uncert.keys(): - parameter_meas = ancillary_measurement[parameter_name] - shape_options = list(deepcopy(ll.shape_parameters[parameter_name])) - shape_options[1] = sps.norm( - parameter_meas, - parameter_uncert["log_prior"]["scale"]).logpdf - ll.shape_parameters[parameter_name] = tuple(shape_options) - - def llr(self, - extra_args=None, - extra_args_null=None, - guess=None): - - if extra_args is None: - extra_args = {} - if guess is None: - guess = {} - if extra_args_null is None: - extra_args_null = {"signal_rate_multiplier": 0.} - extra_args_null_total = deepcopy(extra_args) - extra_args_null_total.update(extra_args_null) - res1, llval1 = bestfit_scipy(self.ll, - guess=guess, - minimize_kwargs=minimize_kwargs, - **extra_args) - res0, llval0 = bestfit_scipy(self.ll, - guess=guess, - minimize_kwargs=minimize_kwargs, - **extra_args_null_total) - return 2. * (llval1 - llval0), llval1, res1, llval0, res0 - - def confidence_interval(self, - llval_best, - extra_args=None, - guess=None, - parameter_name="signal_rate_multiplier", - two_sided=True): - if guess is None: - guess = {} - if extra_args is None: - extra_args = {} - - #the confidence interval computation looks in a bounded region-- we will say that we will not look for larger than 300 signal events - rate_multiplier_max = 10000. / self.get_mus(**extra_args).get( - parameter_name.replace("_rate_multiplier", ""), 1.) - rate_multiplier_min = 0. - logging.debug("rate_multiplier_max: " + str(rate_multiplier_max)) - - if self.limit_threshold_function is None: - self.limit_threshold_function = self._set_limit_threshold_function( - ) - - dl = -1 * np.inf - ul = one_parameter_interval(self.ll, - parameter_name, - rate_multiplier_max, - bestfit_routine=bestfit_scipy, - minimize_kwargs=minimize_kwargs, - t_ppf=self.limit_threshold_function, - guess=guess, - **extra_args) - if two_sided: - extra_args_null = deepcopy(extra_args) - extra_args_null[parameter_name] = rate_multiplier_min - - res_null, llval_null = bestfit_scipy( - self.ll, - guess=guess, - minimize_kwargs=minimize_kwargs, - **extra_args_null) - llr = 2. 
* (llval_best - llval_null) - if llr <= self.limit_threshold_function(rate_multiplier_min, 0): - dl = rate_multiplier_min - else: - dl = one_parameter_interval( - self.ll, - parameter_name, - rate_multiplier_min, - kind="lower", - bestfit_routine=bestfit_scipy, - minimize_kwargs=minimize_kwargs, - t_ppf=self.limit_threshold_function, - guess=guess, - **extra_args) - return dl, ul - - def generate_toydata(self, generate_args): - datas = self.simulate_and_assign_data(generate_args=generate_args) - ancillary_measurements = self.simulate_and_assign_measurements( - generate_args=generate_args) - if self.toydata_mode == "write": - all_keys = [] - ancillary_measurements_to_store = {} - for ancillary_measurement in ancillary_measurements: - for key, value in ancillary_measurement.items(): - all_keys.append(key) - ancillary_measurements_to_store[key] = value - if len(all_keys) != len(set(all_keys)): - raise ValueError("WARNING: some keys are repeated in the ancillary measurements.") - - datas.append(dict_to_structured_array(ancillary_measurements_to_store)) - if 0 < len(generate_args): - datas.append(dict_to_structured_array(generate_args)) - else: - datas.append(dict_to_structured_array({"alldefault": 0})) - - self.datasets_array.append(datas) - return datas, ancillary_measurements - - def read_toydata(self, generate_args): - datas = self.datasets_array[self.toydata_index] - self.toydata_index += 1 - self.assign_data(datas) - #print("ancillary measurement is", datas[-2], datas[-2].dtype) - #print("ancillary measurement length",len(datas[-2])) - ancillary_measurements = structured_array_to_dict(datas[-2]) - #print("ancillary measurement",ancillary_measurements) - self.assign_measurements(ancillary_measurements, generate_args) - - def _check_generate_args(self, generate_args): - parameters = self.get_parameter_list() - for par_to_pop in ["ll", "dl", "ul"]: - if par_to_pop in parameters: - parameters.remove(par_to_pop) - generate_args_present = sorted(list(generate_args.keys())) == sorted(parameters) - if not generate_args_present: - error_msg = "" - for parameter in parameters: - if parameter not in generate_args.keys(): - error_msg += f"necessary parameter {parameter} not in generate_args\n" - - for parameter in generate_args.keys(): - if parameter not in parameters: - error_msg += f"parameter {parameter} defined in generate_args not in parameter list\n" - raise ValueError(error_msg) - - - def toy_simulation(self, - generate_args=None, - extra_args=None, - guess=None, - compute_confidence_interval=False, - confidence_interval_args=None, - propagate_guess=True): - """Read in next toy dataset (toydata_mode == "read") or simulate - toy dataset with given `generate_args` (toydata_mode == "write" or "none") - and assign data and measurements. - A log likelihood fit of the dataset is performed via - `blueice.inference.bestfit_scipy` for each extra arg in `extra_args`. - - - Args: - generate_args (dict, optional): Dict of rate, signal and - shape parameters. Defaults to {}. - extra_args (list, optional): each toyMC is fit with each - extra_arg. Defaults to [{},{"signal_rate_multiplier":0.}]. - guess (dict, optional): Guess passed to fitter. - Defaults to {"signal_rate_multiplier":0.}. - compute_confidence_interval (bool, optional): if True, the function - will compute confidence intervals on the - signal_rate_multiplier, for the _first_ set of extra_args. - Defaults to False. 
- confidence_interval_args (dict, optional): If - `compute_confidence_interval` is True, these kwargs are passed - to self.confidence_interval. Defaults to {}. - propagate_guess (bool, optional): If True, the previous fit results - (except the values specified in guess) are used as a guess, - defaults to True - - Returns: - ress: List of ML fit results for all extra_args - """ - logging.debug("toy_simulation with generate_args", generate_args) - if generate_args is None: - generate_args = {} - - if extra_args is None: - extra_args=[{}, { - "signal_rate_multiplier": 0. - }] - if guess is None: - guess={"signal_rate_multiplier": 0.} - if confidence_interval_args is None: - confidence_interval_args={} - if self.toydata_mode == "read": - self.read_toydata(generate_args) - elif self.toydata_mode == "pass": - print("Not generating toy data") - else: - self.generate_toydata(generate_args=generate_args) - for i, ll in enumerate(self.lls): - logging.debug(f"toy data in ll with index {i} has length " + str(len(ll._data))) - logging.debug(f"source names: " + str(ll.source_name_list)) - for j, name in enumerate(ll.source_name_list): - logging.debug(f"number of simulated {name} events: " + str(len(ll._data[ll._data["source"] == j]))) - logging.debug(f"rate_parameters in ll with index {i}: " + str(ll.rate_parameters)) - logging.debug(f"shape_parameters in ll with index {i}: " + str(ll.shape_parameters)) - - ress = [] - extra_args_runs = extra_args - if type(extra_args_runs) is dict: - extra_args_runs = [extra_args_runs] - - previous_fit = {} - logging.debug("Propagate guess: " + str(propagate_guess)) - for extra_args_run in extra_args_runs: - guess_run = {} - if propagate_guess: - guess_run.update(previous_fit) - guess_run.update(guess) - - logging.debug("guess_run: " + str(guess_run)) - logging.debug("extra_args_run: " + str(extra_args_run)) - logging.debug("self.ll: " + str(self.ll)) - res, llval = bestfit_scipy(self.ll, - guess=guess_run, - minimize_kwargs=minimize_kwargs, - **extra_args_run) - previous_fit = res - res.update(extra_args_run) - res["ll"] = llval - res["dl"] = -1. - res["ul"] = -1. - ress.append(res) - logging.debug("ress: " + str(ress)) - - # All following is just for debugging (basically what blueice does) - ret = 0. 
- for i, (ll, parameter_names, ll_weight) in enumerate( - zip(self.lls, - self.ll.likelihood_parameters, - self.ll.likelihood_weights)): - pass_kwargs = {k: v - for k, v in res.items() - if k in parameter_names} - - ret += ll_weight * ll(compute_pdf=False, - livetime_days=None, - **pass_kwargs) - logging.debug(f"LogLikelihoodSum after adding ll({i}): {ret}") - - if compute_confidence_interval: - ci_guess = deepcopy(ress[-1]) - ci_guess.pop("signal_rate_multiplier", None) - ci_guess.update({"signal_rate_multiplier": 0}) - ci_args = { - "llval_best": ress[-1]["ll"], - "extra_args": extra_args_runs[-1], - "guess": ci_guess, - } - ci_args.update(confidence_interval_args) - logging.debug("ci_args: " + str(ci_args)) - dl, ul = self.confidence_interval(**ci_args) - logging.debug("dl: " + str(dl)) - logging.debug("ul: " + str(ul)) - - ress[-1]["dl"] = dl - ress[-1]["ul"] = ul - - return ress - - def get_mus(self, - evaluate_per_likelihood=False, - livetime_days_multiplier=None, - **res): - """Return dictionary of expectation values for all sources - evaluate_per_likelihood (bool): returns a list of dictionarys with expectations per likelihood""" - - if not (isinstance(livetime_days_multiplier, list) - or livetime_days_multiplier is None): - raise Exception( - "livetime_days_multiplier needs to be list or None") - - if livetime_days_multiplier is not None and len( - livetime_days_multiplier) != len(self.lls): - raise Exception( - "You cannot set the livetime_days_multiplier for only a subset of your sub-likelihoods" - ) - - to_return = [] - for idx, ll in enumerate(self.lls): - this_res = deepcopy(res) - ret = {} - - # need to take care that multiplier is not evaluted for sub-likelihood - parameters = list(ll.rate_parameters.keys()) + list( - ll.shape_parameters.keys()) - for res_key in res.keys(): - parameter_name = res_key.replace("_rate_multiplier", "") - parameter_name = parameter_name.replace( - "_shape_multiplier", "") - if parameter_name not in parameters: - del this_res[res_key] - - if isinstance(livetime_days_multiplier, list): - default_livetime = ll.pdf_base_config["livetime_days"] - this_res.update({ - "livetime_days": - float(livetime_days_multiplier[idx]) * - float(default_livetime) - }) - - mus = ll(full_output=True, **this_res)[1] - for n, mu in zip(ll.source_name_list, mus): - ret[n] = ret.get(n, 0) + mu - to_return.append(ret) - - if evaluate_per_likelihood: - return to_return - else: - ret = {} - for item in to_return: - for n, mu in item.items(): - ret[n] = ret.get(n, 0) + mu - return ret - - def get_parameter_list(self): - """Return string of rate and shape parameters""" - ret = [ - n + "_rate_multiplier" - for n in list(self.ll.rate_parameters.keys()) - ] - ret += list(self.ll.shape_parameters.keys()) - ret += ["ll", "dl", "ul"] - return ret - - def write_toydata(self): - toydata_to_file(self.toydata_file, - datasets_array=self.datasets_array, - dataset_names=self.dataset_names, - overwrite_existing_file=True) - - def promote_generate_args(self): - """Promote generate_args to members of the inferenceObject""" - for key, value in self.generate_args.items(): - print("promoting", key, "to", value) - setattr(self, key, value) - - def get_pdfs(self, fit_result, slice_args=None, expected_events=False): - """ - fit_result (dict): dictionary of fit result - slice_args (list): list of arguments to slice the pdfs - """ - if slice_args is None: - slice_args = [{} for _ in self.lls] - else: - if len(slice_args) != len(self.lls): - raise Exception( - "You need to provide a slice_args for 
each sub-likelihood" - ) - - l_pdfs = [] - for plotter, slice_arg in zip(self.pdf_plotters, slice_args): - myresult = utils._divide_fit_result( - plotter.ll, fit_result) - mypdf = plotter.get_pdf(**myresult, slice_args=slice_arg, expected_events=expected_events) - l_pdfs.append(mypdf) - return l_pdfs diff --git a/alea/_plotting.py b/alea/_plotting.py deleted file mode 100644 index 286d78b2..00000000 --- a/alea/_plotting.py +++ /dev/null @@ -1,689 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -from itertools import product -from scipy.stats import poisson, norm -from scipy.interpolate import interp1d -import multihist as mh -from multihist import Histdd - -from copy import deepcopy - - -def interleave_bincv(binc, bine): - rc = np.zeros(2 * len(binc)) - re = np.zeros(2 * len(binc)) - rc[0::2] = binc - rc[1::2] = binc - re[0::2] = bine[0:-1] - re[1::2] = bine[1::] - return rc, re - - -class pdf_plotter(): - def __init__(self, ll, analysis_space_plot=None): - """Generate histograms for all sources - - Args: - ll (ll object): alea ll object - analysis_space_plot (list of tuples, optional): if not None - this replaces the bin edges defined in the ll config file - for plotting / defining the histogras. - Example: analysis_space_plot = - [("cs1", np.linspace(0, 100, 101)), - ("logcs2", np.linspace(1.7, 3.9, 125 + 1))] - Defaults to None. - """ - self.ll = deepcopy(ll) - bins = [] # bin edges - bincs = [] # bin centers of each component - direction_names = [] - dtype = [] - data_length = 1 # number of bins in nD histogram - self.data_lengths = [] # list of number of bins of each component - - if analysis_space_plot is not None: - analysis_space = analysis_space_plot - else: - analysis_space = self.ll.base_model.config['analysis_space'] - for direction in analysis_space: - bins.append(direction[1]) - binc = 0.5 * (direction[1][1::] + direction[1][0:-1]) - bincs.append(binc) - # binc_dict[direction[0]] = binc - dtype.append((direction[0], float)) - data_length *= len(direction[1]) - 1 - self.data_lengths.append(len(direction[1]) - 1) - direction_names.append(direction[0]) - dtype.append(('source', int)) - - data_binc = np.zeros(data_length, dtype=dtype) - for i, l in enumerate(product(*bincs)): - for n, v in zip(direction_names, l): - data_binc[n][i] = v - - self.ll.set_data(data_binc) - - self.source_histograms = [] - for i in range(len(self.ll.base_model.sources)): - self.source_histograms.append(Histdd(bins=bins)) - - def get_pdf(self, - i_source=None, - expected_events=False, - slice_args={}, - **kwargs): - """get a histogram of the PDF or of the expected events for the - entire analysis space or projected onto one axis. - - Args: - i_source (int, optional): index of a source, the histogram - of which will be returned. If None is passed, a histogram - with all sources is returned. Defaults to None. - expected_events (bool, optional): If True, histogram of expected - events is returned, otherwise a histogram of the PDF. - Defaults to False. - slice_args (dict, optional): Slicing arguments for plotting. - Defaults to {}. - kwargs are rate_multipliers and shape_parameter_settings - that are passed to the call of the full ll output, as - well as n_mc, which is used as a rate factor if passed. - - Raises: - ValueError: if slice_axis is given in slice_args but not - collapse_slices. 
- - Returns: - multhist.Hisdd: histogram of PDF / expected events - """ - n_mc = kwargs.get('n_mc', None) - if n_mc is not None: - del kwargs['n_mc'] - ret = self.ll(full_output=True, **kwargs) # result, mus, ps - if type(ret) == float: - print("ERROR, generator kwarg outside range?") - print(kwargs) - #print("ret,kwargs",ret,kwargs) - _, mus, ps_array = ret - for i in range(len(self.ll.base_model.sources)): - self.source_histograms[i].histogram = ps_array[i].reshape( - self.data_lengths) - #source_histograms[i]*=source_histograms[i].bin_volumes() - self.mus = mus - self.last_kwargs = kwargs - - if i_source is not None: - ret_histogram = deepcopy(self.source_histograms[i_source]) - ret_histogram.histogram *= ret_histogram.bin_volumes() - if expected_events: - if n_mc is not None: - rate_factor = n_mc - print( - 'Multiply histogram with rate factor(n_mc): {}'.format( - rate_factor)) - else: - rate_factor = mus[i_source] - print('Multiply histogram with rate factor(mu): {}'.format( - rate_factor)) - ret_histogram *= rate_factor - - else: - ret_histogram = self.source_histograms[0].similar_blank_hist() - for i in range(len(mus)): - ret_histogram.histogram += mus[i] * self.source_histograms[ - i].histogram / np.sum(mus) - ret_histogram.histogram *= ret_histogram.bin_volumes() - if expected_events: - if n_mc is not None: - rate_factor = n_mc - print('Multiply histogram with rate factor (n_mc): {}'. - format(rate_factor)) - else: - rate_factor = np.sum(mus) - print('Multiply histogram with rate factor (sum(mus)): {}'. - format(rate_factor)) - - ret_histogram *= rate_factor - ret_histogram.axis_names = [ - n for n, _ in self.ll.base_model.config['analysis_space'] - ] - - if type(slice_args) is dict: - slice_args = [slice_args] - - for sa in slice_args: - # Supply slicing args for plotting: - slice_axis = sa.get("slice_axis", None) - sum_axis = sa.get( - "sum_axis", False - ) #decide if you wish to sum the histogram into lower dimensions or - - collapse_axis = sa.get('collapse_axis', None) - collapse_slices = sa.get('collapse_slices', None) - - if (slice_axis is not None): - #use total, not differential probability for slicing (makes sums easier): - - bes = ret_histogram.bin_edges[slice_axis] - slice_axis_limits = sa.get("slice_axis_limits", - [bes[0], bes[-1]]) - print("slicing axis", slice_axis) - if sum_axis: - ret_histogram = ret_histogram.slicesum( - axis=slice_axis, - start=slice_axis_limits[0], - stop=slice_axis_limits[1]) - else: - ret_histogram = ret_histogram.slice( - axis=slice_axis, - start=slice_axis_limits[0], - stop=slice_axis_limits[1]) - - if collapse_axis is not None: - if collapse_slices is None: - raise ValueError( - "To collapse you must supply collapse_slices") - ret_histogram = ret_histogram.collapse_bins( - collapse_slices, axis=collapse_axis) - - if not expected_events: - ret_histogram.histogram /= ret_histogram.bin_volumes() - - return ret_histogram - - def plot_pdf(self, - i_source=None, - expected_events=False, - slice_args={}, - plot_kwargs={}, - data=None, - scatter_kwargs={}, - **kwargs): - h = self.get_pdf(i_source=i_source, - expected_events=expected_events, - slice_args=slice_args, - **kwargs) - h.plot(**plot_kwargs) - - if data is not None: - if 'source' in data.dtype.names: - for i, source in enumerate(self.ll.base_model.sources): - scatter_kwargs_default = {'lw': 0.5, 'edgecolor': 'gray'} - scatter_kwargs_default['color'] = source.config.get( - 'color', 'gray') - scatter_kwargs_default['label'] = source.config.get( - 'label', '') - 
scatter_kwargs_default.update(scatter_kwargs) - axis_names = h.axis_names - data_source = data[data['source'] == i] - plt.scatter(data_source[axis_names[0]], - data_source[axis_names[1]], - **scatter_kwargs_default) - else: - scatter_kwargs_default = {'lw': 0.5, 'edgecolor': 'gray'} - scatter_kwargs_default.update(scatter_kwargs) - axis_names = h.axis_names - plt.scatter(data[axis_names[0]], data[axis_names[1]], - **scatter_kwargs_default) - - def get_expectation_value(self, i_source=None, slice_args={}, **kwargs): - if i_source is not None: - return self.get_pdf(i_source=i_source, - expected_events=True, - slice_args=slice_args, - **kwargs).n - mus = [] - for i, _ in enumerate(self.source_histograms): - mus.append( - self.get_pdf(i_source=i, - expected_events=True, - slice_args=slice_args, - **kwargs).n) - return np.array(mus) - - def get_projected_plottable(self, - proj_axis=0, - i_source=0, - per_axis=False, - binning=None, - slice_args={}, - cl=0.68, - positive_radius=True, - **kwargs): - h = deepcopy( - self.get_pdf(i_source=i_source, - slice_args=slice_args, - expected_events=True, - **kwargs)) - hh = h.project(axis=proj_axis) - if positive_radius: - hh.histogram[1] += hh.histogram[0] - hh.histogram[0] = 0. - bes = hh.bin_edges - ns = np.concatenate([np.zeros(1), hh.cumulative_histogram]) - if binning is None: - bins = hh.bin_edges - elif type(binning) is np.ndarray: - bins = binning - elif np.isreal(binning): - cumulative_interpolation = interp1d(ns, bes) - nbins = np.ceil(ns[-1] / binning) - bins = cumulative_interpolation(np.linspace(0, ns[-1], nbins)) - - ns_interpolator = interp1d(bes, - ns, - bounds_error=False, - fill_value=(0, ns[-1])) - - n_ret = ns_interpolator(bins) - n_ret = n_ret[1::] - n_ret[0:-1] - h_ret = mh.Hist1d(bins=bins) - h_ret.histogram = n_ret - if per_axis: - h_ret /= h_ret.bin_volumes() - if cl is not None: - hd, hu = poisson(n_ret).interval(cl) - hu = np.maximum(n_ret, hu) - else: - hd = hu = np.zeros(len(n_ret)) - if per_axis: - hd = hd / h_ret.bin_volumes() - hu = hu / h_ret.bin_volumes() - hd, be = interleave_bincv(hd, bins) - hu, be = interleave_bincv(hu, bins) - plot_kwargs = {'lw': 3, 'color': 'black'} - if i_source is not None: - plot_kwargs['color'] = self.ll.base_model.sources[ - i_source].config.get("color", 'gray') - plot_kwargs['label'] = self.ll.base_model.sources[ - i_source].config.get("label", 'label') - plot_kwargs['lw'] = 4 - - return h_ret, be, hd, hu, plot_kwargs - - def plot_projection( - self, - proj_axis=2, - binning=None, - per_axis=False, - slice_args={}, - res={}, - data=None, - cl=0.68, - plot_kwarg_dict={ - 'wimp': { - 'linestyle': '--' - }, - 'total': { - 'label': 'Total Model' - } - }, - ylim=None, - xlim=None, - scatter_kwargs={}, - plottables=None, - logy=True, - reference_period=None, - positive_radius=False, - plot_legend=True, - plot_order=['cnns', 'radiogenic', 'ac', 'wall', 'er', 'total', 'wimp'], - ): - plottables, legend = plot_projection( - pdf_plotter=self, - proj_axis=proj_axis, - binning=binning, - per_axis=per_axis, - slice_args=slice_args, - res=res, - data=data, - cl=cl, - plot_kwarg_dict=plot_kwarg_dict, - ylim=ylim, - xlim=xlim, - scatter_kwargs=scatter_kwargs, - plottables=plottables, - logy=logy, - reference_period=reference_period, - plot_order=plot_order, - plot_legend=plot_legend, - positive_radius=positive_radius, - ) - return plottables, legend - - def plot_projection_residual( - self, - proj_axis=2, - binning=None, - per_axis=False, - slice_args={}, - res={}, - data=None, - cl=0.68, - plot_kwarg_dict={ 
- 'wimp': { - 'linestyle': '--' - }, - 'total': { - 'label': 'Total Model' - } - }, - ylim=None, - xlim=None, - scatter_kwargs={}, - plottables=None, - logy=True, - reference_period=None, - plot_legend=True, - positive_radius=False, - plot_order=['cnns', 'radiogenic', 'ac', 'wall', 'er', 'total', 'wimp'], - ): - plottables, legend, ps = plot_projection_residual( - pdf_plotter=self, - proj_axis=proj_axis, - binning=binning, - per_axis=per_axis, - slice_args=slice_args, - res=res, - data=data, - cl=cl, - plot_kwarg_dict=plot_kwarg_dict, - ylim=ylim, - xlim=xlim, - scatter_kwargs=scatter_kwargs, - plottables=plottables, - logy=logy, - reference_period=reference_period, - plot_order=plot_order, - plot_legend=plot_legend, - positive_radius=positive_radius, - ) - return plottables, legend, ps - - def plot_projected_difference( - self, - proj_axis=0, - i_source=0, - binning=None, - fractional=False, - absolute=False, - slice_args={}, - per_axis=False, - res={}, - resmod={}, - plot_kwargs={}, - ): - plot_projected_difference( - self, - proj_axis=proj_axis, - i_source=i_source, - binning=binning, - fractional=fractional, - absolute=absolute, - slice_args=slice_args, - per_axis=per_axis, - res=res, - resmod=resmod, - plot_kwargs=plot_kwargs, - ) - - -def plot_projected_difference(plotter, - proj_axis=0, - i_source=0, - binning=None, - fractional=False, - absolute=False, - slice_args={}, - per_axis=False, - res={}, - resmod={}, - plot_kwargs={}): - - h, _, _, _, default_plot_kwargs = plotter.get_projected_plottable( - proj_axis=proj_axis, - i_source=i_source, - per_axis=per_axis, - binning=binning, - slice_args=slice_args, - **res) - print(h, len(h)) - res0 = deepcopy(res) - res0.update(resmod) - h0, _, _, _, _ = plotter.get_projected_plottable(proj_axis=proj_axis, - i_source=i_source, - per_axis=per_axis, - binning=binning, - slice_args=slice_args, - **res0) - if not fractional: - h = h - h0 - else: - h = (h - h0) / h - if absolute: - h.histogram = abs(h.histogram) - default_plot_kwargs.update(plot_kwargs) - h.plot(**default_plot_kwargs) - - -def plot_projection( - pdf_plotter=None, - proj_axis=2, - binning=None, - per_axis=False, - slice_args={}, - res={}, - data=None, - cl=0.68, - plot_kwarg_dict={ - 'wimp': { - 'linestyle': '--' - }, - 'total': { - 'label': 'Total Model' - } - }, - ylim=None, - xlim=None, - scatter_kwargs={}, - plottables=None, - logy=True, - reference_period=None, - plot_legend=True, - positive_radius=True, - plot_order=['cnns', 'radiogenic', 'ac', 'wall', 'er', 'total', 'wimp'], -): - # from sr_ll_definition import human_name_dict - if plottables is None: - plottables = {} - plottables['total'] = pdf_plotter.get_projected_plottable( - proj_axis=proj_axis, - i_source=None, - per_axis=per_axis, - binning=binning, - slice_args=slice_args, - positive_radius=positive_radius, - **res) - bins = plottables['total'][0].bin_edges - for i, sn in enumerate(pdf_plotter.ll.source_name_list): - plottables[sn] = pdf_plotter.get_projected_plottable( - proj_axis=proj_axis, - i_source=i, - per_axis=per_axis, - binning=bins, - slice_args=slice_args, - positive_radius=positive_radius, - **res) - else: - bins = plottables['total'][0].bin_edges - for k in plot_order: - (h_res, be, ed, eu, kwarg) = plottables[k] - if reference_period is not None: - h_res = h_res / reference_period - kwarg.update(plot_kwarg_dict.get(k, {})) - h_res.plot(**kwarg) - h_res, be, ed, eu, kwarg = plottables['total'] - if reference_period is not None: - ed = ed / reference_period - eu = eu / reference_period - if cl is not 
None: - plt.fill_between(be, ed, eu, alpha=0.3, color=kwarg['color']) - bins = h_res.bin_edges - if data is not None: - binv, _ = np.histogram(data, bins=bins) - if positive_radius: - binv, _ = np.histogram(np.abs(data), bins=bins) - binc = 0.5 * (bins[0:-1] + bins[1::]) - if per_axis: - binv = binv / h_res.bin_volumes() - if reference_period is not None: - binv = binv / reference_period - scatter_kwargs_default = {'color': 'magenta', 'marker': 'o', 's': 25} - scatter_kwargs_default.update(scatter_kwargs) - plt.scatter(binc, binv, zorder=100, **scatter_kwargs_default) - if logy: - plt.yscale('log') - if ylim is not None: - plt.ylim(ylim) - if xlim is not None: - plt.xlim(xlim) - if pdf_plotter is not None: - axis_name = pdf_plotter.ll.base_model.config['analysis_space'][ - proj_axis][0] - else: - anames = {0: 'cs1', 1: 'cs2_bottom', 2: 'r'} - axis_name = anames[proj_axis] - # plt.xlabel(human_name_dict.get(axis_name, axis_name)) - if axis_name == 'r': - xlim = plt.gca().get_xlim() - rticks = [10, 20, 30, 35, 40, 42, 44] - if bins[0] < 0: - rticks = [ - np.sqrt(abs(bins[0])), 0, 20, 30, 35, 40, - np.sqrt(abs(bins[-1])) - ] - r2ticks = [r * abs(r) for r in rticks] - rtickns = ["{:.1f}".format(r) for r in rticks] - plt.gca().set_xticks(r2ticks) - plt.gca().set_xticklabels(rtickns) - plt.xlim(xlim) - plt.xlabel("$r$[$\mathrm{cm}$]") - period_add = "" - if reference_period is not None: - period_add = "/$d$" - if per_axis: - pass - # plt.ylabel( - # 'events/{:s}'.format(human_name_dict.get(axis_name, axis_name)) + - # period_add) - else: - plt.ylabel('events' + period_add) - - # order legend reverse from how we plotted it: - objs, handles = plt.gca().get_legend_handles_labels() - objs = objs[::-1] - handles = handles[::-1] - - if plot_legend: - legend = plt.legend(objs, - handles, - loc=2, - bbox_to_anchor=(1, 1), - borderaxespad=0) - else: - legend = None - - return plottables, legend - - -def plot_projection_residual( - pdf_plotter=None, - proj_axis=2, - binning=None, - per_axis=False, - slice_args={}, - res={}, - data=None, - cl=0.68, - plot_kwarg_dict={ - 'wimp': { - 'linestyle': '--' - }, - 'total': { - 'label': 'Total Model' - } - }, - ylim=None, - xlim=None, - scatter_kwargs={}, - plottables=None, - logy=True, - reference_period=None, - plot_legend=True, - positive_radius=False, - plot_order=['cnns', 'radiogenic', 'ac', 'wall', 'er', 'total', 'wimp'], -): - fig1 = plt.figure() - frame1 = fig1.add_axes((.1, .3, .8, .6)) - plottables, legend = plot_projection( - pdf_plotter=pdf_plotter, - proj_axis=proj_axis, - binning=binning, - per_axis=per_axis, - slice_args=slice_args, - res=res, - data=data, - cl=cl, - plot_kwarg_dict=plot_kwarg_dict, - ylim=ylim, - xlim=xlim, - scatter_kwargs=scatter_kwargs, - plottables=plottables, - logy=logy, - reference_period=reference_period, - plot_order=plot_order, - plot_legend=plot_legend, - positive_radius=positive_radius, - ) - xlabel = plt.gca().get_xlabel() - xlim = plt.gca().get_xlim() - plt.setp(plt.gca().get_xticklabels(), visible=False) - frame2 = fig1.add_axes((.1, .1, .8, .2)) - plt.ylabel('p-value') - plt.xlabel('') - hist_res, be, ed, eu, kwarg = plottables['total'] - - mus = hist_res.histogram - if per_axis: - mus = mus * hist_res.bin_volumes() - bins = hist_res.bin_edges - ns, _ = np.histogram(data, bins=bins) - if positive_radius: - ns, _ = np.histogram(np.abs(data), bins=bins) - - ps = poisson(mus).sf(ns - 0.1) - ps = -1 * norm().ppf(ps) - binc = 0.5 * (bins[0:-1] + bins[1::]) - plt.scatter(binc, ps, color='k', s=scatter_kwargs.get("s", 20)) 
- # plt.xlabel('radius') - plt.xlim(xlim) - plt.xlabel(xlabel) - if proj_axis == 2: - rticks = [10, 20, 30, 35, 40, 42, 44] - r2ticks = [r * abs(r) for r in rticks] - if bins[0] < 0: - print("switch to twoside binning") - rticks = [ - np.sign(bins[0]) * np.sqrt(abs(bins[0])), 0, 20, 30, 35, 40 - ] - r2ticks = [r * abs(r) for r in rticks] - rtickns = ["{:.1f}".format(r) for r in rticks] - plt.gca().set_xticks(r2ticks) - plt.gca().set_xticklabels(rtickns) - plt.xlim(xlim) - plt.xlabel("$r$[$\mathrm{cm}$]") - # plt.yticks([0,0.5,1]) - # plt.ylim([-0.3,1.3]) - plt.yticks([-2, 0, 2]) - plt.ylim([-3, 3]) - plt.ylabel('$\sigma$') - plt.xlabel(xlabel) - return plottables, legend, ps diff --git a/alea/_scripts/get_data_from_OSG.py b/alea/_scripts/get_data_from_OSG.py deleted file mode 100644 index ab3c9a76..00000000 --- a/alea/_scripts/get_data_from_OSG.py +++ /dev/null @@ -1,173 +0,0 @@ -import os -import sys -import argparse -import glob -import pkg_resources -import subprocess - - -def execute_remote_command(user, - remote_machine, - remote_command, - skip_command_check=False): - """Docstring for function. - This function executes the command - - :arg1: user - user which is used to execute the remote command - :arg2: remote_machine - remote machine where the command should be executed - :arg3: remote_command - command which is executed on the remote machine - - :returns: None - """ - # Ports are handled in ~/.ssh/config since we use OpenSSH - command = ["ssh", "%s@%s" % (user, remote_machine), remote_command] - print(" ".join(command)) - ssh = subprocess.Popen( - command, - # bufsize=0, - shell=False, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - # close_fds=True - ) - std, err = ssh.communicate() - - if skip_command_check: - print("No remote check.") - else: - print("Checking remote result... 
Consider '--skip_command_check'") - if std == []: - # error = ssh.stderr.readlines() - print(err, "ERROR: %s" % err) - else: - print("Command executed successfully!") - - -def parse_args(args): - """Parse the command line arguments""" - - parser = argparse.ArgumentParser("debug toy submission") - parser.add_argument("--OSG_path", - type=str, - required=True, - help="Path to copy from OSG") - parser.add_argument("--midway_target", - default="/home/twolf/scratch-midway2/toyoutput_SR0", - type=str, - help="Target path on midway") - parser.add_argument("--user", - type=str, - required=True, - help="User used for copy") - parser.add_argument("--remote_machine", - type=str, - default="login.xenon.ci-connect.net", - help="URL of remote machine") - parser.add_argument("--skip_command_check", - action='store_true', - help="Skip checking of remote command.") - parser.add_argument("--get", - action='store_true', - help="Execute unpacking and transfer") - parser.add_argument("--no_unpack", - action='store_true', - help="Avoid unpacking remotely") - - args = parser.parse_args(args) - if not args.OSG_path.endswith("/"): - args.OSG_path += "/" - - return vars(args) - - -def main(args): - parsed_args = parse_args(args=args) - midway_target = parsed_args["midway_target"] - OSG_path = parsed_args["OSG_path"] - user = parsed_args["user"] - remote_machine = parsed_args["remote_machine"] - - # setting up paths - splitted_path = OSG_path.split("/") - if splitted_path[-1] == "": - dirname = splitted_path[-2] - else: - dirname = splitted_path[-1] - full_path_midway = os.path.join(midway_target, dirname) - if not os.path.exists(full_path_midway): - os.makedirs(full_path_midway) - if not full_path_midway.endswith("/"): - full_path_midway += "/" - - if parsed_args["no_unpack"]: - - print("No unpacking remotely") - else: - # unpack tarballs remotely - unpacking_script = pkg_resources.resource_filename( - "alea", "scripts/unpack_data.sh") - print(unpacking_script) - cmd = "scp {unpacking_script} {user}@{remote_machine}:~/".format( - unpacking_script=unpacking_script, - user=user, - remote_machine=remote_machine) - if parsed_args["get"]: - os.system(cmd) - else: - print(cmd) - - remote_command = "source $HOME/{unpacking_script} {OSG_path}".format( - unpacking_script=os.path.basename(unpacking_script), OSG_path=OSG_path) - if parsed_args["get"]: - print("Have a little patience... 
I am unpacking data remotely...") - execute_remote_command( - user=user, - remote_machine=remote_machine, - remote_command=remote_command, - skip_command_check=parsed_args["skip_command_check"]) - else: - print("REMOTE COMMAND:", remote_command) - - # copy output from remote machine - pattern = os.path.join(OSG_path, "*.hdf5") - copy_cmds = [] - copy_cmd = "rsync -a --progress --exclude jobs/ --exclude *tar.gz {user}@{remote_machine}:{OSG_path} {full_path_midway}".format( - user=user, - remote_machine=remote_machine, - full_path_midway=full_path_midway, - OSG_path=OSG_path - ) - # copy_cmd = "scp {user}@{remote_machine}:{pattern} {full_path_midway}".format( - # user=user, - # remote_machine=remote_machine, - # pattern=pattern, - # full_path_midway=full_path_midway) - copy_cmds.append(copy_cmd) - - pattern = os.path.join(OSG_path, "*.yaml") - copy_cmd = "scp {user}@{remote_machine}:{pattern} {full_path_midway}".format( - user=user, - remote_machine=remote_machine, - pattern=pattern, - full_path_midway=full_path_midway) - copy_cmds.append(copy_cmd) - - for cmd in copy_cmds: - print(cmd) - if parsed_args["get"]: - os.system(cmd) - - if parsed_args["get"]: - print() - print("Midway path:") - print(full_path_midway) - yaml_files = glob.glob(os.path.join(full_path_midway, "*.yaml")) - print() - print("------------------") - print("YAML files:") - for yaml_file in yaml_files: - print(os.path.basename(yaml_file)) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/alea/_scripts/run_toymc.py b/alea/_scripts/run_toymc.py deleted file mode 100755 index 96ca366c..00000000 --- a/alea/_scripts/run_toymc.py +++ /dev/null @@ -1,5 +0,0 @@ -from alea.toymc_running import run_toymcs_from_cl -import sys - -if __name__ == "__main__": - run_toymcs_from_cl(sys.argv[1:]) diff --git a/alea/_scripts/runpy.sbatch b/alea/_scripts/runpy.sbatch deleted file mode 100755 index 2d2fc43e..00000000 --- a/alea/_scripts/runpy.sbatch +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -#SBATCH --account=pi-lgrandi -#SBATCH --ntasks=1 -#SBATCH --partition=xenon1t -#SBATCH --qos=xenon1t - - -toymc_script=$(python -c 'import pkg_resources; print(pkg_resources.resource_filename("alea","/scripts/run_toymc.py"))') -execution_cmd="python $toymc_script $@" -echo $execution_cmd -echo "-------------------" - -echo "loading singularity" -module load singularity - -echo "executing command" -singularity exec --bind /project2 --bind /dali --bind /home /project2/lgrandi/xenonnt/singularity-images/xenonnt-development.simg $execution_cmd diff --git a/alea/_scripts/submission_script.py b/alea/_scripts/submission_script.py deleted file mode 100644 index a38635d6..00000000 --- a/alea/_scripts/submission_script.py +++ /dev/null @@ -1,5 +0,0 @@ -import sys -import alea.submission - -if __name__ == "__main__": - alea.submission.submit_jobs(sys.argv[1:]) diff --git a/alea/_scripts/unpack_data.sh b/alea/_scripts/unpack_data.sh deleted file mode 100644 index b9093791..00000000 --- a/alea/_scripts/unpack_data.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -OSG_path=$1 - -for file in `ls $OSG_path/*discovery*.tar.gz` -do - tar xvfz $file -C $OSG_path -done - -for file in `ls $OSG_path/*sensi*.tar.gz` -do - tar xvfz $file -C $OSG_path -done diff --git a/alea/_submission.py b/alea/_submission.py deleted file mode 100644 index fdaf6b2a..00000000 --- a/alea/_submission.py +++ /dev/null @@ -1,538 +0,0 @@ -import logging -logging.basicConfig(level=logging.INFO, force=True) -import subprocess -import time -from alea.toymc_running import 
toymc_to_sbatch_call_array, compute_neyman_thresholds, toymc_to_sbatch_call_array_update, compute_neyman_thresholds_update -from alea import toymc_running -import argparse -import os -import sys -from pydoc import locate -import re -import glob -import copy -import pkg_resources -from tqdm import tqdm -import json -import itertools -import inspect -from alea.utils import read_config -import alea - -def create_alea_tarball(targetname, targetpath): - alea_data_path = pkg_resources.resource_filename( - "alea", "data") - alea_root_path = os.path.join(alea_data_path, "../..") - - if os.path.exists(targetname): - os.remove(targetname) - - # cmd = "tar cvfz {target} {input} --exclude=*.tar.gz --exclude=*/.git/* --exclude=*/sbatch_submission/*".format( - # target=targetname, input=alea_root_path) - cmd = "tar cvfz {target} {input} --exclude=*.tar.gz --exclude=*.sbatch".format( - target=targetname, input=alea_root_path) - os.system(cmd) - - # full_targetpath = os.path.join(targetpath, targetname) - # if os.path.exists(full_targetpath): - # os.remove(full_targetpath) - - # cmd = "mv {targetname} {full_targetpath}".format( - # targetname=targetname, full_targetpath=full_targetpath) - # os.system(cmd) - - -def OSG_template( - joblist, - inputfiles, - singularity_container="/cvmfs/singularity.opensciencegrid.org/xenonnt/base-environment:latest", - request_memory="1000Mb"): - submission_template = """#!/bin/bash -executable = $(job) -universe = vanilla -Error = $(job).err -Output = $(job).out -Log = $(job).log - -# Requirements = (HAS_CVMFS_xenon_opensciencegrid_org) && (OSGVO_OS_STRING == "RHEL 7") && (TARGET.GLIDEIN_ResourceName =!= MY.MachineAttrGLIDEIN_ResourceName1) && (TARGET.GLIDEIN_ResourceName =!= MY.MachineAttrGLIDEIN_ResourceName2) && (TARGET.GLIDEIN_ResourceName =!= MY.MachineAttrGLIDEIN_ResourceName3) && (TARGET.GLIDEIN_ResourceName =!= MY.MachineAttrGLIDEIN_ResourceName4) && (GLIDEIN_SITE =!= "SU-ITS" ) - -Requirements = HAS_SINGULARITY && HAS_CVMFS_singularity_opensciencegrid_org && HAS_CVMFS_xenon_opensciencegrid_org && (OSGVO_OS_STRING == "RHEL 7") && (GLIDEIN_Site =!= "IIT" && GLIDEIN_Site =!= "NotreDame" && GLIDEIN_Site =!= "OSG_US_NMSU_AGGIE_GRID") && (TARGET.GLIDEIN_ResourceName =!= MY.MachineAttrGLIDEIN_ResourceName1) && (TARGET.GLIDEIN_ResourceName =!= MY.MachineAttrGLIDEIN_ResourceName2) && (TARGET.GLIDEIN_ResourceName =!= MY.MachineAttrGLIDEIN_ResourceName3) && (TARGET.GLIDEIN_ResourceName =!= MY.MachineAttrGLIDEIN_ResourceName4) -request_cpus = 1 -request_memory = {request_memory} - -max_retries = 5 -periodic_release = (NumJobStarts < JobMaxRetries) && ((CurrentTime - EnteredCurrentStatus) > (10*60)) -transfer_output_files = $(job).tar.gz -transfer_input_files = {inputfiles} -+WANT_RCC_ciconnect = True -+ProjectName = "xenon1t" -+AccountingGroup = "group_opportunistic.xenon1t.processing" -+SingularityImage = "{singularity_container}" -when_to_transfer_output = ON_EXIT -transfer_executable = True -# x509userproxy = /home/ershockley/user_cert - -queue job from( -{joblist} -) -""" - return submission_template.format( - joblist="\n".join(joblist), - inputfiles=", ".join(inputfiles), - singularity_container=singularity_container, - request_memory=request_memory) - - -def write_OSG_job(jobname, arguments, output_filename, toydata_file, inputfiles, targetfiles, - config_path): - - jobname_with_out_dir = os.path.basename(jobname) - merged_args = "'" + "' '".join(arguments) + "'" - - copy_template = "cp {inputfile} alea/data/{target_path}\n" - # path_from_template refers to to a 
prepending path in the source of config - # e.g. ER/template_XENONnT_ER_field_20.h5 --> the ER is not matched correctly - # without this - - copy_input_files_to_where_they_belong = "" - for inputfile, target_path in zip(inputfiles, targetfiles): - if inputfile == "alea.tar.gz": - continue - # elif inputfile == os.path.basename(config_path): - # # mkdirs = "mkdir -p {config_dir}\n".format(config_dir=os.path.dirname(config_path)) - # # copy_input_files_to_where_they_belong += mkdirs - # copy_input_files_to_where_they_belong += "cp {inputfile} {config_path}\n".format( - # inputfile=os.path.basename(config_path), - # config_path=config_path) - else: - copy_input_files_to_where_they_belong += copy_template.format( - inputfile=inputfile, target_path=target_path) - - create_paths_in_alea = "" - for target_path in targetfiles: - dirname = os.path.dirname(target_path) - if dirname != "": - create_paths_in_alea += "mkdir -p alea/data/{dirname}\n".format( - dirname=dirname) - - osg_job = """#!/bin/bash - -# set -e is needed so a job "can" fail and submission knows about it -set -e -echo "---Check environment---" -which python -which pip -git --version -echo "hostname:" -hostname -echo "GLIDEIN_Site = $GLIDEIN_Site" -echo "GLIDEIN_ResourceName = $GLIDEIN_ResourceName" - -tar xvfz alea.tar.gz - -# inside the OSG job the input files need to be copied into alea -{create_paths_in_alea} -{copy_input_files_to_where_they_belong} -pip install -r requirements.txt --user -pip install -e . --user - -toymc_script=$(python -c 'import pkg_resources; print(pkg_resources.resource_filename("alea","/scripts/run_toymc.py"))') -python $toymc_script {arguments} - -ls -ltrha -# echo $execution_cmd -# echo "-------------------" - -# $execution_cmd - -if [[ -f {toydata_file} ]] -then - files_to_tar="{output_filename} {toydata_file}" -else - files_to_tar="{output_filename}" -fi - - -# tar the output -echo "tar cvfz {jobname}.tar.gz $files_to_tar" -if ! 
tar cvfz {jobname}.tar.gz $files_to_tar &> /dev/null; then - rm {jobname}.tar.gz - echo "TARALL CREATION FAILED" -else - echo "TARBALL SUCCESFULLY CREATED" -fi -ls -ltrha -mkdir -p jobs -pwd -cp {jobname}.tar.gz jobs -ls -ltrha jobs/ -""".format(arguments=merged_args, - output_filename=output_filename, - toydata_file=toydata_file, - jobname=jobname_with_out_dir, - copy_input_files_to_where_they_belong= - copy_input_files_to_where_they_belong, - create_paths_in_alea=create_paths_in_alea) - with open(jobname, "w") as f: - f.write(osg_job) - - -def parse_args(args): - parser = argparse.ArgumentParser("Submission script for toyMCs in alea") - group = parser.add_mutually_exclusive_group(required=False) - group.add_argument('--local', - action='store_true', - help="Executes the defined jobs locally") - group.add_argument('--midway', - action='store_true', - help="Prepare submission for Midway") - group.add_argument('--OSG', - action='store_true', - help="Write out files for submission to OSG") - parser.add_argument("--submit", - action='store_true', - help="Submit to OSG/Midway") - parser.add_argument("--debug", - action='store_true', - help="Only 1 job will be prepared") - parser.add_argument("--unpack", - action='store_true', - help="Unpacks the results from OSG submission") - parser.add_argument("--config", - type=str, - required=True, - help="YAML config file specifying alea details") - parser.add_argument("--outputfolder_overwrite", - type=str, - required=False, - default=None, - help="Overwriting the outputfolder, usually defined in config.") - parser.add_argument( - "--computation", - type=str, - required=True, - choices=["discovery_power", "threshold", "sensitivity", "unpack", "unpack_sensi"], - help="Type of computation, defined in YAML config file") - args = parser.parse_args(args) - return args - - -def submit_jobs(argv): - parsed_args = parse_args(argv) - config_data = read_config(parsed_args.config) - outputfolder = config_data["outputfolder"] - if parsed_args.outputfolder_overwrite is not None: - outputfolder = parsed_args.outputfolder_overwrite - if not os.path.exists(outputfolder): - os.makedirs(outputfolder) - - abs_config_path = os.path.abspath(parsed_args.config) - cwd = os.getcwd() - os.chdir(outputfolder) - if ((parsed_args.computation == "discovery_power") - or (parsed_args.computation == "threshold") - or (parsed_args.computation == "sensitivity")): - computation = config_data["computation"][parsed_args.computation] - - parameters_to_vary = computation.get("parameters_to_vary", {}) - parameters_in_common = computation.get("parameters_in_common", {}) - if parsed_args.local or parsed_args.midway: - parameters_in_common["inference_object_args"].update( - {"config_path": abs_config_path}) - elif parsed_args.OSG: - parameters_in_common["inference_object_args"].update( - {"config_path": os.path.basename(parsed_args.config)}) - parameters_to_zip = computation.get("parameters_to_zip", {}) - parameters_as_wildcards = computation.get("parameters_as_wildcards", {}) - - if computation.get("use_conveniencevariation", False): - generate_args_parameter = computation.get("generate_args_parameter") - InferenceObjectClass = locate(parameters_in_common.get("inference_class_name")) - signature = inspect.signature(InferenceObjectClass) - varcon = alea.utils.VariationConvenience(parameters_to_vary=parameters_to_vary, - parameters_to_zip=parameters_to_zip, - parameters_as_wildcards=parameters_as_wildcards, - parameters_in_common=parameters_in_common, - 
generate_args_parameters=generate_args_parameter, - signature=signature - ) - propagate_guess = computation.get("propagate_guess", False) - varcon.combined_zip_input(propagate_guess=propagate_guess) - parameters_to_vary = {} - parameters_to_zip = varcon.zip_input - parameters_in_common = parameters_in_common - - - if parsed_args.computation == "discovery_power" or parsed_args.computation == "sensitivity": - - if parsed_args.computation == "sensitivity": - limit_threshold = parameters_in_common.get("limit_threshold", None) - if limit_threshold is None: - print("You are running with asymptotics!") - time.sleep(5) - - fnames, calls = toymc_to_sbatch_call_array_update( - parameters_to_vary=parameters_to_vary, - parameters_to_zip=parameters_to_zip, - parameters_in_common=parameters_in_common) - - if parsed_args.debug: - calls = [calls[0]] - fnames = [fnames[0]] - n_mc_index = calls[0].index("--n_mc") + 1 - calls[0][n_mc_index] = "1" - print(len(calls), "calls could be submitted.") - - joblist = [] - first = True - inputfiles, targetfiles = ["alea.tar.gz"], ["alea.tar.gz"] - for number, c in enumerate(tqdm(calls)): - if parsed_args.local: - local_path = pkg_resources.resource_filename( - "alea", "/scripts/run_toymc.py") - local_call = ["python", local_path] - local_call += c[2:] - print(local_call) - subprocess.call(local_call) - break - elif parsed_args.midway: - if parsed_args.debug: - print(c) - continue - elif parsed_args.OSG: - parsed_args_toy = toymc_running.parse_cl_arguments(c[2:]) - # this path is absolute to be able to read it locally below - parsed_args_toy.inference_object_args.update( - {"config_path": abs_config_path}) - logging.debug("parsed_args_toy: " + str(parsed_args_toy)) - - # here we create a dummy InferenceObject to read the templates that we need to copy - if first: - InferenceObject = locate( - parsed_args_toy.inference_class_name) - generate_args_index = calls[0].index("--generate_args") + 1 - generate_args = json.loads(calls[0][generate_args_index]) - threshold_key_index = calls[0].index("--threshold_key") + 1 - threshold_key = calls[0][threshold_key_index] - logging.debug("initializing a dummy InferenceObject with parameters:") - logging.debug("wimp_mass: " + str(parsed_args_toy.wimp_mass)) - logging.debug("i_batch: " + str(parsed_args_toy.i_batch)) - logging.debug("livetime: " + str(parsed_args_toy.livetime)) - logging.debug("limit_threshold: " + str(parsed_args_toy.limit_threshold)) - logging.debug("toydata_file: " + str(parsed_args_toy.toydata_file)) - logging.debug("toydata_mode: " + str(parsed_args_toy.toydata_mode)) - logging.debug("signal_component_name: " + str(parsed_args_toy.signal_component_name)) - logging.debug("generate_args: " + str(parsed_args_toy.generate_args)) - logging.debug("threshold_key: " + str(parsed_args_toy.threshold_key)) - logging.debug("inference_object_args: " + str(parsed_args_toy.inference_object_args)) - statistical_model = InferenceObject( - wimp_mass=parsed_args_toy.wimp_mass, - i_batch=parsed_args_toy.i_batch, - livetime=parsed_args_toy.livetime, - limit_threshold=parsed_args_toy.limit_threshold, - toydata_file=parsed_args_toy.toydata_file, - toydata_mode=parsed_args_toy.toydata_mode, - signal_component_name=parsed_args_toy. 
- signal_component_name, - generate_args=generate_args, - threshold_key=threshold_key, - **parsed_args_toy.inference_object_args) - - # collect templates to copy as input-files on OSG - # We assume that formatting strings are indicated with { } - templates_to_copy = [] - templates_to_copy.append(statistical_model.config_path) - templates_to_target = [] - templates_to_target.append( - os.path.basename(statistical_model.config_path)) - logging.debug("statistical_model.config_path: " + str(statistical_model.config_path)) - logging.debug("statistical_model.input_files: " + str(statistical_model.input_files)) - for input_file in statistical_model.input_files: - p = re.compile(r"{(.*?)}") - result = p.findall(input_file) - for res in result: - input_file = input_file.replace( - "{" + res + "}", "*") - else: - matches = glob.glob(input_file) - for match in matches: - templates_to_copy.append(match) - # target_path = os.path.relpath( - # match, config_data["OSG_path"]) - templates_to_target.append(".") - - # copy template files to top dir of submission to prepare for input transfer - logging.debug("templates_to_copy: " + str(templates_to_copy)) - # Check whether the basenames of two templates are the same and raise a warning if so - check_basename_sanity(templates_to_copy) - for template in templates_to_copy: - target = os.path.basename(template) - if target.endswith(".yaml"): - cmd = "cp {source} {target}".format( - source=template, target=target) - os.system(cmd) - continue - else: - if os.path.exists(target): - cmd = "rm {filename}".format(filename=target) - os.system(cmd) - cmd = "ln -s {source} {filename}".format( - source=template, - filename=os.path.basename(template)) - os.system(cmd) - - templates_to_copy = [ - os.path.basename(template) - for template in templates_to_copy - ] - - # check if threshols-file is specified - threshold_index = calls[0].index("--limit_threshold") - if calls[0][threshold_index + 1] != "None": - threshold_file = calls[0][threshold_index + 1] - inputfiles.append(threshold_file) - # This assumes that there is no subdirectory specified for the - # threshold file... 
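- # a "." entry in targetfiles means write_OSG_job copies the file to the top
- # level of alea/data/ inside the job sandbox (cp <inputfile> alea/data/.)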
- targetfiles.append(".") - - inputfiles += templates_to_copy - targetfiles += templates_to_target - logging.debug("inputfiles: " + str(inputfiles)) - logging.debug("targetfiles: " + str(targetfiles)) - first = False - - output_filename = parsed_args_toy.output_filename - toydata_file = parsed_args_toy.toydata_file - if parsed_args.debug: - jobname = "jobs/debug_{computation}_{number}".format( - computation=parsed_args.computation, number=number) - else: - jobname = "jobs/{computation}_{number}".format( - computation=parsed_args.computation, number=number) - jobdir = "jobs" - if parsed_args_toy.toydata_mode == 'read': - inputfiles.append(parsed_args_toy.toydata_file) - targetfiles.append(".") - logging.debug('inputfiles: ' + str(inputfiles)) - logging.debug('targetfiles: ' + str(targetfiles)) - - if not os.path.exists(jobdir): - os.makedirs(jobdir) - this_call = c[2:] - logging.debug("this_call: " + str(this_call)) - write_OSG_job(jobname, - this_call, - output_filename=output_filename, - toydata_file=toydata_file, - inputfiles=inputfiles, - targetfiles=targetfiles, - config_path=parsed_args.config) - joblist.append(jobname) - if parsed_args.debug: - break - - if parsed_args.OSG: - OSG_parameters = config_data.get("OSG_parameters", {}) - singularity_container = OSG_parameters.get( - "singularity_container", - "/cvmfs/singularity.opensciencegrid.org/xenonnt/base-environment:latest" - ) - print(f"Using singularity_container {singularity_container}") - request_memory = OSG_parameters.get("request_memory", "1000Mb") - print(f"Using request_memory {request_memory}") - submission_script = OSG_template( - joblist=joblist, - inputfiles=inputfiles, - singularity_container=singularity_container, - request_memory=request_memory) - with open("submit.sub", "w") as f: - f.write(submission_script) - - if parsed_args.submit: - if parsed_args.OSG: - print("CREATNG alea-tarball") - # The tar ball will exclude all *.tar.gz files - create_alea_tarball(targetname=inputfiles[0], - targetpath=os.getcwd()) - cmd = "condor_submit submit.sub" - os.system(cmd) - elif parsed_args.midway: - for c in calls: - subprocess.call(c) - time.sleep(0.2) - elif parsed_args.local: - print("Cannot submit local submission!") - raise SystemExit - os.chdir(cwd) - elif parsed_args.computation == "threshold": - print( - "This is done after computation of discovery_power and is executed locally" - ) - - if parsed_args.unpack: - filelist = glob.glob("*.tar.gz") - for file in filelist: - if "alea.tar.gz" in file: - continue - cmd = "tar xvfz {file}".format(file=file) - os.system(cmd) - - threshold_file = computation.get("limit_threshold", "thresholds.hdf5") - output_filename = parameters_in_common.get("output_filename") - file_name_pattern = output_filename.split( - ".hdf5")[0] + "_{n_batch:d}.hdf5" - print("Writing threshold_file:", threshold_file) - compute_neyman_thresholds_update( - file_name_pattern, - threshold_file, - parameters_in_common=parameters_in_common, - parameters_to_vary=parameters_to_vary, - parameters_as_wildcards=parameters_as_wildcards, - parameters_to_zip=parameters_to_zip - ) - elif parsed_args.computation == "unpack": - print( - "This is done after computation of discovery_power and is executed locally" - ) - filelist = glob.glob("*.tar.gz") - for file in filelist: - if "alea.tar.gz" in file: - continue - cmd = "tar xvfz {file}".format(file=file) - os.system(cmd) - elif parsed_args.computation == "unpack_sensi": - print( - "This is done after computation of sensitivity and is executed locally" - ) - filelist = 
glob.glob("*sensi*.tar.gz") - for file in filelist: - if "alea.tar.gz" in file: - continue - cmd = "tar xvfz {file}".format(file=file) - os.system(cmd) - - -def check_basename_sanity(templates_to_copy): - basenames = [os.path.basename(template) for template in templates_to_copy] - # Create a dictionary to store the basenames and corresponding directories - basename_dirs = {} - - # Iterate over the templates and check for duplicate basenames - for template, basename in zip(templates_to_copy, basenames): - if basename not in basename_dirs: - basename_dirs[basename] = [template] - else: - basename_dirs[basename].append(template) - - # Iterate over the dictionary and print the directories with duplicate basenames - duplicates = False - for basename, dirs in basename_dirs.items(): - if (len(dirs) > 1) & (len(set(dirs)) > 1): - print("Duplicate basename:", basename) - print("Directories:") - for directory in dirs: - print(directory) - print() - duplicates = True - if duplicates: - raise ValueError("Duplicate basenames found!") diff --git a/alea/_toymc_running.py b/alea/_toymc_running.py deleted file mode 100644 index 436b3d2d..00000000 --- a/alea/_toymc_running.py +++ /dev/null @@ -1,1276 +0,0 @@ -from tqdm import tqdm -from subprocess import call -from time import gmtime, strftime, sleep -import argparse -from json import dumps, loads -from string import Formatter -import numpy as np -from copy import deepcopy -from datetime import datetime -from os.path import splitext -import pkg_resources -import warnings -import inspect -from itertools import product -import re -from glob import glob -import h5py -from pydoc import locate # to lookup inferenceObject class -import os -from inference_interface import numpy_to_toyfile -import alea -import copy -import mergedeep -import logging -logging.basicConfig(level=logging.INFO) - - -def sbatch_template(queue, logfile, execution_time, ram_roof, - singularity_container): - template_script = """#!/bin/sh - -#SBATCH --account=pi-lgrandi -#SBATCH --ntasks=1 -#SBATCH --partition={queue} -#SBATCH --qos={queue} -#SBATCH --output={logfile} -#SBATCH --error={logfile} -#SBATCH --time={execution_time} -#SBATCH --mem-per-cpu={ram_roof} - -# this bash-script receives the same inputs as the run_toymc.py script - - -toymc_script=$(python -c 'import pkg_resources; print(pkg_resources.resource_filename("alea","/scripts/run_toymc.py"))') -execution_cmd="python $toymc_script $@" -echo $execution_cmd -echo "-------------------" - -echo "loading singularity" -module load singularity - -echo "executing command" -singularity exec --bind /project2 --bind /dali --bind /home {singularity_container} $execution_cmd -""" - return template_script.format(queue=queue, - execution_time=execution_time, - logfile=logfile, - ram_roof=ram_roof, - singularity_container=singularity_container) - - -def create_sbatch_file( - filename="runpy.sbatch", - queue="xenon1t", - logfile="logfile.log", - execution_time=60, - ram_roof=2000, - singularity_container="/project2/lgrandi/xenonnt/singularity-images/xenonnt-development.simg" -): - # getting bash-script template and set computing stuff - bash_text = sbatch_template(queue=queue, - logfile=logfile, - execution_time=execution_time, - ram_roof=ram_roof, - singularity_container=singularity_container) - - # write bash-script to file - with open(filename, "w") as f: - f.write(bash_text) - - # if os.path.exists(filename): - # print("batch-file", filename, "created.") - # else: - # print("batch-file", filename, "NOT created.") - - -def add_imc(fpat): - if 
"i_batch" not in [tup[1] for tup in Formatter().parse(fpat)]: - fpat_split = splitext(fpat) - return fpat_split[0] + "_{i_batch:d}" + fpat_split[1] - else: - return fpat - - -def run_toymcs( - n_mc=10, - i_batch=0, #indexes batch-dependent values - wimp_mass=50, - signal_expectation=None, # no of evts. - signal_component_name="signal", #change this is you're doing inference on the rate of a source not named signal - generate_args={}, - livetime=1., #ty - extra_args=[{}], - extra_args_all={}, - guess={}, - compute_confidence_interval=False, - limit_threshold=None, - threshold_key=None, - inference_object_args={}, - refit_if_first_not_best=False, - propagate_guess=True, - return_expectations=False, - inference_class_name="alea.likelihoods.ll_nt.InferenceObject", - toydata_file=None, - toydata_mode="none", -): - """ - run toyMCs and return best-fit likelihoods, - best-fit parameters, confidence intervals - - input args: - ----------- - n_mc: int, number of toyMCs, defaults to 10 - i_batch: indexes batch-dependent values, defaults to 0 - wimp_mass: mass of signal in GeV, defaults to 50 - signal_expectation: number of events expected, defaults to None - if not None -- will set the signal_rate_multiplier in the - toyMC to have this signal expectation. - signal_component_name: defaults to 'signal' - change this if you're doing inference on the rate of a - source not named signal - generate_args: kwargs passed to simulate. Possible kwargs are the - rate, signal and shape parameters, defaults to {} - livetime: exposure in t*y, defaults to 1. - extra_args: array of dicts, defaults to [{}] - the function will fit each toyMC with each extra_arg - (so e.g. fit for signal=0, signal="free", signal="true") - extra_args_all: dict of full extra_args passed to each fit, - defaults to {} - (overridden by the per-fit extra_args) - guess: dict passed to fitter as guess, defaults to {} - compute_confidence_interval: defaults to False - if True, the function will compute confidence intervals on - the signal_rate_multiplier, for the _first_ set of extra_args. - limit_threshold: defaults to None TODO - inference_object_args: Passed to the initialisation of the - InferenceObject. Defaults to {} - refit_if_first_not_best: defaults to False - if True, check if the first extra_args - (assumed to be the freest extra args) is the best llhood - and refit if not true (tolerance 1e-2) - [not yet implemented] - propagate_guess: If True, the previous fit results (except the values - specified in guess) are is used as a guess, defaults to True - return_expectations: if True, tuple of results and expectation vlues - are returned, defaults to False - inference_class_name: name of inference class, string - defaults to "alea.likelihoods.ll_nt.InferenceObject" - toydata_file: path to file containing toydata, - defaults to None - toydata_mode: "none", "write" or "read". 
- - if "write": simulate interpolated ll and write toydata to 'toydata_file' - - if "read": read toydata from 'toydata_file' - - if "none": simulate interpolated ll - Defaults to "none" - inference_object_args: dict of arguments to be passed to the - initialisation of the inferenceobject, defaults to {} - - neyman_threshold: name of file where the neyman threshold for - the limit computation is stored <- NOT IMPLEMENTED - - outputs: - -------- - array (length of extra_args array) of structured - numpy arrays for each - """ - - #parse limit_threshod: - - if refit_if_first_not_best: - raise NotImplementedError() - - #programatically import statistical model: - InferenceObject = locate(inference_class_name) - print("loaded inference class", inference_class_name, - type(InferenceObject)) - logging.debug("Building statistical model with the following parameters:") - logging.debug(f"wimp_mass {wimp_mass:.2f}") - logging.debug(f"i_batch {i_batch}") - logging.debug("livetime " + str(livetime)) - logging.debug("limit_threshold " + str(limit_threshold)) - logging.debug("toydata_file " + str(toydata_file)) - logging.debug("toydata_mode " + str(toydata_mode)) - logging.debug("signal_component_name " + str(signal_component_name)) - logging.debug("generate_args " + str(generate_args)) - logging.debug("threshold_key " + str(threshold_key)) - logging.debug("inference_object_args " + str(inference_object_args)) - - statistical_model = InferenceObject( - wimp_mass=wimp_mass, - i_batch=i_batch, - livetime=livetime, - limit_threshold=limit_threshold, - toydata_file=toydata_file, - toydata_mode=toydata_mode, - signal_component_name=signal_component_name, - generate_args=generate_args, - threshold_key=threshold_key, - **inference_object_args) - logging.debug(f"run_toymcs with expectation values per likelihood {statistical_model.get_mus()}") - if hasattr(statistical_model, "pertoy_fix_file_generate_args"): - logging.debug("using a per-toy fixed file, setting generate args from here") - generate_args = statistical_model.pertoy_fix_file_generate_args - else: - generate_args = statistical_model.generate_args - - parameter_list = sorted(statistical_model.get_parameter_list()) - result_dtype = [(n, float) for n in parameter_list] - - try: - additional_result_dtype = [ - (n, float) for n in statistical_model.additional_parameters - ] - result_dtype += additional_result_dtype - parameter_list += statistical_model.additional_parameters - except: - pass - - nominal_expectations = statistical_model.get_mus() - nominal_signal_expectation = nominal_expectations.get( - signal_component_name, 0.) - - logging.debug("Signal expectation: " + str(signal_expectation)) - logging.debug("Nominal expectations: " + str(nominal_expectations)) - logging.debug("Nominal signal expectation: " + str(nominal_signal_expectation)) - logging.debug("generate_args: " + str(generate_args)) - if signal_expectation is not None: - generate_args[ - signal_component_name + - "_rate_multiplier"] = signal_expectation / nominal_signal_expectation - logging.debug("readjusting generate_args according to signal_expectation: " + str(generate_args)) - - extra_args_array = [] - if extra_args[0] == "iterate": - parameter_name = extra_args[1] - parameter_values = extra_args[2] - if parameter_name.startswith("mu_"): - parameter_name = parameter_name[3::] - parameter_mu = nominal_expectations.get(parameter_name, 1.) 
- parameter_values = np.array(parameter_values) / parameter_mu - parameter_name = parameter_name + "_rate_multiplier" - extra_args = [{parameter_name: pv} for pv in parameter_values] - logging.debug(len(extra_args), extra_args) - for extra_args_fit in extra_args: - if extra_args_fit == "null": - extra_args_fit = {signal_component_name + "_rate_multiplier": 0.} - elif extra_args_fit == "true": - extra_args_fit = { - signal_component_name + "_rate_multiplier": - generate_args.get(signal_component_name + "_rate_multiplier", - 0.) - } - elif extra_args_fit == "free": - extra_args_fit = {} - - for k, i in extra_args_fit.items(): - if i == "true": - extra_args_fit[k] = generate_args.get(k, ) - - extra_args_to_arr = deepcopy(extra_args_all) - extra_args_to_arr.update(extra_args_fit) - extra_args_array.append(extra_args_to_arr) - logging.debug("extra_args_array: " + str(extra_args_array)) - logging.debug("len(extra_args_array): " + str(len(extra_args_array))) - - results = [np.zeros(n_mc, dtype=result_dtype) for _ in extra_args_array] - logging.debug("length of results list is " + str(len(results))) - - for i in tqdm(range(n_mc)): - fit_results = statistical_model.toy_simulation( - generate_args=generate_args, - extra_args=extra_args_array, - guess=guess, - compute_confidence_interval=compute_confidence_interval, - propagate_guess=propagate_guess) - for fit_result, result_array in zip(fit_results, results): - #fit_result_array = np.array([fit_result[pn] for pn in parameter_list]) - fit_result_array = tuple(fit_result[pn] for pn in parameter_list) - result_array[i] = fit_result_array - if toydata_mode == "write": - statistical_model.write_toydata() - if inference_class_name == "alea.likelihoods.ll_GOF.InferenceObject": - statistical_model.write_reference() - - if return_expectations: - return results, statistical_model.get_mus() - else: - return results - - -def submit_batch_toymcs(): - raise NotImplementedError() - - -def toymc_to_sbatch_call( - output_filename="test_toymc.hdf5", - n_mc=10, - i_batch=0, - execution_time=60, - ram_roof=2000, #Mb, default - queue="xenon1t", - singularity_container="/project2/lgrandi/xenonnt/singularity-images/xenonnt-development.simg", - wimp_mass=50, - signal_component_name="signal", - signal_expectation=0., # no of evts. - generate_args={}, - livetime=1., #ty - extra_args=[{}], - result_names=None, - extra_args_all={}, - guess={}, - compute_confidence_interval=False, - limit_threshold=None, - threshold_key=None, - inference_object_args={}, - refit_if_first_not_best=False, - metadata={"version": "0.0"}, - inference_class_name="alea.likelihoods.ll_nt.InferenceObject", - toydata_file="none", - toydata_mode="none", - **kwargs, -): - """Write sbatch call for toyMC specified by the keyword arguments. - Args: - output_filename (str, optional): Output filename. - Defaults to "test_toymc.hdf5". - n_mc (int, optional): Number of toyMCs. Defaults to 10. - i_batch (int, optional): indexes batch-dependent values. - Defaults to 0. - execution_time (int, optional): Limit of total run time of the job - allocation in minutes. Defaults to 60. - ram_roof (int, optional): Minimum memory required per allocated CPU - in MB. Defaults to 2000. - wimp_mass (int, optional): mass of signal in GeV. Defaults to 50. - signal_component_name (str, optional): Name of the source. - Defaults to "signal". - signal_expectation (float, optional): number of events expected. - Defaults to 0.. - livetime (float, optional): exposure in t*y. Defaults to 1.. - result_names (str, optional): TODO. 
Defaults to None. - extra_args_all (dict, optional): dict of full extra_args passed - to each fit. Defaults to {}. - guess (dict, optional): dict passed to fitter as guess. Defaults to {}. - compute_confidence_interval (bool, optional): if True, the function - will compute confidence intervals on the signal_rate_multiplier, - for the _first_ set of extra_args. Defaults to False. - limit_threshold (float, optional): TODO. Defaults to None. - inference_object_args (dict, optional): Dict of arguments to be - passed to the initialisation of the inferenceobject. - Defaults to {}. - refit_if_first_not_best (bool, optional): If True, check if the - first extra_args (assumed to be the freest extra args) is the - best llhood and refit if not true (tolerance 1e-2). - Defaults to False. - metadata (dict, optional): TODO. Defaults to {"version":"0.0"}. - inference_class_name (str, optional): name of inference class. - Defaults to "alea.likelihoods.ll_nt.InferenceObject". - toydata_file (str, optional): Path to file containing toydata. - Defaults to "none". - toydata_mode: "none", "write" or "read". - - if "write": simulate interpolated ll and write toydata to 'toydata_file' - - if "read": read toydata from 'toydata_file' - - if "none": simulate interpolated ll - Defaults to "none" - Returns: - -------- - output_filename: output filename - call_array: sbatch call that can be submitted - """ - - log_name = splitext(output_filename)[0] + ".log" - - # ensure directory for output/logfile - dirname = os.path.dirname(log_name) - if not os.path.exists(dirname) and dirname != "": - os.makedirs(dirname) - - # getting full path for sbatch script - # this script we will write from the template-function sbatch_template() - - sbatch_name_and_path = pkg_resources.resource_filename( - "alea", "/sbatch_submission/{filename}".format( - filename=log_name.replace(".log", ".sbatch"))) - - if not os.path.exists(os.path.dirname(sbatch_name_and_path)): - os.makedirs(os.path.dirname(sbatch_name_and_path)) - - create_sbatch_file(filename=sbatch_name_and_path, - queue=queue, - logfile=log_name, - execution_time=execution_time, - ram_roof=ram_roof, - singularity_container=singularity_container) - - call_array = ['sbatch', sbatch_name_and_path] - - call_array.append("--output_filename") - call_array.append(output_filename) - call_array.append("--n_mc") - call_array.append("{:d}".format(n_mc)) - call_array.append("--i_batch") - call_array.append("{:d}".format(i_batch)) - call_array.append("--wimp_mass") - call_array.append("{:d}".format(wimp_mass)) - call_array.append("--signal_expectation") - if signal_expectation is None: - call_array.append("None") - else: - call_array.append("{:.2f}".format(signal_expectation)) - call_array.append("--signal_component_name") - call_array.append("{:s}".format(signal_component_name)) - call_array.append("--result_names") - if result_names is None: - call_array.append("None") - else: - call_array.append(dumps(result_names).replace(" ", "")) - call_array.append("--generate_args") - call_array.append(dumps(generate_args).replace(" ", "")) - call_array.append("--livetime") - if livetime is None: - call_array.append("None") - else: - call_array.append(dumps(livetime).replace(" ", "")) - call_array.append("--extra_args") - call_array.append(dumps(extra_args).replace(" ", "")) - call_array.append("--extra_args_all") - call_array.append(dumps(extra_args_all).replace(" ", "")) - call_array.append("--guess") - call_array.append(dumps(guess).replace(" ", "")) - if compute_confidence_interval: - 
call_array.append("--compute_confidence_interval") - call_array.append("1") - call_array.append("--limit_threshold") - if limit_threshold is None: - call_array.append("None") - else: - call_array.append(limit_threshold) - call_array.append("--inference_object_args") - call_array.append(dumps(inference_object_args).replace(" ", "")) - if refit_if_first_not_best: - call_array.append("--refit_if_first_not_best") - call_array.append("1") - call_array.append("--metadata") - call_array.append(dumps(metadata).replace(" ", "")) - call_array.append("--inference_class_name") - call_array.append(inference_class_name) - call_array.append("--toydata_file") - call_array.append(toydata_file) - call_array.append("--toydata_mode") - call_array.append(toydata_mode) - call_array.append("--threshold_key") - if threshold_key is None: - call_array.append("None") - else: - call_array.append(threshold_key) - - return output_filename, call_array - - -def toymc_to_sbatch_call_array_update( - parameters_to_vary={}, - parameters_to_zip={}, - parameters_in_common={}, - wildcards_for_threshold=["signal_rate_multiplier", "signal_expectation", "n_mc", "n_batch"]): - - output_filename = parameters_in_common.get("output_filename") - file_name_pattern_threshold = get_filename_pattern_threshold( - output_filename=output_filename, wildcards_for_threshold=wildcards_for_threshold) - - merged_combinations = alea.utils.compute_variations( - parameters_in_common=parameters_in_common, - parameters_to_vary=parameters_to_vary, - parameters_to_zip=parameters_to_zip) - - #find run toyMC default args: - callargs, _, _, calldefaults = inspect.getargspec(toymc_to_sbatch_call) - # ignore the deprecation warning for now, update can be done below - # signature = inspect.signature(toymc_to_sbatch_call) - default_args = dict(zip(callargs, calldefaults)) - default_args["n_batch"] = 1 - default_args["output_filename"] = default_args.get("output_filename", - "test_toymc.hdf5") - default_args["n_mc"] = default_args.get("n_mc", 10) - - fnames, call_arrays, function_args_to_return = [], [], [] - for combination in tqdm(merged_combinations): - function_args = deepcopy(default_args) - mergedeep.merge(function_args, combination) # update defaults with combination - function_args = alea.utils.flatten_function_args(combination=combination, function_args=function_args) - - function_args["n_mc"] = int(function_args["n_mc"] / - function_args["n_batch"]) - - filename_pattern = function_args["output_filename"] - filename_pattern = add_imc( - filename_pattern) #so that we'll index by batch - n_batch = function_args["n_batch"] - function_args.pop("n_batch", None) - toydata_file_pattern = function_args.get("toydata_file", "none") - toydata_file_pattern = add_imc( - toydata_file_pattern) #so that we'll index by batch - - # do the same for filenames passed in inference_object_args: - toy_reference_file_pattern = None - hist_reference_file_pattern = None - hist_data_file_pattern = None - filenames = { - 'toy_reference_file': toy_reference_file_pattern, - 'hist_reference_file': hist_reference_file_pattern, - 'hist_data_file': hist_data_file_pattern - } - - for filename_key in filenames.keys(): - for key in function_args["inference_object_args"].keys(): - if key == filename_key: - filenames[filename_key] = ( - function_args["inference_object_args"][key]) - filenames[filename_key] = add_imc(filenames[filename_key]) - - #filename = filename.format(**function_args) - #toydata_file = toydata_file.format(**function_args) - for i_batch in range(n_batch): - 
function_args["i_batch"] = i_batch - - function_args["toydata_file"] = toydata_file_pattern.format( - **function_args) - function_args["output_filename"] = filename_pattern.format( - **function_args) - for filename_key, file_pattern in filenames.items(): - if file_pattern is not None: - file = file_pattern.format(**function_args) - function_args["inference_object_args"][filename_key] = file - threshold_key, _, _ = generate_threshold_key( - file_name_pattern=file_name_pattern_threshold, - function_args=function_args) - function_args["threshold_key"] = threshold_key - fname, call_array = toymc_to_sbatch_call(**function_args) - fnames.append(fname) - call_arrays.append(call_array) - return fnames, call_arrays - - -def toymc_to_sbatch_call_array(parameters_to_vary={}, - parameters_to_zip={}, - parameters_in_common={}): - """ - Wrapper for `toymc_to_sbatch_call`. - Function that runs `toymc_to_sbatch_call` for the product of all - iterables in parameters_to_vary, keeping parameters_in_common fixed. - parameters_to_zip are passed to zip-- so varied together. - Possible keys of the dicts are the keyword arguments of - `toymc_to_sbatch_call`. - - input args: - ----------- - parameters_to_vary: dict of parameters that are varied. An sbatch call - is generated for all possible combinations pf parameters_to_vary, - defaults to {} - parameters_to_zip: dict of parameters that are varied together, - defaults to {} - parameters_in_common: dict of parameters that are fixed, defaults to {} - - outputs: - -------- - fnames: array of output filenemes - call_arrays: array of sbatch calls that can be submitted - """ - - input_parameters_to_vary = copy.deepcopy(parameters_to_vary) - - #find run toyMC default args: - callargs, _, _, calldefaults = inspect.getargspec(toymc_to_sbatch_call) - default_args = dict(zip(callargs, calldefaults)) - default_args["n_batch"] = 1 - default_args.update(parameters_in_common) - default_args["output_filename"] = default_args.get("output_filename", - "test_toymc.hdf5") - default_args["n_mc"] = default_args.get("n_mc", 10) - call_arrays = [] - fnames = [] - - parameters_to_vary_names = sorted(parameters_to_vary.keys()) - for k in parameters_to_vary_names: - if isinstance(parameters_to_vary[k], dict): - # allows variations inside of dicts - parameters_to_vary[k] = [ - item for item in alea.utils.dict_product( - parameters_to_vary[k]) - ] - else: - parameters_to_vary[k] = parameters_to_vary[k] - - parameters_to_vary_values = [ - parameters_to_vary[k] for k in parameters_to_vary_names - ] - parameters_to_zip_names = sorted(parameters_to_zip.keys()) - parameters_to_zip_values = [ - parameters_to_zip[k] for k in parameters_to_zip_names - ] - - # loop through all possible combinations of parameters_to_vary_values - # - if len(parameters_to_vary_names) == 0: - iter_product = [0] - else: - iter_product = product(*parameters_to_vary_values) - for parameters_to_vary_value in iter_product: - if len(parameters_to_zip_names) == 0: - iter_zip = [0] - else: - iter_zip = zip(*parameters_to_zip_values) - - for parameters_to_zip_value in iter_zip: - - function_args = deepcopy(default_args) - # overwrite default arguments in function_args with - # parameters_to_vary and parameters_to_zip: - if 0 < len(parameters_to_vary_names): - parameters_to_vary_dict = { - pn: pv - for pn, pv in zip(parameters_to_vary_names, - parameters_to_vary_value) - } - - #function to allow to set dict arguments from multiple places: - for pn in set(function_args.keys()) & set( - parameters_to_vary_dict.keys()): - if 
(type(function_args[pn]) == dict) and (type( - parameters_to_vary_dict[pn]) == dict): - parameters_to_vary_dict[pn] = dict( - function_args[pn], **parameters_to_vary_dict[pn]) - - # add all keys of dicts as function_args - # needed to allow using parameters from e.g. - # generate_args in the output_filename - for key, value in parameters_to_vary_dict[pn].items(): - function_args[key] = value - - function_args.update(parameters_to_vary_dict) - - if 0 < len(parameters_to_zip_names): - parameters_to_zip_dict = { - pn: pv - for pn, pv in zip(parameters_to_zip_names, - parameters_to_zip_value) - } - for pn in set(function_args.keys()) & set( - parameters_to_zip_dict.keys()): - if (type(function_args[pn]) == dict) and (type( - parameters_to_zip_dict[pn]) == dict): - parameters_to_zip_dict[pn] = dict( - function_args[pn], **parameters_to_zip_dict[pn]) - - function_args.update(parameters_to_zip_dict) - # for key in parameters_to_zip_dict.keys(): - # if key not in function_args.keys(): - # function_args["generate_args"][key] = parameters_to_zip_dict[key] - # function_args["generate_args"].update(parameters_to_zip_dict) - - #for pn in set(function_args.keys(), parameters_to_vary_names): - # if type(function_args)Opv - - #if 0 < len(parameters_to_vary_names): - # function_args.update(parameters_to_vary_dict) - #if 0 < len(parameters_to_zip_names): - # function_args.update({pn: pv for pn, pv in zip(parameters_to_zip_names, parameters_to_zip_value)}) - - function_args["n_mc"] = int(function_args["n_mc"] / - function_args["n_batch"]) - filename_pattern = function_args["output_filename"] - filename_pattern = add_imc( - filename_pattern) #so that we'll index by batch - n_batch = function_args["n_batch"] - function_args.pop("n_batch", None) - toydata_file_pattern = function_args.get("toydata_file", "none") - toydata_file_pattern = add_imc( - toydata_file_pattern) #so that we'll index by batch - # do the same for filenames passed in inference_object_args: - toy_reference_file_pattern = None - hist_reference_file_pattern = None - hist_data_file_pattern = None - filenames = { - 'toy_reference_file': toy_reference_file_pattern, - 'hist_reference_file': hist_reference_file_pattern, - 'hist_data_file': hist_data_file_pattern - } - - for filename_key in filenames.keys(): - for key in function_args["inference_object_args"].keys(): - if key == filename_key: - filenames[filename_key] = ( - function_args["inference_object_args"][key]) - filenames[filename_key] = add_imc( - filenames[filename_key]) - - #filename = filename.format(**function_args) - #toydata_file = toydata_file.format(**function_args) - for i_batch in range(n_batch): - function_args["i_batch"] = i_batch - - function_args["toydata_file"] = toydata_file_pattern.format( - **function_args) - function_args["output_filename"] = filename_pattern.format( - **function_args) - for filename_key, file_pattern in filenames.items(): - if file_pattern is not None: - file = file_pattern.format(**function_args) - function_args["inference_object_args"][ - filename_key] = file - - fname, call_array = toymc_to_sbatch_call(**function_args) - fnames.append(fname) - call_arrays.append(call_array) - return fnames, call_arrays - - -def parse_cl_arguments(arguments): - parser = argparse.ArgumentParser( - description="command line running of run_toymcs") - - parser.add_argument('--output_filename', - type=str, - required=True, - help="path where the output is stored") - parser.add_argument('--result_names', default="None") - parser.add_argument('--n_mc', - type=int, - 
default=10, - help="number of MC simulations") - parser.add_argument('--i_batch', - type=int, - default=0, - help="number of batches") - parser.add_argument('--wimp_mass', - type=int, - default=50, - help="WIMP mass in GeV") - parser.add_argument("--signal_expectation", type=str, default="None") - parser.add_argument("--signal_component_name", type=str, default="signal") - parser.add_argument('--generate_args', - type=str, - default="{}", - help="arguments for the toy generation") - parser.add_argument( - '--livetime', - type=str, - default='None', # uses definiton from the likelihood - help= - "livetime in tonne x year, if multiple livetimes are given (as a list) then the livetime per sub-likelihood will be set accoringly" - ) #ty - parser.add_argument('--extra_args', - type=str, - default='["null","free"]', - help="fitting arguments") - parser.add_argument('--extra_args_all', - type=str, - default='{}', - help="fitting arguments shared") - parser.add_argument('--guess', - type=str, - default='{}', - help='initial guess') - parser.add_argument( - '--compute_confidence_interval', type=bool, - default=False) #WARNING: --compute_ul False will s--compute_ul to True - parser.add_argument('--limit_threshold', type=str, default='None') - parser.add_argument('--inference_object_args', type=str, default='{}') - parser.add_argument( - '--refit_if_first_not_best', type=bool, - default=False) #WARNING: --compute_ul False will s--compute_ul to True - parser.add_argument('--metadata', type=str, default='{"version":"0.0"}') - - parser.add_argument('--inference_class_name', type=str, default="none") - parser.add_argument('--toydata_file', type=str, default="none") - parser.add_argument('--toydata_mode', type=str, default="none") - parser.add_argument('--threshold_key', type=str, default="none") - - args = parser.parse_args(arguments) - - args.output_filename = args.output_filename - if args.result_names == "None": - args.result_names = "None" - else: - args.result_names = loads(args.result_names) - args.n_mc = args.n_mc - args.i_batch = args.i_batch - args.wimp_mass = args.wimp_mass - args.signal_expectation = args.signal_expectation - args.signal_expectation = None if args.signal_expectation == "None" else float( - args.signal_expectation) - args.signal_component_name = args.signal_component_name - args.generate_args = loads(args.generate_args) - if args.livetime == 'None': - args.livetime = None - else: - args.livetime = loads(args.livetime) - args.extra_args = loads(args.extra_args) - args.extra_args_all = loads(args.extra_args_all) - args.guess = loads(args.guess) - args.inference_object_args = loads(args.inference_object_args) - args.compute_confidence_interval = args.compute_confidence_interval - args.limit_threshold = args.limit_threshold - args.limit_threshold = None if args.limit_threshold == "None" else args.limit_threshold - args.refit_if_first_not_best = args.refit_if_first_not_best - args.metadata = loads(args.metadata) - args.inference_class_name = args.inference_class_name - args.toydata_file = args.toydata_file - args.toydata_mode = args.toydata_mode - - return args - - -def run_toymcs_from_cl(arguments): - """Run `run_toymcs` with inputs provided in the command line and wirte the - results in an output file. 
- """ - args = parse_cl_arguments(arguments=arguments) - result_names = args.result_names - logging.debug("args are") - logging.debug(args) - - results, nominal_expectations = run_toymcs( - n_mc=args.n_mc, - i_batch=args.i_batch, - wimp_mass=args.wimp_mass, - signal_expectation=args.signal_expectation, # no of evts. - signal_component_name=args.signal_component_name, - generate_args=args.generate_args, - livetime=args.livetime, #ty - extra_args=args.extra_args, - extra_args_all=args.extra_args_all, - guess=args.guess, - compute_confidence_interval=args.compute_confidence_interval, - limit_threshold=args.limit_threshold, - inference_object_args=args.inference_object_args, - refit_if_first_not_best=args.refit_if_first_not_best, - return_expectations=True, - inference_class_name=args.inference_class_name, - toydata_mode=args.toydata_mode, - toydata_file=args.toydata_file, - threshold_key=args.threshold_key) - - if (args.extra_args[0] == "iterate") and (result_names == "None"): - logging.debug("extraargs is", args.extra_args) - result_names = ["{:.3f}".format(float(v)) for v in args.extra_args[2]] - logging.debug("result_names", result_names) - if result_names == "None": - result_names = ["{:d}".format(i) for i in range(len(args.extra_args))] - for i, ea in enumerate( - args.extra_args - ): #if using named extra args (free, null, true), use that name - if ea in ["null", "free", "true"]: - result_names[i] = ea - args.metadata["date"] = datetime.now().strftime('%Y%m%d_%H:%M:%S') - args.metadata["generate_args"] = args.generate_args - args.metadata["signal_expectation"] = args.signal_expectation - args.metadata["signal_component_name"] = args.signal_component_name - args.metadata["nominal_expectations"] = nominal_expectations - if args.extra_args[0] == "iterate": - eas = [{args.extra_args[1]: v} for v in args.extra_args[2]] - array_metadatas = [{"extra_args": ea} for ea in eas] - else: - array_metadatas = [{"extra_args": ea} for ea in args.extra_args] - logging.debug(f"End of run_toymcs_from_cl: len(result_names),len(results) = ({len(result_names)}, {len(results)})") - numpy_arrays_and_names = [(r, rn) for rn, r in zip(result_names, results)] - logging.debug("Results stored to file:") - logging.debug("signal_expectation: " + str(args.signal_expectation)) - logging.debug("nominal expectations: " + str(nominal_expectations)) - logging.debug("generate_args: " + str(args.generate_args)) - logging.debug("numpy_arrays_and_names: " + str(numpy_arrays_and_names)) - - for r, rn in zip(results, result_names): - logging.debug(str(rn) + str(r)) - # logging.debug(numpy_arrays_and_names) - # logging.debug(len(numpy_arrays_and_names)) - logging.debug("Metadata: " + str(array_metadatas)) - - print(f'Saving {args.output_filename}') - numpy_to_toyfile(args.output_filename, - numpy_arrays_and_names=numpy_arrays_and_names, - metadata=args.metadata, - array_metadatas=array_metadatas) - - -def compute_neyman_thresholds( - file_name_pattern, - threshold_name="thresholds.hdf5", - parameters_to_vary={}, - parameters_to_zip={}, - parameters_in_common={}, - parameters_as_wildcards=["signal_rate_multiplier", "signal_expectation", "n_mc", "n_batch"], - signal_component_name="signal", - confidence_levels=[0.8, 0.9, 0.95], - free_name="free", - null_name="true", - one_sided=False, - one_sided_minimum_mu=1., - return_to_dict=False, - metadata={ - "version": "0.0", - "date": datetime.now().strftime('%Y%m%d_%H:%M:%S') - }): - """ - function to run over any number of toyMC results, computing the llr between free_name and null_name 
and - selecting all with same parameters ordering by signal expectation - Result is stored, labeled by the parameters in "parameters_to_vary" - if one_sided: llrs with sbest>> list(dict_product(dict(number=[1,2], character='ab'))) - [{'character': 'a', 'number': 1}, - {'character': 'a', 'number': 2}, - {'character': 'b', 'number': 1}, - {'character': 'b', 'number': 2}] - """ - return (dict(zip(dicts, x)) for x in itertools.product(*dicts.values())) - - -def compute_variations( - parameters_to_vary, - parameters_to_zip, - parameters_in_common, - special_parameters=None, - silent=False): - """ - if parameters are defined in multiple places the order or precedence is: - 1. parameters_to_zip takes priority over - 2. parameters_to_vary takes priority over - 3. parameters_in_common - - """ - # Check that the signal_rate_multiplier is not varied when signal_expectation is not None - ptz_srm = parameters_to_zip.get("generate_args", [{}])[0].get("signal_rate_multiplier", None) - ptv_srm = parameters_to_vary.get("generate_args", [{}])[0].get("signal_rate_multiplier", None) - ptz_se = parameters_to_zip.get("signal_expectation", None) - ptv_se = parameters_to_vary.get("signal_expectation", None) - pic_se = parameters_in_common.get("signal_expectation", None) - assert (ptz_se is None and ptv_se is None and pic_se is None) or (ptz_srm is None and ptv_srm is None), "signal_rate_multiplier cannot be varied when signal_expectation is not None" - - varied_dicts = compute_parameters_to_vary( - parameters_to_vary=parameters_to_vary) - zipped_dicts = compute_parameters_to_zip( - parameters_to_zip=parameters_to_zip, silent=silent) - combined_variations = list(itertools.product(varied_dicts, zipped_dicts)) - if special_parameters is None: - special_parameters = [ - "generate_args", "livetime", "inference_object_args" - ] - - if len(combined_variations) > 0: - keys_are_shared = bool( - set(combined_variations[0][0]) & set(combined_variations[0][1])) - if keys_are_shared: - if not silent: - print("There are shared keys between parameters") - shared_parameters = list( - set(combined_variations[0][0]).intersection( - combined_variations[0][1])) - if not silent: - print(shared_parameters) - print("Looking for problems now...") - problematic_parameters = [] - for parameter in shared_parameters: - if parameter not in special_parameters: - problematic_parameters.append(parameter) - - if len(problematic_parameters) == 0: - if not silent: - print( - "You still need to watch out that everything is correct but only special_parameters are shared" - ) - else: - if len(problematic_parameters) > 1: - message = " ".join(problematic_parameters) + " are shared." - else: - message = " ".join(problematic_parameters) + " is shared." - raise Exception(message) - - merged_combinations = [] - for variation, zipped in tqdm(combined_variations, disable=silent): - pic = copy.deepcopy(parameters_in_common) - mergedeep.merge(pic, variation, zipped) - merged_combinations.append(pic) - else: - return merged_combinations - - -def flatten_nested_variables(merged_combinations): - copy_of_merged_combinations = copy.deepcopy(merged_combinations) - for idx, combination in enumerate(merged_combinations): - if "generate_args" in copy_of_merged_combinations[idx].keys(): - for key, value in combination["generate_args"].items(): - if key in copy_of_merged_combinations[idx].keys( - ) and value != copy_of_merged_combinations[idx][key]: - raise Exception( - "You are overwriting function_args without knowing what is going on!" 
-                    )
-                elif key == "signal_rate_multiplier" and combination["signal_expectation"] is not None:
-                    continue
-                else:
-                    copy_of_merged_combinations[idx][key] = value
-        if "livetime" in combination.keys():
-            # check if combination["livetime"] is a list or a single value
-            if isinstance(combination["livetime"], list):
-                for idx_lt, value in enumerate(combination["livetime"]):
-                    if "livetime_" + str(idx_lt) in combination.keys():
-                        raise Exception(
-                            "You are overwriting function_args without knowing what is going on!"
-                        )
-                    copy_of_merged_combinations[idx][
-                        "livetime_" + str(idx_lt)] = value
-            else:
-                if "livetime" in copy_of_merged_combinations[idx].keys(
-                ) and combination["livetime"] != copy_of_merged_combinations[
-                        idx]["livetime"]:
-                    raise Exception(
-                        "You are overwriting function_args without knowing what is going on!"
-                    )
-                copy_of_merged_combinations[idx]["livetime"] = combination[
-                    "livetime"]
-    return copy_of_merged_combinations
-
-
-def flatten_function_args(combination, function_args):
-    if "generate_args" in combination.keys():
-        for key, value in combination["generate_args"].items():
-            if key in function_args:
-                raise Exception(
-                    "You are overwriting function_args without knowing what is going on!"
-                )
-            elif key == "signal_rate_multiplier" and combination.get("signal_expectation", None) is not None:
-                continue
-            else:
-                function_args[key] = value
-    if "livetime" in combination.keys():
-        if isinstance(combination["livetime"], list):
-            for idx, value in enumerate(combination["livetime"]):
-                if "livetime_" + str(idx) in function_args:
-                    raise Exception(
-                        "You are overwriting function_args without knowing what is going on!"
-                    )
-                function_args["livetime_" + str(idx)] = value
-    return function_args
-
-
-def compute_parameters_to_vary(parameters_to_vary):
-    for k in copy.deepcopy(parameters_to_vary):
-        if isinstance(parameters_to_vary[k], dict):
-            # allows variations inside of dicts
-            parameters_to_vary[k] = [
-                item for item in dict_product(parameters_to_vary[k])
-            ]
-        else:
-            parameters_to_vary[k] = parameters_to_vary[k]
-
-    # these are the variations of parameters_to_vary
-    cartesian_product = itertools.product(*parameters_to_vary.values())
-    parameter_names = parameters_to_vary.keys()
-
-    variations_to_return = []
-
-    for variation in cartesian_product:
-        parameter_configuration = {
-            key: value
-            for key, value in zip(parameter_names, variation)
-        }
-        variations_to_return.append(parameter_configuration)
-
-    if len(variations_to_return) == 0:
-        return [{}]
-    else:
-        return variations_to_return
-
-
-def compute_parameters_to_zip(parameters_to_zip, silent=False):
-    ptz = copy.deepcopy(parameters_to_zip)
-    # check that inputs have the same length
-    # 1. get all lists
-    all_lists = []
-    for key, value in ptz.items():
-        if isinstance(value, list):
-            all_lists.append(value)
-        elif isinstance(value, dict):
-            l_dicts = []
-            if len(value) == 1:
-                key_inner, item = list(value.keys())[0], list(value.values())[0]
-                if isinstance(item, list):
-                    all_lists.append(item)
-                    expanded_dicts = [{
-                        key_inner: list_value
-                    } for list_value in item]
-                    ptz[key] = expanded_dicts
-                    # manipulate dict to be present 4 times
-                else:
-                    raise NotImplementedError(
-                        "parameters_to_zip not implemented for dict with values of type("
-                        + str(type(value)) + ")")
-            else:
-                ptz[key] = list(dict_product(value))
-                all_lists.append(ptz[key])
-        else:
-            raise NotImplementedError(
-                "parameters_to_zip not implemented for type(" + type(value) +
-                ")")
-
-    # 2. check that all values have the same length
-    if len(all_lists) > 0:
-        it = iter(all_lists)
-        the_len = len(next(it))
-        if not all(len(l) == the_len for l in it):
-            raise ValueError('not all lists have same length!')
-
-    varied_dicts_zip = []
-    for values in zip(*ptz.values()):
-        this_dict = {key: value for key, value in zip(ptz.keys(), values)}
-        varied_dicts_zip.append(this_dict)
-
-    if len(all_lists) > 0:
-        if len(all_lists[0]) != len(varied_dicts_zip):
-            raise Exception(
-                "Zipping failed. You probably escaped checking with a special case."
-            )
-    else:
-        if not silent:
-            print(
-                "Cannot check sanity of zip - better provide a list like, var: [1,2,3]"
-            )
-
-    if len(varied_dicts_zip) == 0:
-        return [{}]
-    else:
-        return varied_dicts_zip
-
-
-def manipulate_toy_data(
-        statistical_model,
-        data_sets,
-        sources_to_find,
-        plot=False,
-        output_dir=None):
-    smearing_pars = {
-        '[0, 10]': np.array([19.16806452, 2.80622276, 0.20968269]),
-        '[10, 20]': np.array([21.54000255, 2.98114261, 0.15294833])
-    }
-    l_modified_data = []
-    for idx, (data, ll) in enumerate(zip(data_sets, statistical_model.lls)):
-        modified_data = copy.deepcopy(data)
-        masker = ""
-        for source_name in sources_to_find:
-            for source_idx, source in enumerate(ll.base_model.sources):
-                if source.name == source_name:
-                    masker += f'(data["source"] == {source_idx})|'
-
-        if len(masker) > 0:
-            rg_and_rgx = eval(masker[:-1])
-        else:
-            rg_and_rgx = np.array([True] * len(data))
-
-        if idx > 1:
-            continue
-        for cs1_range, popt in smearing_pars.items():
-            this_range = eval(cs1_range)
-            data_mask = rg_and_rgx & (data["cs1"] > this_range[0]) & (
-                data["cs1"] < this_range[1])
-            nevents = np.sum(data_mask)
-            print(f"cs1 range: {cs1_range}, nevents: {nevents}")
-            modified_RG_and_RGX = sps.norm.rvs(
-                loc=popt[1], scale=popt[2], size=nevents)
-            modified_data["log10cs2"][data_mask] = modified_RG_and_RGX
-
-            if plot:
-                fig, ax = plt.subplots()
-                title = f"cs1 range: {cs1_range}PE"
-                ax.set_title(title)
-                ax.scatter(
-                    data[~data_mask]["cs1"],
-                    data[~data_mask]["log10cs2"],
-                    color="blue")
-                ax.scatter(
-                    data[data_mask]["cs1"],
-                    data[data_mask]["log10cs2"],
-                    color="red",
-                    label=f"RG(X) < {this_range[1]} PE")
-                ax.axvline(this_range[1], color="red")
-                ax.set_xlim(0, 100)
-
-                ax.scatter(
-                    data[data_mask]["cs1"],
-                    modified_RG_and_RGX,
-                    color="orange",
-                    label=f"RG(X) from AmBe")
-                ax.legend()
-
-                fig, ax = plt.subplots()
-                print(len(modified_RG_and_RGX), len(data[data_mask]))
-                binning = np.linspace(2, 4, 20)
-                ax.hist(modified_RG_and_RGX,
-                        label="from AmBe",
-                        bins=binning,
-                        alpha=0.5)
-                ax.hist(data[data_mask]["log10cs2"],
-                        bins=binning,
-                        label="Template",
-                        alpha=0.5)
-
-                median = np.median(modified_RG_and_RGX)
-                label = f"AmBe median at {median:.2f}"
-                ax.axvline(
-                    np.median(modified_RG_and_RGX),
-                    label=label,
-                    color="C0")
-                ax.axvline(
-                    np.median(data[data_mask]["log10cs2"]),
-                    label="Template median",
-                    color="C1")
-                ax.set_xlim(2, 4)
-                ax.set_xlabel("log10cs2")
-                ax.legend()
-                plt.show()
-            else:
-                l_modified_data.append(modified_data)
-    else:
-        return l_modified_data
-
-
-def _divide_fit_result(ll, fit_result):
-    l_parameters = []
-    for parameter in ll.rate_parameters:
-        this_par = parameter + "_rate_multiplier"
-        if this_par in fit_result.keys():
-            l_parameters.append(this_par)
-
-    for parameter in ll.shape_parameters:
-        if parameter in fit_result.keys():
-            l_parameters.append(parameter)
-
-    fit_result_to_return = copy.deepcopy(fit_result)
-    for par in copy.deepcopy(fit_result_to_return).keys():
-        if par not in l_parameters:
-            del fit_result_to_return[par]
-
-    return fit_result_to_return
-
-
-def get_log_prior_from_file(filename, parameter):
-    with open(filename, "rb") as f:
-        data = pickle.load(f)
-
-    if parameter not in data.keys():
-        raise ValueError(f"Parameter {parameter} not in {filename}")
-
-    parameter_to_return = data[parameter]
-    if isinstance(parameter_to_return, dict):
-        if "uncertainty" not in parameter_to_return.keys():
-            raise Exception("No uncertainty in file")
-        if "central_value" not in parameter_to_return.keys():
-            raise Exception("No central_value in file")
-    return parameter_to_return
-
-
-def _get_generate_args_from_toyfile(toydata_file):
-    datasets_array, _ = toydata_from_file(toydata_file)
-    # check that all generate_args in the toyfile are the same
-    generate_args_list = [toy_data[-1] for toy_data in datasets_array]
-    all_generate_args_equal = all(
-        generate_args_list[0] == generate_args
-        for generate_args in generate_args_list)
-    if not all_generate_args_equal:
-        raise ValueError("generate_args in toyfile are not all the same")
-    return structured_array_to_dict(generate_args_list[0])
-
-
-def find_file_resource(file: str, possible_locations: list) -> str:
-    """
-    Function to look for a file in a list of folders (the file name may include folder structure too).
-    Return the complete path of the first location found,
-    and throw error if no file is found.
-    """
-    for loc in possible_locations:
-        fname = Path(loc, file)
-        if fname.is_file():
-            return str(fname)
-    raise FileNotFoundError("{:s} not found in any of {:s}".format(file, ",".join(possible_locations)))
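
For reference, the removed helpers expanded a batch configuration in two ways: parameters_to_vary was expanded into every combination of its value lists (via dict_product), parameters_to_zip was expanded element-wise (all lists must have equal length), and both were layered on top of parameters_in_common, with zip taking precedence over vary, which takes precedence over common. Below is a minimal sketch of that behaviour, using plain dict unpacking instead of mergedeep.merge and hypothetical parameter values; it is an illustration, not the removed implementation.

import itertools


def dict_product(dicts):
    # Mirror of the removed helper: one dict per combination of the value lists.
    return (dict(zip(dicts, values)) for values in itertools.product(*dicts.values()))


# Cartesian expansion, as in compute_parameters_to_vary (example values are made up):
varied = list(dict_product({"wimp_mass": [10, 50], "livetime": [1.0, 2.0]}))

# Element-wise pairing, as in compute_parameters_to_zip (lists must have equal length):
to_zip = {"signal_expectation": [0, 5, 10], "n_mc": [1000, 1000, 2000]}
zipped = [dict(zip(to_zip, values)) for values in zip(*to_zip.values())]

# Shallow stand-in for the mergedeep.merge call in compute_variations:
# zipped values override varied values, which override parameters_in_common.
in_common = {"n_mc": 500, "output_filename": "toymc.hdf5"}
merged = [{**in_common, **v, **z} for v, z in itertools.product(varied, zipped)]
print(len(merged))  # 12 configurations: 4 varied x 3 zipped

The removed implementation additionally merged nested generate_args dicts key by key and asserted that signal_rate_multiplier is not varied or zipped while signal_expectation is set.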