From a259c9ac88ef2ac88a2e8ef6b358e5bf221d21e8 Mon Sep 17 00:00:00 2001 From: dachengx Date: Sun, 23 Jul 2023 03:59:37 +0800 Subject: [PATCH 01/19] Add tests of blueice_extended_model --- alea/__init__.py | 2 +- tests/test_blueice_extended_model.py | 56 ++++++++++++++++++++++++++++ tests/test_parameter.py | 2 + tests/test_statistical_model.py | 27 +++++++++++++- tests/test_template_source.py | 3 ++ 5 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 tests/test_blueice_extended_model.py create mode 100644 tests/test_parameter.py create mode 100644 tests/test_template_source.py diff --git a/alea/__init__.py b/alea/__init__.py index 99491ee8..95a14437 100644 --- a/alea/__init__.py +++ b/alea/__init__.py @@ -5,4 +5,4 @@ from alea import template_source from alea import toymc_running from alea import utils -from alea import runner +# from alea import runner diff --git a/tests/test_blueice_extended_model.py b/tests/test_blueice_extended_model.py new file mode 100644 index 00000000..b7e0f864 --- /dev/null +++ b/tests/test_blueice_extended_model.py @@ -0,0 +1,56 @@ +import yaml +import pkg_resources +from unittest import TestCase + +from blueice.likelihood import LogLikelihoodSum +from alea.blueice_extended_model import BlueiceExtendedModel, CustomAncillaryLikelihood + + +class TestBlueiceExtendedModel(TestCase): + # self.assertIsInstance(json_as_text, str) + # self.assertEqual(json_as_dict, get_resource(self.json_file, fmt='json')) + + def __init__(self, *args, **kwargs): + # TODO: super().__init__(*args, **kwargs) + super(TestBlueiceExtendedModel, self).__init__(*args, **kwargs) + # TODO: put the file path searching in utils.py + with open(pkg_resources.resource_filename( + 'alea', 'examples/unbinned_wimp_statistical_model.yaml'), 'r') as f: + self.config = yaml.safe_load(f) + self.n_likelihood_terms = len(self.config['likelihood_config']['likelihood_terms']) + self.model = BlueiceExtendedModel( + parameter_definition=self.config['parameter_definition'], 
+ likelihood_config=self.config['likelihood_config'], + ) + + def test_expectation_values(self): + expectation_values = self.model.get_expectation_values() + # TODO: assert expectation values after test template source + # self.assertEqual() + + def test_generate_data(self): + data = self.model.generate_data() + self.assertEqual( + len(data), self.n_likelihood_terms + 2) + if not all(['source' in d.dtype.names for d in data[:-2]]): + raise ValueError('Data does not contain source information.') + + def test_likelihood(self): + self.assertIsInstance(self.model._likelihood, LogLikelihoodSum) + self.assertIsInstance(self.model._likelihood.likelihood_list[-1], CustomAncillaryLikelihood) + self.assertEqual( + len(self.model._likelihood.likelihood_list), + self.n_likelihood_terms + 1) + self.model.data = self.model.generate_data() + self.model._likelihood() + + def test_fit(self): + self.model.data = self.model.generate_data() + fit_result, max_llh = self.model.fit() + # TODO: check whether all parameters are in fit_result and whether fittable parameters are fitted + + +class TestCustomAncillaryLikelihood(TestCase): + def test_ancillary_likelihood(self): + pass + diff --git a/tests/test_parameter.py b/tests/test_parameter.py new file mode 100644 index 00000000..0daf895c --- /dev/null +++ b/tests/test_parameter.py @@ -0,0 +1,2 @@ +from alea.parameters import Parameters + diff --git a/tests/test_statistical_model.py b/tests/test_statistical_model.py index 08e9ea40..42d56d1a 100644 --- a/tests/test_statistical_model.py +++ b/tests/test_statistical_model.py @@ -1,2 +1,27 @@ +from alea.examples.gaussian_model import GaussianModel + + def test_gaussian_model(): - pass + parameter_definition = { + 'mu': { + 'fit_guess': 0.0, + 'fittable': True, + 'nominal_value': 0.0 + }, + 'sigma': { + 'fit_guess': 1.0, + 'fit_limits': [ + 0, + None + ], + 'fittable': True, + 'nominal_value': 1.0 + } + } + simple_model = GaussianModel( + parameter_definition=parameter_definition) + 
simple_model.data = simple_model.generate_data(mu=0, sigma=2) + fit_result, max_llh = simple_model.fit() + + toydata_file = 'simple_data.hdf5' + simple_model.store_data(toydata_file, [simple_model.data]) diff --git a/tests/test_template_source.py b/tests/test_template_source.py new file mode 100644 index 00000000..43fe51c7 --- /dev/null +++ b/tests/test_template_source.py @@ -0,0 +1,3 @@ +from alea.template_source import TemplateSource + + From 9fc8e43d50f1ef756e1500f4b082a1c18d94a816 Mon Sep 17 00:00:00 2001 From: dachengx Date: Sun, 23 Jul 2023 04:38:50 +0800 Subject: [PATCH 02/19] Add docstrings --- tests/test_blueice_extended_model.py | 17 +++++++++++------ tests/test_parameter.py | 1 - tests/test_statistical_model.py | 13 +++++++------ tests/test_template_source.py | 2 -- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/tests/test_blueice_extended_model.py b/tests/test_blueice_extended_model.py index b7e0f864..74360743 100644 --- a/tests/test_blueice_extended_model.py +++ b/tests/test_blueice_extended_model.py @@ -7,12 +7,11 @@ class TestBlueiceExtendedModel(TestCase): - # self.assertIsInstance(json_as_text, str) - # self.assertEqual(json_as_dict, get_resource(self.json_file, fmt='json')) + """Test of the BlueiceExtendedModel class""" def __init__(self, *args, **kwargs): - # TODO: super().__init__(*args, **kwargs) - super(TestBlueiceExtendedModel, self).__init__(*args, **kwargs) + """Initialize the BlueiceExtendedModel class""" + super().__init__(*args, **kwargs) # TODO: put the file path searching in utils.py with open(pkg_resources.resource_filename( 'alea', 'examples/unbinned_wimp_statistical_model.yaml'), 'r') as f: @@ -24,11 +23,13 @@ def __init__(self, *args, **kwargs): ) def test_expectation_values(self): + """Test of the expectation_values method""" expectation_values = self.model.get_expectation_values() # TODO: assert expectation values after test template source # self.assertEqual() def test_generate_data(self): + """Test of the 
generate_data method""" data = self.model.generate_data() self.assertEqual( len(data), self.n_likelihood_terms + 2) @@ -36,6 +37,7 @@ def test_generate_data(self): raise ValueError('Data does not contain source information.') def test_likelihood(self): + """Test of the _likelihood attribute""" self.assertIsInstance(self.model._likelihood, LogLikelihoodSum) self.assertIsInstance(self.model._likelihood.likelihood_list[-1], CustomAncillaryLikelihood) self.assertEqual( @@ -45,12 +47,15 @@ def test_likelihood(self): self.model._likelihood() def test_fit(self): + """Test of the fit method""" self.model.data = self.model.generate_data() fit_result, max_llh = self.model.fit() - # TODO: check whether all parameters are in fit_result and whether fittable parameters are fitted + # TODO: check whether all parameters are in fit_result + # and whether fittable parameters are fitted class TestCustomAncillaryLikelihood(TestCase): + """Test of the CustomAncillaryLikelihood class""" def test_ancillary_likelihood(self): + """Test of the ancillary_likelihood method""" pass - diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 0daf895c..1faf90de 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,2 +1 @@ from alea.parameters import Parameters - diff --git a/tests/test_statistical_model.py b/tests/test_statistical_model.py index 42d56d1a..82976f31 100644 --- a/tests/test_statistical_model.py +++ b/tests/test_statistical_model.py @@ -2,20 +2,21 @@ def test_gaussian_model(): + """Test of the GaussianModel class""" parameter_definition = { 'mu': { - 'fit_guess': 0.0, + 'fit_guess': 0., 'fittable': True, - 'nominal_value': 0.0 + 'nominal_value': 0., }, 'sigma': { - 'fit_guess': 1.0, + 'fit_guess': 1., 'fit_limits': [ - 0, - None + 0., + None, ], 'fittable': True, - 'nominal_value': 1.0 + 'nominal_value': 1., } } simple_model = GaussianModel( diff --git a/tests/test_template_source.py b/tests/test_template_source.py index 43fe51c7..d43d1df8 100644 --- 
a/tests/test_template_source.py +++ b/tests/test_template_source.py @@ -1,3 +1 @@ from alea.template_source import TemplateSource - - From ef1d7138c8aa7a7078b97d61c9bf441c1febbd1b Mon Sep 17 00:00:00 2001 From: dachengx Date: Sun, 23 Jul 2023 04:45:48 +0800 Subject: [PATCH 03/19] Add iminuit --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index bcc396a5..46a68d78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ scipy setuptools tqdm mergedeep +iminuit From 7feabb4af0d2c2617693b2504288ca7e59717fd1 Mon Sep 17 00:00:00 2001 From: dachengx Date: Sun, 23 Jul 2023 04:53:56 +0800 Subject: [PATCH 04/19] Set data before get expectation values --- tests/test_blueice_extended_model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_blueice_extended_model.py b/tests/test_blueice_extended_model.py index 74360743..62ed22ca 100644 --- a/tests/test_blueice_extended_model.py +++ b/tests/test_blueice_extended_model.py @@ -24,6 +24,7 @@ def __init__(self, *args, **kwargs): def test_expectation_values(self): """Test of the expectation_values method""" + self.model.data = self.model.generate_data() expectation_values = self.model.get_expectation_values() # TODO: assert expectation values after test template source # self.assertEqual() @@ -56,6 +57,7 @@ def test_fit(self): class TestCustomAncillaryLikelihood(TestCase): """Test of the CustomAncillaryLikelihood class""" + def test_ancillary_likelihood(self): """Test of the ancillary_likelihood method""" pass From 33fe8ab50c1af1a85288df4c9bc8e2bb9f0ab2a8 Mon Sep 17 00:00:00 2001 From: dachengx Date: Tue, 25 Jul 2023 11:41:05 +0800 Subject: [PATCH 05/19] Change import path --- tests/test_statistical_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_statistical_model.py b/tests/test_statistical_model.py index 82976f31..6525e4d1 100644 --- a/tests/test_statistical_model.py +++ b/tests/test_statistical_model.py @@ 
-1,4 +1,4 @@ -from alea.examples.gaussian_model import GaussianModel +from alea.models.gaussian_model import GaussianModel def test_gaussian_model(): From 59a0122e6dedaa3403088aa4bf71af1faef07052 Mon Sep 17 00:00:00 2001 From: dachengx Date: Tue, 25 Jul 2023 13:45:41 +0800 Subject: [PATCH 06/19] Add a function get_file_path to get file path --- .../unbinned_wimp_statistical_model.yaml | 2 +- alea/models/blueice_extended_model.py | 2 + alea/utils.py | 82 ++++++++++++++----- tests/test_blueice_extended_model.py | 9 +- tests/test_statistical_model.py | 2 +- 5 files changed, 68 insertions(+), 29 deletions(-) diff --git a/alea/model_configs/unbinned_wimp_statistical_model.yaml b/alea/model_configs/unbinned_wimp_statistical_model.yaml index 2f69425d..c0665718 100644 --- a/alea/model_configs/unbinned_wimp_statistical_model.yaml +++ b/alea/model_configs/unbinned_wimp_statistical_model.yaml @@ -62,7 +62,7 @@ parameter_definition: likelihood_config: likelihood_weights: [1, 1, 1] - template_folder: alea/templates + template_folder: [] likelihood_terms: # SR0 - name: sr0 diff --git a/alea/models/blueice_extended_model.py b/alea/models/blueice_extended_model.py index 6c5e511b..c2ab5f47 100644 --- a/alea/models/blueice_extended_model.py +++ b/alea/models/blueice_extended_model.py @@ -103,6 +103,8 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum": # Iterate through each likelihood term in the configuration for config in likelihood_config["likelihood_terms"]: likelihood_object = locate(config["likelihood_type"]) + if "template_folder" not in likelihood_config: + likelihood_config["template_folder"] = [] if isinstance(likelihood_config["template_folder"], str): template_folder_list = [likelihood_config["template_folder"]] elif isinstance(likelihood_config["template_folder"], list): diff --git a/alea/utils.py b/alea/utils.py index 834c9cd5..982cfee4 100644 --- a/alea/utils.py +++ b/alea/utils.py @@ -1,8 +1,10 @@ import os +import yaml +import 
pkg_resources from pydoc import locate +from warnings import warn import numpy as np -import alea def get_analysis_space(analysis_space: dict) -> list: @@ -31,29 +33,12 @@ def adapt_likelihood_config_for_blueice( Args: likelihood_config (dict): likelihood config dict - template_folder_list (list): list of possible base folders where - templates are located. If a folder starts with alea/, - the alea folder is used as base. + template_folder_list (list): list of possible base folders. Ordered by priority. Returns: dict: adapted likelihood config """ - template_folder = None - for template_folder in template_folder_list: - # if template folder starts with alea: get location of alea - if template_folder.startswith("alea/"): - alea_dir = os.path.dirname(os.path.abspath(alea.__file__)) - template_folder = os.path.join(alea_dir, template_folder.replace("alea/", "")) - # check if template folder exists - if not os.path.isdir(template_folder): - template_folder = None - else: - break - - # raise error if no template folder is found - if template_folder is None: - raise FileNotFoundError("No template folder found. Please provide a valid template folder.") likelihood_config["analysis_space"] = get_analysis_space( likelihood_config["analysis_space"]) @@ -62,6 +47,61 @@ def adapt_likelihood_config_for_blueice( likelihood_config["default_source_class"]) for source in likelihood_config["sources"]: - source["templatename"] = os.path.join( - template_folder, source["template_filename"]) + source["templatename"] = get_file_path( + source["template_filename"], template_folder_list) return likelihood_config + + +def load_yaml(file_name: str): + """Load data from yaml file.""" + with open(get_file_path(file_name), 'r') as file: + data = yaml.safe_load(file) + return data + + +def _get_abspath(file_name): + """Get the abspath of the file. 
Raise FileNotFoundError when not found in any subfolder""" + for sub_dir in ('model_configs', 'runner_configs', 'templates'): + p = os.path.join(_package_path(sub_dir), file_name) + if os.path.exists(p): + return p + raise FileNotFoundError(f'Cannot find {file_name}') + + +def _package_path(sub_directory): + """Get the abs path of the requested sub folder""" + return pkg_resources.resource_filename('alea', f'{sub_directory}') + + +def get_file_path(fname, folder_list=[]): + """Find the full path to the resource file + Try 5 methods in the following order + + #. fname begin with '/', return absolute path + #. url_base begin with '/', return url_base + name + #. can get file from _get_abspath, return alea internal file path + #. can be found in local installed ntauxfiles, return ntauxfiles absolute path + #. can be downloaded from MongoDB, download and return cached path + """ + # 1. From absolute path + # Usually Config.default is a absolute path + if fname.startswith('/'): + return fname + + # 2. From local folder + # Use url_base as prefix + for folder in folder_list: + if folder.startswith('/'): + fpath = os.path.join(folder, fname) + if os.path.exists(fpath): + warn(f'Load {fname} successfully from {fpath}') + return fpath + + # 3. 
From alea internal files + try: + return _get_abspath(fname) + except FileNotFoundError: + pass + + # raise error when can not find corresponding file + raise RuntimeError(f'Can not find {fname}, please check your file system') diff --git a/tests/test_blueice_extended_model.py b/tests/test_blueice_extended_model.py index 62ed22ca..f3359367 100644 --- a/tests/test_blueice_extended_model.py +++ b/tests/test_blueice_extended_model.py @@ -1,9 +1,9 @@ -import yaml import pkg_resources from unittest import TestCase from blueice.likelihood import LogLikelihoodSum -from alea.blueice_extended_model import BlueiceExtendedModel, CustomAncillaryLikelihood +from alea.utils import load_yaml +from alea.models import BlueiceExtendedModel, CustomAncillaryLikelihood class TestBlueiceExtendedModel(TestCase): @@ -12,10 +12,7 @@ class TestBlueiceExtendedModel(TestCase): def __init__(self, *args, **kwargs): """Initialize the BlueiceExtendedModel class""" super().__init__(*args, **kwargs) - # TODO: put the file path searching in utils.py - with open(pkg_resources.resource_filename( - 'alea', 'examples/unbinned_wimp_statistical_model.yaml'), 'r') as f: - self.config = yaml.safe_load(f) + self.config = load_yaml('unbinned_wimp_statistical_model.yaml') self.n_likelihood_terms = len(self.config['likelihood_config']['likelihood_terms']) self.model = BlueiceExtendedModel( parameter_definition=self.config['parameter_definition'], diff --git a/tests/test_statistical_model.py b/tests/test_statistical_model.py index 6525e4d1..940389de 100644 --- a/tests/test_statistical_model.py +++ b/tests/test_statistical_model.py @@ -1,4 +1,4 @@ -from alea.models.gaussian_model import GaussianModel +from alea.models import GaussianModel def test_gaussian_model(): From ca81ac2bd635f59e17cf6cdfd6b26b8e19add9ba Mon Sep 17 00:00:00 2001 From: dachengx Date: Tue, 25 Jul 2023 14:53:29 +0800 Subject: [PATCH 07/19] Make Parameters deepcopyable --- alea/model.py | 9 +++++++- alea/models/blueice_extended_model.py | 24 
++++++++++++------- alea/parameters.py | 11 +++++---- tests/test_blueice_extended_model.py | 15 ++++++++++-- tests/test_gaussian_model.py | 33 +++++++++++++++++++++++++++ tests/test_parameter.py | 19 +++++++++++++++ tests/test_statistical_model.py | 28 ----------------------- tests/test_template_source.py | 8 +++++++ 8 files changed, 103 insertions(+), 44 deletions(-) create mode 100644 tests/test_gaussian_model.py delete mode 100644 tests/test_statistical_model.py diff --git a/alea/model.py b/alea/model.py index 82a327e9..b7fe59b7 100644 --- a/alea/model.py +++ b/alea/model.py @@ -7,6 +7,7 @@ from scipy.optimize import brentq from iminuit import Minuit from iminuit.util import make_func_code +from blueice.likelihood import _needs_data from inference_interface import toydata_to_file from alea.parameters import Parameters @@ -58,7 +59,10 @@ def __init__( confidence_interval_kind: str = "central", # one of central, upper, lower confidence_interval_threshold: Callable[[float], float] = None, ): - self._data = data + # following https://github.com/JelleAalbers/blueice/blob/7c10222a13227e78dc7224b1a7e56ff91e4a8043/blueice/likelihood.py#L97 + self.is_data_set = False + if data is not None: + self.data = data self._confidence_level = confidence_level self._confidence_interval_kind = confidence_interval_kind self.confidence_interval_threshold = confidence_interval_threshold @@ -93,6 +97,7 @@ def _generate_data(self, **kwargs): "You must write a data-generation method (_generate_data) for your statistical model" " or use a subclass where it is written for you") + @_needs_data def ll(self, **kwargs) -> float: """ Likelihod function, returns the loglikelihood for the given parameters. @@ -143,6 +148,7 @@ def data(self, data): representing the data-sets of one or more likelihood terms. 
""" self._data = data + self.is_data_set = True def store_data( self, file_name, data_list, data_name_list=None, metadata = None): @@ -205,6 +211,7 @@ def cost(args): return cost + @_needs_data def fit(self, verbose=False, **kwargs) -> Tuple[dict, float]: """ Fit the model to the data by maximizing the likelihood diff --git a/alea/models/blueice_extended_model.py b/alea/models/blueice_extended_model.py index c2ab5f47..1427cb32 100644 --- a/alea/models/blueice_extended_model.py +++ b/alea/models/blueice_extended_model.py @@ -1,5 +1,6 @@ -from pydoc import locate # to lookup likelihood class from typing import List +from copy import deepcopy +from pydoc import locate import yaml import numpy as np @@ -7,9 +8,9 @@ from blueice.likelihood import LogAncillaryLikelihood, LogLikelihoodSum from alea.model import StatisticalModel +from alea.parameters import Parameters from alea.simulators import BlueiceDataGenerator from alea.utils import adapt_likelihood_config_for_blueice -from alea.parameters import Parameters class BlueiceExtendedModel(StatisticalModel): @@ -32,7 +33,9 @@ def __init__(self, parameter_definition: dict, likelihood_config: dict): likelihood_config (dict): A dictionary defining the likelihood. 
""" super().__init__(parameter_definition=parameter_definition) - self._likelihood = self._build_ll_from_config(likelihood_config) + # deepcopy likelihood_config to prevent it to be + # changed by adapt_likelihood_config_for_blueice + self._likelihood = self._build_ll_from_config(deepcopy(likelihood_config)) self.likelihood_names = [t["name"] for t in likelihood_config["likelihood_terms"]] self.likelihood_names.append("ancillary_likelihood") self.data_generators = self._build_data_generators() @@ -72,6 +75,7 @@ def data(self, data: list): ll_term.set_data(d) self._data = data + self.is_data_set = True def get_expectation_values(self, **kwargs) -> dict: """ @@ -79,15 +83,19 @@ def get_expectation_values(self, **kwargs) -> dict: given a number of named parameters (kwargs) """ ret = dict() - # ancillary likelihood does not contribute - for ll in self._likelihood.likelihood_list[:-1]: - ll_pars = list(ll.rate_parameters.keys()) + list(ll.shape_parameters.keys()) + # calling ll need data to be set + self_copy = deepcopy(self) + self_copy.data = self_copy.generate_data() + + # ancillary likelihood does not contribute + for ll_term in self_copy._likelihood.likelihood_list[:-1]: + ll_pars = list(ll_term.rate_parameters.keys()) + list(ll_term.shape_parameters.keys()) ll_pars += ["livetime_days"] call_args = {k: i for k, i in kwargs.items() if k in ll_pars} - mus = ll(full_output=True, **call_args)[1] - for n, mu in zip(ll.source_name_list, mus): + mus = ll_term(full_output=True, **call_args)[1] + for n, mu in zip(ll_term.source_name_list, mus): ret[n] = ret.get(n, 0) + mu return ret diff --git a/alea/parameters.py b/alea/parameters.py index 589f5fc7..e7f26da0 100644 --- a/alea/parameters.py +++ b/alea/parameters.py @@ -274,8 +274,9 @@ def __call__( values[name] = new_val if new_val is not None else param.nominal_value if any(i is None for k, i in values.items()): emptypars = ", ".join([k for k, i in values.items() if i is None]) - raise AssertionError("All parameters must 
be set explicitly, or have a nominal value," - " encountered for: " + emptypars) + raise AssertionError( + "All parameters must be set explicitly, or have a nominal value," + " encountered for: " + emptypars) return values def __getattr__(self, name: str) -> Parameter: @@ -291,9 +292,9 @@ def __getattr__(self, name: str) -> Parameter: Raises: AttributeError: If the attribute is not found. """ - if name in self.parameters: - return self.parameters[name] - else: + try: + return super().__getattribute__('parameters')[name] + except KeyError: raise AttributeError(f"Attribute '{name}' not found.") def __getitem__(self, name: str) -> Parameter: diff --git a/tests/test_blueice_extended_model.py b/tests/test_blueice_extended_model.py index f3359367..ce9789a6 100644 --- a/tests/test_blueice_extended_model.py +++ b/tests/test_blueice_extended_model.py @@ -1,4 +1,3 @@ -import pkg_resources from unittest import TestCase from blueice.likelihood import LogLikelihoodSum @@ -14,6 +13,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.config = load_yaml('unbinned_wimp_statistical_model.yaml') self.n_likelihood_terms = len(self.config['likelihood_config']['likelihood_terms']) + self.set_new_model() + + def set_new_model(self): self.model = BlueiceExtendedModel( parameter_definition=self.config['parameter_definition'], likelihood_config=self.config['likelihood_config'], @@ -21,8 +23,16 @@ def __init__(self, *args, **kwargs): def test_expectation_values(self): """Test of the expectation_values method""" - self.model.data = self.model.generate_data() + self.set_new_model() expectation_values = self.model.get_expectation_values() + + # should avoid accidentally set data + is_data_set = False + for ll_term in self.model._likelihood.likelihood_list[:-1]: + is_data_set |= ll_term.is_data_set + if is_data_set: + raise ValueError('Data should not be set after get_expectation_values.') + # TODO: assert expectation values after test template source # 
self.assertEqual() @@ -57,4 +67,5 @@ class TestCustomAncillaryLikelihood(TestCase): def test_ancillary_likelihood(self): """Test of the ancillary_likelihood method""" + # TODO: pass diff --git a/tests/test_gaussian_model.py b/tests/test_gaussian_model.py new file mode 100644 index 00000000..c668587a --- /dev/null +++ b/tests/test_gaussian_model.py @@ -0,0 +1,33 @@ +from unittest import TestCase + +from alea.models import GaussianModel + + +class TestGaussianModel(TestCase): + """Test of the Parameters class""" + + def test_gaussian_model(self): + """Test of generate_data and fit method of the GaussianModel class""" + parameter_definition = { + 'mu': { + 'fit_guess': 0., + 'fittable': True, + 'nominal_value': 0., + }, + 'sigma': { + 'fit_guess': 1., + 'fit_limits': [ + 0., + None, + ], + 'fittable': True, + 'nominal_value': 1., + } + } + simple_model = GaussianModel( + parameter_definition=parameter_definition) + simple_model.data = simple_model.generate_data(mu=0, sigma=2) + fit_result, max_llh = simple_model.fit() + + toydata_file = 'simple_data.hdf5' + simple_model.store_data(toydata_file, [simple_model.data]) diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 1faf90de..876788f4 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1 +1,20 @@ +from copy import deepcopy +from unittest import TestCase + +from alea.utils import load_yaml from alea.parameters import Parameters + + +class TestParameters(TestCase): + """Test of the Parameters class""" + + def __init__(self, *args, **kwargs): + """Initialize the BlueiceExtendedModel class""" + super().__init__(*args, **kwargs) + self.config = load_yaml('unbinned_wimp_statistical_model.yaml') + self.parameters = Parameters.from_config(self.config['parameter_definition']) + + def test_deep_copyable(self): + """Test of whether Parameters instance can be deepcopied""" + if deepcopy(self.parameters) != self.parameters: + raise ValueError('Parameters instance cannot be correctly 
deepcopied.') diff --git a/tests/test_statistical_model.py b/tests/test_statistical_model.py deleted file mode 100644 index 940389de..00000000 --- a/tests/test_statistical_model.py +++ /dev/null @@ -1,28 +0,0 @@ -from alea.models import GaussianModel - - -def test_gaussian_model(): - """Test of the GaussianModel class""" - parameter_definition = { - 'mu': { - 'fit_guess': 0., - 'fittable': True, - 'nominal_value': 0., - }, - 'sigma': { - 'fit_guess': 1., - 'fit_limits': [ - 0., - None, - ], - 'fittable': True, - 'nominal_value': 1., - } - } - simple_model = GaussianModel( - parameter_definition=parameter_definition) - simple_model.data = simple_model.generate_data(mu=0, sigma=2) - fit_result, max_llh = simple_model.fit() - - toydata_file = 'simple_data.hdf5' - simple_model.store_data(toydata_file, [simple_model.data]) diff --git a/tests/test_template_source.py b/tests/test_template_source.py index d43d1df8..f57cb30b 100644 --- a/tests/test_template_source.py +++ b/tests/test_template_source.py @@ -1 +1,9 @@ +from unittest import TestCase + from alea.template_source import TemplateSource + + +class TestTemplateSource(TestCase): + """Test of the TemplateSource class""" + # TODO: + pass From 82e858d57f88c6b2a2d4bf1975e8787a2694db96 Mon Sep 17 00:00:00 2001 From: dachengx Date: Tue, 25 Jul 2023 14:58:35 +0800 Subject: [PATCH 08/19] Happier code style --- alea/model.py | 3 ++- alea/utils.py | 4 +++- tests/test_blueice_extended_model.py | 1 + tests/test_gaussian_model.py | 2 +- tests/test_template_source.py | 6 ++++-- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/alea/model.py b/alea/model.py index b7fe59b7..fbbc630d 100644 --- a/alea/model.py +++ b/alea/model.py @@ -59,7 +59,8 @@ def __init__( confidence_interval_kind: str = "central", # one of central, upper, lower confidence_interval_threshold: Callable[[float], float] = None, ): - # following 
https://github.com/JelleAalbers/blueice/blob/7c10222a13227e78dc7224b1a7e56ff91e4a8043/blueice/likelihood.py#L97 + # following https://github.com/JelleAalbers/blueice/blob/ + # 7c10222a13227e78dc7224b1a7e56ff91e4a8043/blueice/likelihood.py#L97 self.is_data_set = False if data is not None: self.data = data diff --git a/alea/utils.py b/alea/utils.py index 982cfee4..a6485fd6 100644 --- a/alea/utils.py +++ b/alea/utils.py @@ -73,7 +73,7 @@ def _package_path(sub_directory): return pkg_resources.resource_filename('alea', f'{sub_directory}') -def get_file_path(fname, folder_list=[]): +def get_file_path(fname, folder_list=None): """Find the full path to the resource file Try 5 methods in the following order @@ -83,6 +83,8 @@ def get_file_path(fname, folder_list=[]): #. can be found in local installed ntauxfiles, return ntauxfiles absolute path #. can be downloaded from MongoDB, download and return cached path """ + if folder_list is None: + folder_list = [] # 1. From absolute path # Usually Config.default is a absolute path if fname.startswith('/'): diff --git a/tests/test_blueice_extended_model.py b/tests/test_blueice_extended_model.py index ce9789a6..f388a78e 100644 --- a/tests/test_blueice_extended_model.py +++ b/tests/test_blueice_extended_model.py @@ -16,6 +16,7 @@ def __init__(self, *args, **kwargs): self.set_new_model() def set_new_model(self): + """Set a new BlueiceExtendedModel instance""" self.model = BlueiceExtendedModel( parameter_definition=self.config['parameter_definition'], likelihood_config=self.config['likelihood_config'], diff --git a/tests/test_gaussian_model.py b/tests/test_gaussian_model.py index c668587a..5caf0d7f 100644 --- a/tests/test_gaussian_model.py +++ b/tests/test_gaussian_model.py @@ -22,7 +22,7 @@ def test_gaussian_model(self): ], 'fittable': True, 'nominal_value': 1., - } + }, } simple_model = GaussianModel( parameter_definition=parameter_definition) diff --git a/tests/test_template_source.py b/tests/test_template_source.py index 
f57cb30b..1088b233 100644 --- a/tests/test_template_source.py +++ b/tests/test_template_source.py @@ -5,5 +5,7 @@ class TestTemplateSource(TestCase): """Test of the TemplateSource class""" - # TODO: - pass + + def test_load_templates(self): + # TODO: + pass From 166033e9a3157d40bc41f009d2e919a6c8348245 Mon Sep 17 00:00:00 2001 From: dachengx Date: Tue, 25 Jul 2023 17:20:53 +0800 Subject: [PATCH 09/19] Move template_folder_list outside the loop --- alea/__init__.py | 3 +++ alea/models/blueice_extended_model.py | 20 +++++++------- alea/template_source.py | 39 ++++++++++++++------------- alea/utils.py | 15 ++++++----- tests/test_template_source.py | 3 ++- 5 files changed, 44 insertions(+), 36 deletions(-) diff --git a/alea/__init__.py b/alea/__init__.py index 797c9f55..f008bb17 100644 --- a/alea/__init__.py +++ b/alea/__init__.py @@ -13,3 +13,6 @@ from . import simulators from .simulators import * + +from . import template_source +from .template_source import * diff --git a/alea/models/blueice_extended_model.py b/alea/models/blueice_extended_model.py index 1427cb32..9e66eec6 100644 --- a/alea/models/blueice_extended_model.py +++ b/alea/models/blueice_extended_model.py @@ -108,18 +108,19 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum": """ lls = [] + if "template_folder" not in likelihood_config: + likelihood_config["template_folder"] = [] + if isinstance(likelihood_config["template_folder"], str): + template_folder_list = [likelihood_config["template_folder"]] + elif isinstance(likelihood_config["template_folder"], list): + template_folder_list = likelihood_config["template_folder"] + else: + raise ValueError( + "template_folder must be either a string or a list of strings.") + # Iterate through each likelihood term in the configuration for config in likelihood_config["likelihood_terms"]: likelihood_object = locate(config["likelihood_type"]) - if "template_folder" not in likelihood_config: - likelihood_config["template_folder"] = [] - 
if isinstance(likelihood_config["template_folder"], str): - template_folder_list = [likelihood_config["template_folder"]] - elif isinstance(likelihood_config["template_folder"], list): - template_folder_list = likelihood_config["template_folder"] - else: - raise ValueError( - "template_folder must be either a string or a list of strings.") blueice_config = adapt_likelihood_config_for_blueice( config, template_folder_list) @@ -143,7 +144,6 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum": ll = likelihood_object(blueice_config) for source in config["sources"]: - # Set rate parameters rate_parameters = [ p for p in source["parameters"] if self.parameters[p].ptype == "rate"] diff --git a/alea/template_source.py b/alea/template_source.py index d5c31f34..dd4480c0 100644 --- a/alea/template_source.py +++ b/alea/template_source.py @@ -24,6 +24,7 @@ class TemplateSource(blueice.HistogramPdfSource): :param log10_bins: List of axis numbers. If True, bin edges on this axis in the root file are log10() of the actual bin edges. 
""" + def build_histogram(self): format_dict = { k: self.config[k] @@ -71,7 +72,6 @@ def build_histogram(self): stop=slice_axis_limits[1]) logging.debug(f"Normalization after slicing: {h.n}.") - if collapse_axis is not None: if collapse_slices is None: raise ValueError( @@ -94,9 +94,8 @@ def build_histogram(self): self.config['analysis_space']): expected_bin_edges = np.array(expected_bin_edges) seen_bin_edges = h.bin_edges[axis_i] - if len( - self.config['analysis_space'] - ) == 1: # If 1D, hist1d returns bin_edges straight, not as list + # If 1D, hist1d returns bin_edges straight, not as list + if len(self.config['analysis_space']) == 1: seen_bin_edges = h.bin_edges logging.debug("axis_i: " + str(axis_i)) logging.debug("expected_bin_edges: " + str(expected_bin_edges)) @@ -175,6 +174,7 @@ class CombinedSource(blueice.HistogramPdfSource): Must be 1 shorter than histnames, templatenames :param histogram_parameters: names of parameters that should be put in the hdf5/histogram names, """ + def build_histogram(self): weight_names = self.config.get("weight_names") weights = [ @@ -355,20 +355,6 @@ def simulate(self, n_events): return ret -def get_json_spectrum(fn): - """ - Translates bbf-style JSON files to spectra. - units are keV and /kev*day*kg - """ - contents = json.load(open(fn, "r")) - logging.debug(contents["description"]) - esyst = contents["coordinate_system"][0][1] - ret = interp1d( - np.linspace(*esyst), contents["map"], - bounds_error=False, fill_value=0.) - return ret - - class SpectrumTemplateSource(blueice.HistogramPdfSource): """ :param spectrum_name: name of bbf json-like spectrum _OR_ function that can be called @@ -376,6 +362,21 @@ class SpectrumTemplateSource(blueice.HistogramPdfSource): :param histname: histogram name :param named_parameters: list of config settings to pass to .format on histname and filename """ + + @staticmethod + def _get_json_spectrum(fn): + """ + Translates bbf-style JSON files to spectra. 
+ units are keV and /kev*day*kg + """ + contents = json.load(open(fn, "r")) + logging.debug(contents["description"]) + esyst = contents["coordinate_system"][0][1] + ret = interp1d( + np.linspace(*esyst), contents["map"], + bounds_error=False, fill_value=0.) + return ret + def build_histogram(self): logging.debug("building a hist") format_dict = { @@ -387,7 +388,7 @@ def build_histogram(self): spectrum = self.config["spectrum"] if type(spectrum) is str: - spectrum = get_json_spectrum(spectrum.format(**format_dict)) + spectrum = self._get_json_spectrum(spectrum.format(**format_dict)) slice_args = self.config.get("slice_args", {}) if type(slice_args) is dict: diff --git a/alea/utils.py b/alea/utils.py index a6485fd6..631c87f0 100644 --- a/alea/utils.py +++ b/alea/utils.py @@ -1,6 +1,7 @@ import os import yaml import pkg_resources +from copy import deepcopy from pydoc import locate from warnings import warn @@ -40,16 +41,18 @@ def adapt_likelihood_config_for_blueice( dict: adapted likelihood config """ - likelihood_config["analysis_space"] = get_analysis_space( - likelihood_config["analysis_space"]) + likelihood_config_copy = deepcopy(likelihood_config) - likelihood_config["default_source_class"] = locate( - likelihood_config["default_source_class"]) + likelihood_config_copy["analysis_space"] = get_analysis_space( + likelihood_config_copy["analysis_space"]) - for source in likelihood_config["sources"]: + likelihood_config_copy["default_source_class"] = locate( + likelihood_config_copy["default_source_class"]) + + for source in likelihood_config_copy["sources"]: source["templatename"] = get_file_path( source["template_filename"], template_folder_list) - return likelihood_config + return likelihood_config_copy def load_yaml(file_name: str): diff --git a/tests/test_template_source.py b/tests/test_template_source.py index 1088b233..36c4f4fd 100644 --- a/tests/test_template_source.py +++ b/tests/test_template_source.py @@ -7,5 +7,6 @@ class TestTemplateSource(TestCase): 
"""Test of the TemplateSource class""" def test_load_templates(self): - # TODO: + # TODO: not sure whether we want to test TemplateSource in alea + # not sure TemplateSource.build_histogram and TemplateSource.simulate are called pass From e769327d8d0a2ef81a8dfc625407194519213962 Mon Sep 17 00:00:00 2001 From: dachengx Date: Tue, 25 Jul 2023 17:43:16 +0800 Subject: [PATCH 10/19] Raise error when initialize StatisticalModel directly --- alea/model.py | 7 +++++++ tests/test_statistical_model.py | 13 +++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tests/test_statistical_model.py diff --git a/alea/model.py b/alea/model.py index fbbc630d..8628df7e 100644 --- a/alea/model.py +++ b/alea/model.py @@ -59,6 +59,13 @@ def __init__( confidence_interval_kind: str = "central", # one of central, upper, lower confidence_interval_threshold: Callable[[float], float] = None, ): + """Initialize a statistical model""" + if type(self) == StatisticalModel: + raise RuntimeError( + "You cannot instantiate the StatisticalModel class directly, " + "you must use a subclass where the likelihood function and data generation " + "method are implemented") + # following https://github.com/JelleAalbers/blueice/blob/ # 7c10222a13227e78dc7224b1a7e56ff91e4a8043/blueice/likelihood.py#L97 self.is_data_set = False diff --git a/tests/test_statistical_model.py b/tests/test_statistical_model.py new file mode 100644 index 00000000..3dd09526 --- /dev/null +++ b/tests/test_statistical_model.py @@ -0,0 +1,13 @@ +from alea import StatisticalModel + +def test_gaussian_model(): + try: + error_raised = True + StatisticalModel() + error_raised = False + except Exception: + print('Error correctly raised when directly instantiating StatisticalModel') + else: + if not error_raised: + raise RuntimeError( + 'Should raise error when directly instantiating StatisticalModel') From 3c27ec74f3f7c88db05d11dcfefe8ac36ff36a96 Mon Sep 17 00:00:00 2001 From: dachengx Date: Tue, 25 Jul 2023 17:52:06 +0800 Subject: 
[PATCH 11/19] Do not need to be too cautious because 166033e9a3157d40bc41f009d2e919a6c8348245 --- alea/models/blueice_extended_model.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/alea/models/blueice_extended_model.py b/alea/models/blueice_extended_model.py index 9e66eec6..49db7203 100644 --- a/alea/models/blueice_extended_model.py +++ b/alea/models/blueice_extended_model.py @@ -33,9 +33,7 @@ def __init__(self, parameter_definition: dict, likelihood_config: dict): likelihood_config (dict): A dictionary defining the likelihood. """ super().__init__(parameter_definition=parameter_definition) - # deepcopy likelihood_config to prevent it to be - # changed by adapt_likelihood_config_for_blueice - self._likelihood = self._build_ll_from_config(deepcopy(likelihood_config)) + self._likelihood = self._build_ll_from_config(likelihood_config) self.likelihood_names = [t["name"] for t in likelihood_config["likelihood_terms"]] self.likelihood_names.append("ancillary_likelihood") self.data_generators = self._build_data_generators() From a1654dc66d0c9b7fb3862c41442e5b8e8576029d Mon Sep 17 00:00:00 2001 From: dachengx Date: Tue, 25 Jul 2023 18:30:15 +0800 Subject: [PATCH 12/19] This is why we want to accelerate the unittest --- alea/models/blueice_extended_model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/alea/models/blueice_extended_model.py b/alea/models/blueice_extended_model.py index 4e54cb09..ddfe47f4 100644 --- a/alea/models/blueice_extended_model.py +++ b/alea/models/blueice_extended_model.py @@ -6,7 +6,7 @@ import numpy as np import scipy.stats as stats from blueice.likelihood import LogAncillaryLikelihood, LogLikelihoodSum -from inference_interface import dict_to_structured_array +from inference_interface import dict_to_structured_array, structured_array_to_dict from alea.model import StatisticalModel from alea.parameters import Parameters @@ -291,10 +291,11 @@ def set_data(self, d: np.array): d (np.array): Data of 
ancillary measurements, stored as numpy array """ # This results in shifted constraint terms. - if set(d.keys()) != set(self.parameters.names): + d_dict = structured_array_to_dict(d) + if set(d_dict.keys()) != set(self.parameters.names): raise ValueError( "The data dict must contain all parameters as keys in CustomAncillaryLikelihood.") - self.constraint_functions = self._get_constraint_functions(**d) + self.constraint_functions = self._get_constraint_functions(**d_dict) def ancillary_likelihood_sum(self, evaluate_at: dict) -> float: """Return the sum of all constraint terms. From 37f00ff0aca9f9797c46d4a94d9a21e19e65f57e Mon Sep 17 00:00:00 2001 From: dachengx Date: Tue, 25 Jul 2023 23:08:29 +0800 Subject: [PATCH 13/19] Directly call ll of model --- tests/test_blueice_extended_model.py | 6 ++---- tests/{test_statistical_model.py => test_model.py} | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) rename tests/{test_statistical_model.py => test_model.py} (92%) diff --git a/tests/test_blueice_extended_model.py b/tests/test_blueice_extended_model.py index f388a78e..a0897478 100644 --- a/tests/test_blueice_extended_model.py +++ b/tests/test_blueice_extended_model.py @@ -33,9 +33,7 @@ def test_expectation_values(self): is_data_set |= ll_term.is_data_set if is_data_set: raise ValueError('Data should not be set after get_expectation_values.') - - # TODO: assert expectation values after test template source - # self.assertEqual() + # TODO: assert expectation values after manually initialize template source def test_generate_data(self): """Test of the generate_data method""" @@ -53,7 +51,7 @@ def test_likelihood(self): len(self.model._likelihood.likelihood_list), self.n_likelihood_terms + 1) self.model.data = self.model.generate_data() - self.model._likelihood() + self.model.ll() def test_fit(self): """Test of the fit method""" diff --git a/tests/test_statistical_model.py b/tests/test_model.py similarity index 92% rename from tests/test_statistical_model.py rename to 
tests/test_model.py index 3dd09526..5d99488b 100644 --- a/tests/test_statistical_model.py +++ b/tests/test_model.py @@ -1,6 +1,6 @@ from alea import StatisticalModel -def test_gaussian_model(): +def test_statistical_model(): try: error_raised = True StatisticalModel() From 52733344461b3d7f96fc78039fb01852c62135cf Mon Sep 17 00:00:00 2001 From: Knut Dundas Moraa Date: Tue, 25 Jul 2023 14:52:26 -0400 Subject: [PATCH 14/19] some more fitting tests --- tests/test_gaussian_model.py | 44 +++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/tests/test_gaussian_model.py b/tests/test_gaussian_model.py index 5caf0d7f..f2412a25 100644 --- a/tests/test_gaussian_model.py +++ b/tests/test_gaussian_model.py @@ -1,13 +1,24 @@ from unittest import TestCase +from os import remove + +import inference_interface + +import numpy as np + +import scipy.stats as sps + from alea.models import GaussianModel class TestGaussianModel(TestCase): """Test of the Parameters class""" - def test_gaussian_model(self): - """Test of generate_data and fit method of the GaussianModel class""" + @classmethod + def setUp(cls): + """ + Initialise the GaussianModel + """ parameter_definition = { 'mu': { 'fit_guess': 0., @@ -24,10 +35,31 @@ def test_gaussian_model(self): 'nominal_value': 1., }, } - simple_model = GaussianModel( + cls.simple_model = GaussianModel( parameter_definition=parameter_definition) - simple_model.data = simple_model.generate_data(mu=0, sigma=2) - fit_result, max_llh = simple_model.fit() + def test_data_generation(self): + """ + Test of generate_data and fit method of the GaussianModel class + """ + # test data generation: + self.simple_model.data = self.simple_model.generate_data(mu=0, sigma=2) + + def test_data_storage(self): + # test data store+load: toydata_file = 'simple_data.hdf5' - simple_model.store_data(toydata_file, [simple_model.data]) + self.simple_model.data = self.simple_model.generate_data(mu=0, sigma=2) + 
self.simple_model.store_data(toydata_file, [self.simple_model.data]) + stored_data = inference_interface.toydata_from_file('simple_data.hdf5') + assert self.simple_model.data == stored_data[0] , "Stored data disagrees with data!" + + remove("simple_data.hdf5") + + def test_fit(self): + # test fitting: + self.simple_model.data = self.simple_model.generate_data(mu=0, sigma=2) + hat_meas = self.simple_model.data[0]["hat_mu"] + best_fit, lf = self.simple_model.fit(sigma=2) + hat_fit = best_fit["mu"] + np.testing.assert_almost_equal(hat_meas, hat_fit), "best-fit does not agree" + np.testing.assert_almost_equal(lf, sps.norm(hat_fit, 2).logpdf(hat_meas)) , "likelihood function disagrees" \ No newline at end of file From 264e663ca7cdeff07535124d85479085c3803df9 Mon Sep 17 00:00:00 2001 From: Knut Dundas Moraa Date: Tue, 25 Jul 2023 15:21:06 -0400 Subject: [PATCH 15/19] rd drudgery --- tests/test_gaussian_model.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/test_gaussian_model.py b/tests/test_gaussian_model.py index f2412a25..afe29001 100644 --- a/tests/test_gaussian_model.py +++ b/tests/test_gaussian_model.py @@ -16,9 +16,7 @@ class TestGaussianModel(TestCase): @classmethod def setUp(cls): - """ - Initialise the GaussianModel - """ + # Initialise the GaussianModel parameter_definition = { 'mu': { 'fit_guess': 0., @@ -39,9 +37,6 @@ def setUp(cls): parameter_definition=parameter_definition) def test_data_generation(self): - """ - Test of generate_data and fit method of the GaussianModel class - """ # test data generation: self.simple_model.data = self.simple_model.generate_data(mu=0, sigma=2) @@ -51,7 +46,7 @@ def test_data_storage(self): self.simple_model.data = self.simple_model.generate_data(mu=0, sigma=2) self.simple_model.store_data(toydata_file, [self.simple_model.data]) stored_data = inference_interface.toydata_from_file('simple_data.hdf5') - assert self.simple_model.data == stored_data[0] , "Stored data disagrees with data!" 
+ assert self.simple_model.data == stored_data[0], "Stored data disagrees with data!" remove("simple_data.hdf5") @@ -61,5 +56,5 @@ def test_fit(self): hat_meas = self.simple_model.data[0]["hat_mu"] best_fit, lf = self.simple_model.fit(sigma=2) hat_fit = best_fit["mu"] - np.testing.assert_almost_equal(hat_meas, hat_fit), "best-fit does not agree" - np.testing.assert_almost_equal(lf, sps.norm(hat_fit, 2).logpdf(hat_meas)) , "likelihood function disagrees" \ No newline at end of file + np.testing.assert_almost_equal(hat_meas, hat_fit) + np.testing.assert_almost_equal(lf, sps.norm(hat_fit, 2).logpdf(hat_meas)) From 258a15b7d8ab11fb8fc88422a14c9cd6ea087c0a Mon Sep 17 00:00:00 2001 From: dachengx Date: Wed, 26 Jul 2023 03:51:11 +0800 Subject: [PATCH 16/19] Use setUp instead of __init__ of TestCase --- tests/test_blueice_extended_model.py | 16 +++++---- tests/test_gaussian_model.py | 49 +++++++++++++--------------- tests/test_parameter.py | 10 +++--- 3 files changed, 36 insertions(+), 39 deletions(-) diff --git a/tests/test_blueice_extended_model.py b/tests/test_blueice_extended_model.py index a0897478..4ff09ed3 100644 --- a/tests/test_blueice_extended_model.py +++ b/tests/test_blueice_extended_model.py @@ -8,12 +8,12 @@ class TestBlueiceExtendedModel(TestCase): """Test of the BlueiceExtendedModel class""" - def __init__(self, *args, **kwargs): - """Initialize the BlueiceExtendedModel class""" - super().__init__(*args, **kwargs) - self.config = load_yaml('unbinned_wimp_statistical_model.yaml') - self.n_likelihood_terms = len(self.config['likelihood_config']['likelihood_terms']) - self.set_new_model() + @classmethod + def setUp(cls): + """Initialise the BlueiceExtendedModel instance""" + cls.config = load_yaml('unbinned_wimp_statistical_model.yaml') + cls.n_likelihood_terms = len(cls.config['likelihood_config']['likelihood_terms']) + cls.set_new_model(cls) def set_new_model(self): """Set a new BlueiceExtendedModel instance""" @@ -57,8 +57,10 @@ def test_fit(self): """Test 
of the fit method""" self.model.data = self.model.generate_data() fit_result, max_llh = self.model.fit() - # TODO: check whether all parameters are in fit_result + # TODO: + # check whether all parameters are in fit_result # and whether fittable parameters are fitted + # and whether the results are in boundaries class TestCustomAncillaryLikelihood(TestCase): diff --git a/tests/test_gaussian_model.py b/tests/test_gaussian_model.py index afe29001..bc636233 100644 --- a/tests/test_gaussian_model.py +++ b/tests/test_gaussian_model.py @@ -1,22 +1,18 @@ -from unittest import TestCase - from os import remove - -import inference_interface - -import numpy as np +from unittest import TestCase import scipy.stats as sps +import inference_interface from alea.models import GaussianModel class TestGaussianModel(TestCase): - """Test of the Parameters class""" + """Test of the GaussianModel class""" @classmethod def setUp(cls): - # Initialise the GaussianModel + """Initialise the GaussianModel instance""" parameter_definition = { 'mu': { 'fit_guess': 0., @@ -33,28 +29,27 @@ def setUp(cls): 'nominal_value': 1., }, } - cls.simple_model = GaussianModel( + cls.model = GaussianModel( parameter_definition=parameter_definition) def test_data_generation(self): - # test data generation: - self.simple_model.data = self.simple_model.generate_data(mu=0, sigma=2) + """Test generation of data""" + self.model.data = self.model.generate_data(mu=0, sigma=2) def test_data_storage(self): - # test data store+load: + """Test storage of data to file and retrieval of data from file""" toydata_file = 'simple_data.hdf5' - self.simple_model.data = self.simple_model.generate_data(mu=0, sigma=2) - self.simple_model.store_data(toydata_file, [self.simple_model.data]) - stored_data = inference_interface.toydata_from_file('simple_data.hdf5') - assert self.simple_model.data == stored_data[0], "Stored data disagrees with data!" 
- - remove("simple_data.hdf5") - - def test_fit(self): - # test fitting: - self.simple_model.data = self.simple_model.generate_data(mu=0, sigma=2) - hat_meas = self.simple_model.data[0]["hat_mu"] - best_fit, lf = self.simple_model.fit(sigma=2) - hat_fit = best_fit["mu"] - np.testing.assert_almost_equal(hat_meas, hat_fit) - np.testing.assert_almost_equal(lf, sps.norm(hat_fit, 2).logpdf(hat_meas)) + self.model.data = self.model.generate_data(mu=0, sigma=2) + self.model.store_data(toydata_file, [self.model.data]) + stored_data = inference_interface.toydata_from_file(toydata_file) + assert self.model.data == stored_data[0], 'Stored data disagrees with data!' + remove(toydata_file) + + def test_fit_result(self): + """Test fitting of data""" + self.model.data = self.model.generate_data(mu=0, sigma=2) + hat_meas = self.model.data[0]['hat_mu'].item() + best_fit, lf = self.model.fit(sigma=2) + hat_fit = best_fit['mu'] + self.assertAlmostEqual(hat_meas, hat_fit) + self.assertAlmostEqual(lf, sps.norm(hat_fit, 2).logpdf(hat_meas)) diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 876788f4..de2ff26c 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -8,11 +8,11 @@ class TestParameters(TestCase): """Test of the Parameters class""" - def __init__(self, *args, **kwargs): - """Initialize the BlueiceExtendedModel class""" - super().__init__(*args, **kwargs) - self.config = load_yaml('unbinned_wimp_statistical_model.yaml') - self.parameters = Parameters.from_config(self.config['parameter_definition']) + @classmethod + def setUp(cls): + """Initialise the Parameters instance""" + config = load_yaml('unbinned_wimp_statistical_model.yaml') + cls.parameters = Parameters.from_config(config['parameter_definition']) def test_deep_copyable(self): """Test of whether Parameters instance can be deepcopied""" From 7da8d53e6933e834cae75dbf6059c1cbc45c9b9a Mon Sep 17 00:00:00 2001 From: dachengx Date: Thu, 27 Jul 2023 00:02:38 +0800 Subject: [PATCH 17/19] 
Remove url_base, add get_template_folder_list function --- alea/__init__.py | 7 +------ alea/models/__init__.py | 2 -- alea/models/blueice_extended_model.py | 12 ++---------- alea/utils.py | 25 +++++++++++++++++++++---- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/alea/__init__.py b/alea/__init__.py index f008bb17..db25e2bf 100644 --- a/alea/__init__.py +++ b/alea/__init__.py @@ -1,18 +1,13 @@ __version__ = '0.0.0' -from . import model from .model import * -from . import models +from .models import * -from . import utils from .utils import * -from . import parameters from .parameters import * -from . import simulators from .simulators import * -from . import template_source from .template_source import * diff --git a/alea/models/__init__.py b/alea/models/__init__.py index 530eeafa..17b4e8a1 100644 --- a/alea/models/__init__.py +++ b/alea/models/__init__.py @@ -1,5 +1,3 @@ -from . import gaussian_model from .gaussian_model import * -from . import blueice_extended_model from .blueice_extended_model import * diff --git a/alea/models/blueice_extended_model.py b/alea/models/blueice_extended_model.py index ddfe47f4..b63070c7 100644 --- a/alea/models/blueice_extended_model.py +++ b/alea/models/blueice_extended_model.py @@ -11,7 +11,7 @@ from alea.model import StatisticalModel from alea.parameters import Parameters from alea.simulators import BlueiceDataGenerator -from alea.utils import adapt_likelihood_config_for_blueice +from alea.utils import adapt_likelihood_config_for_blueice, get_template_folder_list class BlueiceExtendedModel(StatisticalModel): @@ -107,15 +107,7 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum": """ lls = [] - if "template_folder" not in likelihood_config: - likelihood_config["template_folder"] = [] - if isinstance(likelihood_config["template_folder"], str): - template_folder_list = [likelihood_config["template_folder"]] - elif isinstance(likelihood_config["template_folder"], list): - 
template_folder_list = likelihood_config["template_folder"] - else: - raise ValueError( - "template_folder must be either a string or a list of strings.") + template_folder_list = get_template_folder_list(likelihood_config) # Iterate through each likelihood term in the configuration for config in likelihood_config["likelihood_terms"]: diff --git a/alea/utils.py b/alea/utils.py index 631c87f0..0dc2d30a 100644 --- a/alea/utils.py +++ b/alea/utils.py @@ -3,10 +3,12 @@ import pkg_resources from copy import deepcopy from pydoc import locate -from warnings import warn +import logging import numpy as np +logging.basicConfig(level=logging.INFO) + def get_analysis_space(analysis_space: dict) -> list: eval_analysis_space = [] @@ -81,7 +83,7 @@ def get_file_path(fname, folder_list=None): Try 5 methods in the following order #. fname begin with '/', return absolute path - #. url_base begin with '/', return url_base + name + #. folder begin with '/', return folder + name #. can get file from _get_abspath, return alea internal file path #. can be found in local installed ntauxfiles, return ntauxfiles absolute path #. can be downloaded from MongoDB, download and return cached path @@ -94,12 +96,12 @@ def get_file_path(fname, folder_list=None): return fname # 2. From local folder - # Use url_base as prefix + # Use folder as prefix for folder in folder_list: if folder.startswith('/'): fpath = os.path.join(folder, fname) if os.path.exists(fpath): - warn(f'Load {fname} successfully from {fpath}') + logging.info(f'Load {fname} successfully from {fpath}') return fpath # 3. 
From alea internal files @@ -110,3 +112,18 @@ def get_file_path(fname, folder_list=None): # raise error when can not find corresponding file raise RuntimeError(f'Can not find {fname}, please check your file system') + + +def get_template_folder_list(likelihood_config): + """Get a list of template_folder from likelihood_config""" + if "template_folder" not in likelihood_config: + # return empty list if template_folder is not specified + likelihood_config["template_folder"] = [] + if isinstance(likelihood_config["template_folder"], str): + template_folder_list = [likelihood_config["template_folder"]] + elif isinstance(likelihood_config["template_folder"], list): + template_folder_list = likelihood_config["template_folder"] + else: + raise ValueError( + "template_folder must be either a string or a list of strings.") + return template_folder_list From 0250340a58b33d2487c2bd4b8767d01f0bc68fc8 Mon Sep 17 00:00:00 2001 From: dachengx Date: Thu, 27 Jul 2023 00:25:17 +0800 Subject: [PATCH 18/19] Add TODO comment on get_expectation_values --- alea/models/blueice_extended_model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/alea/models/blueice_extended_model.py b/alea/models/blueice_extended_model.py index b63070c7..c3afd803 100644 --- a/alea/models/blueice_extended_model.py +++ b/alea/models/blueice_extended_model.py @@ -80,6 +80,9 @@ def get_expectation_values(self, **kwargs) -> dict: """ Return total expectation values (summed over all likelihood terms with the same name) given a number of named parameters (kwargs) + TODO: current implementation is not elegant + because it calls the ll and requires data to be set. + We should update this function in the future after we stop using blueice. 
""" ret = dict() From 34d1c4484eabcb00640b2d338107d21ffe4cb590 Mon Sep 17 00:00:00 2001 From: dachengx Date: Thu, 27 Jul 2023 00:28:40 +0800 Subject: [PATCH 19/19] Minor change --- alea/models/blueice_extended_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alea/models/blueice_extended_model.py b/alea/models/blueice_extended_model.py index c3afd803..0951f083 100644 --- a/alea/models/blueice_extended_model.py +++ b/alea/models/blueice_extended_model.py @@ -80,8 +80,8 @@ def get_expectation_values(self, **kwargs) -> dict: """ Return total expectation values (summed over all likelihood terms with the same name) given a number of named parameters (kwargs) - TODO: current implementation is not elegant - because it calls the ll and requires data to be set. + TODO: Current implementation is not elegant + because it calls the ll and requires data to be set. We should update this function in the future after we stop using blueice. """ ret = dict()