From aaa2ebe1240d2b95e159697c9aac2f5336c11aa2 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 10:26:44 +0000 Subject: [PATCH 01/22] first refactor, placing samples summary in result --- autofit/non_linear/analysis/analysis.py | 3 +- autofit/non_linear/result.py | 15 ++++--- autofit/non_linear/search/abstract_search.py | 44 +++++++++++++------- 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/autofit/non_linear/analysis/analysis.py b/autofit/non_linear/analysis/analysis.py index 73219f883..8d3ebe0d6 100644 --- a/autofit/non_linear/analysis/analysis.py +++ b/autofit/non_linear/analysis/analysis.py @@ -171,8 +171,9 @@ def modify_after_fit( """ return self - def make_result(self, samples, search_internal=None): + def make_result(self, samples_summary, samples, search_internal=None): return Result( + samples_summary=samples_summary, samples=samples, latent_variables=self.latent_variables, search_internal=search_internal, diff --git a/autofit/non_linear/result.py b/autofit/non_linear/result.py index 0fa9d1826..806a6f02f 100644 --- a/autofit/non_linear/result.py +++ b/autofit/non_linear/result.py @@ -42,9 +42,11 @@ def summary(self): class AbstractResult(ABC): + @property - def sigma(self): - return self.samples.sigma + @abstractmethod + def samples_summary(self): + pass @property @abstractmethod @@ -89,7 +91,7 @@ def log_likelihood(self): @property def instance(self): try: - return self.samples.instance + return self.samples_summary.instance except AttributeError as e: logging.warning(e) return None @@ -167,7 +169,7 @@ def model_bounded(self, b: float) -> AbstractPriorModel: class Result(AbstractResult): - def __init__(self, samples: Samples, search_internal = None, latent_variables=None): + def __init__(self, samples_summary, samples: Samples, search_internal = None, latent_variables=None): """ The result of a non-linear search, which includes: @@ -187,10 +189,11 @@ def __init__(self, samples: Samples, search_internal = None, 
latent_variables=No search_internal The non-linear search used to perform the model fit in its internal format. """ - self._samples = samples - self.latent_variables = latent_variables + self._samples_summary = samples_summary + self._samples = samples self.search_internal = search_internal + self.latent_variables = latent_variables self.__model = None diff --git a/autofit/non_linear/search/abstract_search.py b/autofit/non_linear/search/abstract_search.py index 45b26c24b..22386e57e 100644 --- a/autofit/non_linear/search/abstract_search.py +++ b/autofit/non_linear/search/abstract_search.py @@ -721,24 +721,40 @@ def result_via_completed_fit( """ model.freeze() - try: - samples = self.samples_from(model=model) - except FileNotFoundError: - samples = None - try: - search_internal = self.backend - except (AttributeError, FileNotFoundError): - search_internal = None + from autoconf.dictable import from_json + from autofit.non_linear.samples.summary import SamplesSummary - if search_internal is None: - try: - search_internal = self.paths.load_search_internal() - except FileNotFoundError: - search_internal = None + samples_summary = from_json( + file_path=self.paths._path_for_json("samples_summary") + ) + + print(samples_summary) +# dd + + # try: + # samples = self.samples_from(model=model) + # except FileNotFoundError: + # samples = None + # + # try: + # search_internal = self.backend + # except (AttributeError, FileNotFoundError): + # search_internal = None + # + # if search_internal is None: + # try: + # search_internal = self.paths.load_search_internal() + # except FileNotFoundError: + # search_internal = None + + samples = None + search_internal = None result = analysis.make_result( - samples=samples, search_internal=search_internal + samples_summary=samples_summary, + samples=samples, + search_internal=search_internal ) if self.is_master: From 73e9df6aa6b26922ce16f3aebf7578142db9235a Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 10:28:44 
+0000 Subject: [PATCH 02/22] now uses samples summary max likelihood --- autofit/non_linear/result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autofit/non_linear/result.py b/autofit/non_linear/result.py index 806a6f02f..a74d3fc63 100644 --- a/autofit/non_linear/result.py +++ b/autofit/non_linear/result.py @@ -86,7 +86,7 @@ def __lt__(self, other): @property def log_likelihood(self): - return max(self.samples.log_likelihood_list) + return self.samples_summary.max_log_likelihood_sample.log_likelihood @property def instance(self): From c3975b487c1d9ad4ce4428df1667a980aa68aaf0 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 10:32:37 +0000 Subject: [PATCH 03/22] median PDF added to samples summary and used for prior linking --- autofit/non_linear/samples/interface.py | 5 +++++ autofit/non_linear/samples/samples.py | 3 ++- autofit/non_linear/samples/summary.py | 5 ++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/autofit/non_linear/samples/interface.py b/autofit/non_linear/samples/interface.py index 6ed0e848a..2f02223e7 100644 --- a/autofit/non_linear/samples/interface.py +++ b/autofit/non_linear/samples/interface.py @@ -175,6 +175,11 @@ def model_bounded(self, b: float) -> AbstractPriorModel: def _instance_from_vector(self, vector: List[float]) -> ModelInstance: return self.model.instance_from_vector(vector=vector, ignore_prior_limits=True) + @property + @abstractmethod + def median_pdf(self): + pass + @property def prior_means(self) -> [List]: """ diff --git a/autofit/non_linear/samples/samples.py b/autofit/non_linear/samples/samples.py index 08cc1d8b8..0e6bc83c3 100644 --- a/autofit/non_linear/samples/samples.py +++ b/autofit/non_linear/samples/samples.py @@ -130,8 +130,9 @@ def from_list_info_and_model( def summary(self): return SamplesSummary( - max_log_likelihood_sample=self.max_log_likelihood_sample, model=self.model, + max_log_likelihood_sample=self.max_log_likelihood_sample, + 
median_pdf=self.median_pdf, ) def __add__(self, other: "Samples") -> "Samples": diff --git a/autofit/non_linear/samples/summary.py b/autofit/non_linear/samples/summary.py index 400a70910..dfcaea030 100644 --- a/autofit/non_linear/samples/summary.py +++ b/autofit/non_linear/samples/summary.py @@ -14,8 +14,9 @@ class SamplesSummary(SamplesInterface): def __init__( self, - max_log_likelihood_sample: Sample, model: AbstractPriorModel, + max_log_likelihood_sample: Sample, + median_pdf : Optional[Sample] = None, covariance_matrix: Optional[np.ndarray] = None, log_evidence: Optional[float] = None, ): @@ -32,7 +33,9 @@ def __init__( The covariance matrix of the samples """ super().__init__(model=model) + self._max_log_likelihood_sample = max_log_likelihood_sample + self.median_pdf = median_pdf self.covariance_matrix = covariance_matrix self._log_evidence = log_evidence self.derived_summary = None From ad888c0e6a4593c704e202bb4d76e9809fa514a4 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 10:33:16 +0000 Subject: [PATCH 04/22] remove covariance matrix --- autofit/non_linear/samples/summary.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/autofit/non_linear/samples/summary.py b/autofit/non_linear/samples/summary.py index dfcaea030..ea8a80a53 100644 --- a/autofit/non_linear/samples/summary.py +++ b/autofit/non_linear/samples/summary.py @@ -17,7 +17,6 @@ def __init__( model: AbstractPriorModel, max_log_likelihood_sample: Sample, median_pdf : Optional[Sample] = None, - covariance_matrix: Optional[np.ndarray] = None, log_evidence: Optional[float] = None, ): """ @@ -25,18 +24,17 @@ def __init__( Parameters ---------- - max_log_likelihood_sample - The parameters from a non-linear search that gave the highest likelihood model A model used to map the samples to physical values - covariance_matrix - The covariance matrix of the samples + max_log_likelihood_sample + The parameters from a non-linear search that gave the highest 
likelihood + median_pdf + The median PDF of the samples which are used for prior linking via the search chaining API. """ super().__init__(model=model) self._max_log_likelihood_sample = max_log_likelihood_sample self.median_pdf = median_pdf - self.covariance_matrix = covariance_matrix self._log_evidence = log_evidence self.derived_summary = None From 9eac1afbfabf704ae4caf57818e7d1dfabafc8e6 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 10:33:37 +0000 Subject: [PATCH 05/22] fix median pdf --- autofit/non_linear/samples/summary.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/autofit/non_linear/samples/summary.py b/autofit/non_linear/samples/summary.py index ea8a80a53..2b83b67c8 100644 --- a/autofit/non_linear/samples/summary.py +++ b/autofit/non_linear/samples/summary.py @@ -34,7 +34,7 @@ def __init__( super().__init__(model=model) self._max_log_likelihood_sample = max_log_likelihood_sample - self.median_pdf = median_pdf + self._median_pdf = median_pdf self._log_evidence = log_evidence self.derived_summary = None @@ -42,6 +42,10 @@ def __init__( def max_log_likelihood_sample(self): return self._max_log_likelihood_sample + @property + def median_pdf(self): + return self._median_pdf + @property def log_evidence(self): return self._log_evidence From e3d2112066cdc250d6f764e9703f846bad55c1b0 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 10:35:20 +0000 Subject: [PATCH 06/22] result now only uses samples summary for prior linking --- autofit/non_linear/result.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/autofit/non_linear/result.py b/autofit/non_linear/result.py index a74d3fc63..30b03a2c4 100644 --- a/autofit/non_linear/result.py +++ b/autofit/non_linear/result.py @@ -120,7 +120,7 @@ def model_absolute(self, a: float) -> AbstractPriorModel: A model mapper created by taking results from this search and creating priors with the defined absolute width. 
""" - return self.samples.model_absolute(a) + return self.samples_summary.model_absolute(a) def model_relative(self, r: float) -> AbstractPriorModel: """ @@ -143,7 +143,7 @@ def model_relative(self, r: float) -> AbstractPriorModel: A model mapper created by taking results from this search and creating priors with the defined relative width. """ - return self.samples.model_relative(r) + return self.samples_summary.model_relative(r) def model_bounded(self, b: float) -> AbstractPriorModel: """ @@ -165,7 +165,7 @@ def model_bounded(self, b: float) -> AbstractPriorModel: A model mapper created by taking results from this search and creating priors with the defined bounded uniform priors. """ - return self.samples.model_bounded(b) + return self.samples_summary.model_bounded(b) class Result(AbstractResult): @@ -204,7 +204,8 @@ def dict(self) -> dict: Human-readable dictionary representation of the results """ return { - "max_log_likelihood": self.samples.max_log_likelihood_sample.model_dict(), + "max_log_likelihood": self.samples_summary.max_log_likelihood_sample.model_dict(), + "median pdf": self.samples_summary.median_pdf.model_dict(), } @property @@ -231,8 +232,8 @@ def projected_model(self) -> AbstractPriorModel: @property def model(self): if self.__model is None: - self.__model = self.samples.model.mapper_from_prior_means( - means=self.samples.prior_means + self.__model = self.model.mapper_from_prior_means( + means=self.samples_summary.prior_means ) return self.__model From 01cfbe5b28faca43b18a28c14d77bb5c49737b65 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 10:39:03 +0000 Subject: [PATCH 07/22] remove sky save samples --- autofit/non_linear/search/abstract_search.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/autofit/non_linear/search/abstract_search.py b/autofit/non_linear/search/abstract_search.py index 22386e57e..ac4753479 100644 --- a/autofit/non_linear/search/abstract_search.py +++ 
b/autofit/non_linear/search/abstract_search.py @@ -156,11 +156,6 @@ def __init__( self.force_pickle_overwrite = conf.instance["general"]["output"][ "force_pickle_overwrite" ] - self.skip_save_samples = kwargs.get("skip_save_samples") - if self.skip_save_samples is None: - self.skip_save_samples = conf.instance["general"]["output"].get( - "skip_save_samples" - ) self.force_visualize_overwrite = conf.instance["general"]["output"][ "force_visualize_overwrite" @@ -771,8 +766,7 @@ def result_via_completed_fit( if self.force_pickle_overwrite: self.logger.info("Forcing pickle overwrite") - if not self.skip_save_samples: - self.paths.save_json("samples_summary", to_dict(samples.summary())) + self.paths.save_json("samples_summary", to_dict(samples.summary())) analysis.save_results(paths=self.paths, result=result) analysis.save_results_combined(paths=self.paths, result=result) @@ -967,8 +961,7 @@ def perform_update( samples=samples, ) - if not self.skip_save_samples: - self.paths.save_json("samples_summary", to_dict(samples.summary())) + self.paths.save_json("samples_summary", to_dict(samples.summary())) self.perform_visualization( model=model, From 56a2ef937398c23134578180311dab50d4e0ab75 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 11:20:51 +0000 Subject: [PATCH 08/22] fix creation of Samples Summary so it doesn't use covariance matrix --- autofit/__init__.py | 2 + autofit/non_linear/analysis/analysis.py | 5 +- autofit/non_linear/result.py | 58 +++++++++++++++++--- autofit/non_linear/samples/pdf.py | 8 +-- autofit/non_linear/search/abstract_search.py | 36 +++--------- 5 files changed, 64 insertions(+), 45 deletions(-) diff --git a/autofit/__init__.py b/autofit/__init__.py index f110bd6f2..46148dc48 100644 --- a/autofit/__init__.py +++ b/autofit/__init__.py @@ -16,6 +16,7 @@ from .graphical.declarative.factor.hierarchical import HierarchicalFactor from .graphical.laplace import LaplaceOptimiser from .non_linear.grid.grid_list import GridList +from 
.non_linear.samples.summary import SamplesSummary from .non_linear.samples import SamplesMCMC from .non_linear.samples import SamplesNest from .non_linear.samples import Samples @@ -82,6 +83,7 @@ from .non_linear.search.optimize.lbfgs.search import LBFGS from .non_linear.search.optimize.pyswarms.search.globe import PySwarmsGlobal from .non_linear.search.optimize.pyswarms.search.local import PySwarmsLocal +from .non_linear.paths.abstract import AbstractPaths from .non_linear.paths import DirectoryPaths from .non_linear.paths import DatabasePaths from .non_linear.result import Result diff --git a/autofit/non_linear/analysis/analysis.py b/autofit/non_linear/analysis/analysis.py index 8d3ebe0d6..bcb24d596 100644 --- a/autofit/non_linear/analysis/analysis.py +++ b/autofit/non_linear/analysis/analysis.py @@ -171,12 +171,13 @@ def modify_after_fit( """ return self - def make_result(self, samples_summary, samples, search_internal=None): + def make_result(self, samples_summary, paths, samples, search_internal=None): return Result( samples_summary=samples_summary, + paths=paths, samples=samples, - latent_variables=self.latent_variables, search_internal=search_internal, + latent_variables=self.latent_variables, ) def profile_log_likelihood_function(self, paths: AbstractPaths, instance): diff --git a/autofit/non_linear/result.py b/autofit/non_linear/result.py index 30b03a2c4..f409dd8fe 100644 --- a/autofit/non_linear/result.py +++ b/autofit/non_linear/result.py @@ -1,16 +1,20 @@ import logging from abc import ABC, abstractmethod import numpy as np +from typing import Optional from autoconf import conf from autofit import exc from autofit.mapper.prior_model.abstract import AbstractPriorModel +from autofit.non_linear.paths.abstract import AbstractPaths from autofit.non_linear.samples import Samples +from autofit.non_linear.samples.summary import SamplesSummary from autofit.text import text_util class Placeholder: + def __getattr__(self, item): """ Placeholders return None to 
represent the missing result's value @@ -43,10 +47,10 @@ def summary(self): class AbstractResult(ABC): - @property - @abstractmethod - def samples_summary(self): - pass + def __init__(self, samples_summary, paths): + + self.samples_summary = samples_summary + self.paths = paths @property @abstractmethod @@ -169,7 +173,14 @@ def model_bounded(self, b: float) -> AbstractPriorModel: class Result(AbstractResult): - def __init__(self, samples_summary, samples: Samples, search_internal = None, latent_variables=None): + def __init__( + self, + samples_summary : SamplesSummary, + paths : AbstractPaths, + samples: Optional[Samples] = None, + search_internal : Optional[object] = None, + latent_variables=None + ): """ The result of a non-linear search, which includes: @@ -189,10 +200,14 @@ def __init__(self, samples_summary, samples: Samples, search_internal = None, la search_internal The non-linear search used to perform the model fit in its internal format. """ - self._samples_summary = samples_summary + super().__init__( + samples_summary=samples_summary, + paths=paths + ) self._samples = samples - self.search_internal = search_internal + self._search_internal = search_internal + self.latent_variables = latent_variables self.__model = None @@ -210,7 +225,34 @@ def dict(self) -> dict: @property def samples(self): - return self._samples + + if self._samples is not None: + return self._samples + + try: + Samples.from_csv( + paths=self.paths, + model=self.model, + ) + except FileNotFoundError: + pass + + @property + def search_internal(self): + + if self._search_internal is not None: + return self._search_internal + + # + # try: + # search_internal = self.backend + # except (AttributeError, FileNotFoundError): + # search_internal = None + + try: + return self.paths.load_search_internal() + except FileNotFoundError: + pass @property def projected_model(self) -> AbstractPriorModel: diff --git a/autofit/non_linear/samples/pdf.py b/autofit/non_linear/samples/pdf.py index 
f6fbae2fa..cd6e49f8b 100644 --- a/autofit/non_linear/samples/pdf.py +++ b/autofit/non_linear/samples/pdf.py @@ -50,15 +50,11 @@ def __init__( ) def summary(self): - try: - covariance_matrix = self.covariance_matrix - except Exception as e: - logging.warning(f"Could not create covariance matrix: {e}") - covariance_matrix = None + return SamplesSummary( max_log_likelihood_sample=self.max_log_likelihood_sample, + median_pdf=self.median_pdf(as_instance=True), model=self.model, - covariance_matrix=covariance_matrix, log_evidence=self.log_evidence, ) diff --git a/autofit/non_linear/search/abstract_search.py b/autofit/non_linear/search/abstract_search.py index ac4753479..bada527a5 100644 --- a/autofit/non_linear/search/abstract_search.py +++ b/autofit/non_linear/search/abstract_search.py @@ -669,7 +669,10 @@ def start_resume_fit(self, analysis: Analysis, model: AbstractPriorModel) -> Res ) result = analysis.make_result( - samples=samples, search_internal=search_internal + samples_summary=samples.summary(), + paths=self.paths, + samples=samples, + search_internal=search_internal ) if self.is_master: @@ -717,39 +720,14 @@ def result_via_completed_fit( model.freeze() - from autoconf.dictable import from_json - from autofit.non_linear.samples.summary import SamplesSummary - - samples_summary = from_json( - file_path=self.paths._path_for_json("samples_summary") - ) + samples_summary = self.paths.load_json(name="samples_summary") print(samples_summary) -# dd - - # try: - # samples = self.samples_from(model=model) - # except FileNotFoundError: - # samples = None - # - # try: - # search_internal = self.backend - # except (AttributeError, FileNotFoundError): - # search_internal = None - # - # if search_internal is None: - # try: - # search_internal = self.paths.load_search_internal() - # except FileNotFoundError: - # search_internal = None - - samples = None - search_internal = None + dd result = analysis.make_result( samples_summary=samples_summary, - samples=samples, - 
search_internal=search_internal + paths=self.paths ) if self.is_master: From a3d4b3fea623eb1924d5eac216d938c4e14d5409 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 13:21:03 +0000 Subject: [PATCH 09/22] samples summary no longer has model --- autofit/non_linear/analysis/analysis.py | 10 +++- autofit/non_linear/paths/abstract.py | 11 +++++ autofit/non_linear/paths/directory.py | 20 ++++++-- autofit/non_linear/result.py | 5 +- autofit/non_linear/samples/interface.py | 5 -- autofit/non_linear/samples/pdf.py | 12 ++++- autofit/non_linear/samples/samples.py | 11 ++++- autofit/non_linear/samples/summary.py | 27 ++++++++--- autofit/non_linear/search/abstract_search.py | 51 ++++++++++---------- 9 files changed, 105 insertions(+), 47 deletions(-) diff --git a/autofit/non_linear/analysis/analysis.py b/autofit/non_linear/analysis/analysis.py index bcb24d596..a3d136b96 100644 --- a/autofit/non_linear/analysis/analysis.py +++ b/autofit/non_linear/analysis/analysis.py @@ -10,6 +10,8 @@ from autofit.non_linear.paths.abstract import AbstractPaths from autofit.non_linear.paths.database import DatabasePaths from autofit.non_linear.paths.null import NullPaths +from autofit.non_linear.samples.summary import SamplesSummary +from autofit.non_linear.samples.pdf import SamplesPDF from autofit.non_linear.result import Result logger = logging.getLogger(__name__) @@ -171,7 +173,13 @@ def modify_after_fit( """ return self - def make_result(self, samples_summary, paths, samples, search_internal=None): + def make_result( + self, + samples_summary: SamplesSummary, + paths: AbstractPaths, + samples: Optional[SamplesPDF] = None, + search_internal: Optional[object] = None, + ) -> Result: return Result( samples_summary=samples_summary, paths=paths, diff --git a/autofit/non_linear/paths/abstract.py b/autofit/non_linear/paths/abstract.py index b66ca84fb..f81c18862 100644 --- a/autofit/non_linear/paths/abstract.py +++ b/autofit/non_linear/paths/abstract.py @@ -13,6 +13,7 @@ 
from autoconf import conf from autofit.mapper.identifier import Identifier, IdentifierField +from autofit.non_linear.samples.summary import SamplesSummary from autofit.text import text_util from autofit.tools.util import open_, zip_directory @@ -421,6 +422,16 @@ def save_samples(self, samples): Save samples to the database """ + def save_samples_summary(self, samples_summary : SamplesSummary): + """ + Save samples summary to the database. + """ + + def load_samples_summary(self): + """ + Load samples summary from the database. + """ + @abstractmethod def save_latent_variables(self, latent_variables, samples): """ diff --git a/autofit/non_linear/paths/directory.py b/autofit/non_linear/paths/directory.py index 375129a14..64e18f2f1 100644 --- a/autofit/non_linear/paths/directory.py +++ b/autofit/non_linear/paths/directory.py @@ -10,19 +10,18 @@ import logging from autoconf import conf -from autoconf.dictable import to_dict +from autoconf.dictable import to_dict, from_dict from autoconf.output import conditional_output, should_output from autofit.text import formatter from autofit.tools.util import open_ from .abstract import AbstractPaths -# from ..analysis.latent_variables import LatentVariables from ..samples import load_from_table from autofit.non_linear.samples.pdf import SamplesPDF +from autofit.non_linear.samples.summary import SamplesSummary import numpy as np -from autofit.non_linear.samples.samples import Samples from autofit.text.formatter import write_table from ...visualise import VisualiseGraph @@ -248,6 +247,21 @@ def save_samples(self, samples): f"Could not save covariance matrix because of the following error:\n{e}" ) + def save_samples_summary(self, samples_summary : SamplesSummary): + + model = samples_summary.model + + samples_summary.model = None + self.save_json("samples_summary", to_dict(samples_summary)) + samples_summary.model = model + + def load_samples_summary(self) -> SamplesSummary: + + samples_summary = 
from_dict(self.load_json(name="samples_summary")) + samples_summary.model = self.model + + return samples_summary + def save_latent_variables( self, latent_variables, diff --git a/autofit/non_linear/result.py b/autofit/non_linear/result.py index f409dd8fe..2e807a45c 100644 --- a/autofit/non_linear/result.py +++ b/autofit/non_linear/result.py @@ -3,8 +3,6 @@ import numpy as np from typing import Optional -from autoconf import conf - from autofit import exc from autofit.mapper.prior_model.abstract import AbstractPriorModel from autofit.non_linear.paths.abstract import AbstractPaths @@ -94,6 +92,7 @@ def log_likelihood(self): @property def instance(self): + try: return self.samples_summary.instance except AttributeError as e: @@ -274,7 +273,7 @@ def projected_model(self) -> AbstractPriorModel: @property def model(self): if self.__model is None: - self.__model = self.model.mapper_from_prior_means( + self.__model = self.samples_summary.model.mapper_from_prior_means( means=self.samples_summary.prior_means ) diff --git a/autofit/non_linear/samples/interface.py b/autofit/non_linear/samples/interface.py index 2f02223e7..6ed0e848a 100644 --- a/autofit/non_linear/samples/interface.py +++ b/autofit/non_linear/samples/interface.py @@ -175,11 +175,6 @@ def model_bounded(self, b: float) -> AbstractPriorModel: def _instance_from_vector(self, vector: List[float]) -> ModelInstance: return self.model.instance_from_vector(vector=vector, ignore_prior_limits=True) - @property - @abstractmethod - def median_pdf(self): - pass - @property def prior_means(self) -> [List]: """ diff --git a/autofit/non_linear/samples/pdf.py b/autofit/non_linear/samples/pdf.py index cd6e49f8b..69a3c4b3b 100644 --- a/autofit/non_linear/samples/pdf.py +++ b/autofit/non_linear/samples/pdf.py @@ -51,10 +51,18 @@ def __init__( def summary(self): + median_pdf_sample = Sample.from_lists( + model=self.model, + parameter_lists=[self.median_pdf(as_instance=False)], + 
log_likelihood_list=[self.max_log_likelihood_sample.log_likelihood], + log_prior_list=[self.max_log_likelihood_sample.log_prior], + weight_list=[self.max_log_likelihood_sample.weight], + )[0] + return SamplesSummary( - max_log_likelihood_sample=self.max_log_likelihood_sample, - median_pdf=self.median_pdf(as_instance=True), model=self.model, + max_log_likelihood_sample=self.max_log_likelihood_sample, + median_pdf_sample=median_pdf_sample, log_evidence=self.log_evidence, ) diff --git a/autofit/non_linear/samples/samples.py b/autofit/non_linear/samples/samples.py index 0e6bc83c3..7d63d98de 100644 --- a/autofit/non_linear/samples/samples.py +++ b/autofit/non_linear/samples/samples.py @@ -129,10 +129,19 @@ def from_list_info_and_model( ) def summary(self): + + median_pdf_sample = Sample.from_lists( + model=self.model, + parameter_lists=[self.median_pdf(as_instance=False)], + log_likelihood_list=[self.max_log_likelihood_sample.log_likelihood], + log_prior_list=[self.max_log_likelihood_sample.log_prior], + weight_list=[self.max_log_likelihood_sample.weight], + )[0] + return SamplesSummary( model=self.model, max_log_likelihood_sample=self.max_log_likelihood_sample, - median_pdf=self.median_pdf, + median_pdf_sample=median_pdf_sample, ) def __add__(self, other: "Samples") -> "Samples": diff --git a/autofit/non_linear/samples/summary.py b/autofit/non_linear/samples/summary.py index 2b83b67c8..fdd14fff7 100644 --- a/autofit/non_linear/samples/summary.py +++ b/autofit/non_linear/samples/summary.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import List, Optional import logging import numpy as np @@ -7,6 +7,8 @@ from autofit.mapper.prior_model.abstract import AbstractPriorModel from .sample import Sample +from autofit.non_linear.samples.interface import to_instance + logger = logging.getLogger(__name__) @@ -16,7 +18,7 @@ def __init__( self, model: AbstractPriorModel, max_log_likelihood_sample: Sample, - median_pdf : Optional[Sample] = None, + median_pdf_sample : 
Optional[Sample] = None, log_evidence: Optional[float] = None, ): """ @@ -28,13 +30,13 @@ def __init__( A model used to map the samples to physical values max_log_likelihood_sample The parameters from a non-linear search that gave the highest likelihood - median_pdf + median_pdf_sample The median PDF of the samples which are used for prior linking via the search chaining API. """ super().__init__(model=model) self._max_log_likelihood_sample = max_log_likelihood_sample - self._median_pdf = median_pdf + self._median_pdf_sample = median_pdf_sample self._log_evidence = log_evidence self.derived_summary = None @@ -43,8 +45,21 @@ def max_log_likelihood_sample(self): return self._max_log_likelihood_sample @property - def median_pdf(self): - return self._median_pdf + def median_pdf_sample(self): + return self._median_pdf_sample + + @to_instance + def median_pdf(self, as_instance: bool = True) -> List[float]: + """ + The parameters of the maximum log likelihood sample of the `NonLinearSearch` returned as a model instance or + list of values. 
+ """ + + sample = self.median_pdf_sample + + return sample.parameter_lists_for_paths( + self.paths if sample.is_path_kwargs else self.names + ) @property def log_evidence(self): diff --git a/autofit/non_linear/search/abstract_search.py b/autofit/non_linear/search/abstract_search.py index bada527a5..12183e127 100644 --- a/autofit/non_linear/search/abstract_search.py +++ b/autofit/non_linear/search/abstract_search.py @@ -11,7 +11,7 @@ from typing import Optional, Union, Tuple, List, Dict from autoconf import conf, cached_property -from autoconf.dictable import to_dict +from autoconf.dictable import to_dict, from_dict from autofit import exc, jax_wrapper from autofit.database.sqlalchemy_ import sa from autofit.graphical import ( @@ -31,6 +31,7 @@ from autofit.non_linear.paths.directory import DirectoryPaths from autofit.non_linear.paths.sub_directory_paths import SubDirectoryPaths from autofit.non_linear.samples.samples import Samples +from autofit.non_linear.samples.summary import SamplesSummary from autofit.non_linear.result import Result from autofit.non_linear.timer import Timer from autofit.non_linear.analysis import Analysis @@ -719,11 +720,7 @@ def result_via_completed_fit( """ model.freeze() - - samples_summary = self.paths.load_json(name="samples_summary") - - print(samples_summary) - dd + samples_summary = self.paths.load_samples_summary() result = analysis.make_result( samples_summary=samples_summary, @@ -737,15 +734,13 @@ def result_via_completed_fit( self.perform_visualization( model=model, analysis=analysis, - search_internal=search_internal, + samples_summary=samples_summary, during_analysis=False, ) if self.force_pickle_overwrite: self.logger.info("Forcing pickle overwrite") - self.paths.save_json("samples_summary", to_dict(samples.summary())) - analysis.save_results(paths=self.paths, result=result) analysis.save_results_combined(paths=self.paths, result=result) @@ -906,8 +901,10 @@ def perform_update( samples = self.samples_from(model=model, 
search_internal=search_internal) + samples_summary = samples.summary() + try: - instance = samples.max_log_likelihood() + instance = samples_summary.instance except exc.FitException: return samples @@ -931,6 +928,7 @@ def perform_update( ) self.paths.save_samples(samples=samples_for_csv) + self.paths.save_samples_summary(samples_summary=samples_summary) latent_variables = analysis.latent_variables if latent_variables: @@ -939,11 +937,10 @@ def perform_update( samples=samples, ) - self.paths.save_json("samples_summary", to_dict(samples.summary())) - self.perform_visualization( model=model, analysis=analysis, + samples_summary=samples_summary, during_analysis=during_analysis, search_internal=search_internal, ) @@ -986,6 +983,7 @@ def perform_visualization( self, model: AbstractPriorModel, analysis: AbstractPriorModel, + samples_summary : SamplesSummary, during_analysis: bool, search_internal=None, ): @@ -998,9 +996,9 @@ def perform_visualization( The update performs the following tasks (if the settings indicate they should be performed): - 1) Visualize the search results. - 2) Visualize the maximum log likelihood model using model-specific visualization implented via the `Analysis` + 1) Visualize the maximum log likelihood model using model-specific visualization implented via the `Analysis` object. + 2) Visualize the search results. Parameters ---------- @@ -1013,29 +1011,30 @@ def perform_visualization( If the update is during a non-linear search, in which case tasks are only performed after a certain number of updates and only a subset of visualization may be performed. 
""" - samples = self.samples_from(model=model, search_internal=search_internal) - - try: - instance = samples.max_log_likelihood() - except exc.FitException: - return samples - - if analysis.should_visualize(paths=self.paths, during_analysis=during_analysis): - if not isinstance(self.paths, NullPaths): - self.plot_results(samples=samples) self.logger.debug("Visualizing") + if analysis.should_visualize(paths=self.paths, during_analysis=during_analysis): analysis.visualize( - paths=self.paths, instance=instance, during_analysis=during_analysis + paths=self.paths, + instance=samples_summary.instance, + during_analysis=during_analysis ) analysis.visualize_combined( analyses=None, paths=self.paths, - instance=instance, + instance=samples_summary.instance, during_analysis=during_analysis, ) + if analysis.should_visualize(paths=self.paths, during_analysis=during_analysis): + + if not isinstance(self.paths, NullPaths): + + samples = self.samples_from(model=model, search_internal=search_internal) + + self.plot_results(samples=samples) + @property def samples_cls(self): raise NotImplementedError() From e652312dd480eae70eac638cb6b4b4f56d8376f1 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 13:24:42 +0000 Subject: [PATCH 10/22] result_via_completed_fit clean and documented --- autofit/non_linear/search/abstract_search.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/autofit/non_linear/search/abstract_search.py b/autofit/non_linear/search/abstract_search.py index 12183e127..615733441 100644 --- a/autofit/non_linear/search/abstract_search.py +++ b/autofit/non_linear/search/abstract_search.py @@ -692,8 +692,11 @@ def result_via_completed_fit( """ Returns the result of the non-linear search of a completed model-fit. - The result contains the non-linear search samples, which are loaded from the searches internal results, - or the `samples.csv` file if the internal results are not available. 
+ The result contains the non-linear search samples summary, which contains the maximum log likelihood instance + that is used for visualization and prior passing via the search chaining API. + + This funciton may also load the full samples of the completed fit, for example if visualization of the + seatch chains (e.g. a corner plot) is performed. This task is optional and be slow due to loading times. Optional tasks can be performed to update the results of the model-fit on hard-disk depending on the following entries of the `general.yaml` config file's `output` section: From e7cf73bd1924e52ba4d5197e8488f6a2a3586fc3 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 13:33:01 +0000 Subject: [PATCH 11/22] clean up perform update --- autofit/non_linear/paths/directory.py | 19 ++++++++++++++++++- autofit/non_linear/search/abstract_search.py | 20 +------------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/autofit/non_linear/paths/directory.py b/autofit/non_linear/paths/directory.py index 64e18f2f1..4f732e722 100644 --- a/autofit/non_linear/paths/directory.py +++ b/autofit/non_linear/paths/directory.py @@ -237,8 +237,8 @@ def save_samples(self, samples): if conf.instance["general"]["output"]["samples_to_csv"] and should_output( "samples" ): - samples.write_table(filename=self._samples_file) self.save_json("samples_info", samples.samples_info) + if isinstance(samples, SamplesPDF): try: samples.save_covariance_matrix(self._covariance_file) @@ -247,6 +247,23 @@ def save_samples(self, samples): f"Could not save covariance matrix because of the following error:\n{e}" ) + samples_weight_threshold = conf.instance["output"]["samples_weight_threshold"] + + if os.environ.get("PYAUTOFIT_TEST_MODE") == "1": + samples_weight_threshold = None + + if samples_weight_threshold is not None: + samples = samples.samples_above_weight_threshold_from( + weight_threshold=samples_weight_threshold + ) + + logger.info( + f"Samples with weight less 
than {samples_weight_threshold} removed from samples.csv and not used to " + f"compute parameter estimates errors, etc." + ) + + samples.write_table(filename=self._samples_file) + def save_samples_summary(self, samples_summary : SamplesSummary): model = samples_summary.model diff --git a/autofit/non_linear/search/abstract_search.py b/autofit/non_linear/search/abstract_search.py index 615733441..7b1ae34d0 100644 --- a/autofit/non_linear/search/abstract_search.py +++ b/autofit/non_linear/search/abstract_search.py @@ -903,7 +903,6 @@ def perform_update( self.timer.update() samples = self.samples_from(model=model, search_internal=search_internal) - samples_summary = samples.summary() try: @@ -913,24 +912,7 @@ def perform_update( if self.is_master: - samples_for_csv = samples - - samples_weight_threshold = conf.instance["output"]["samples_weight_threshold"] - - if os.environ.get("PYAUTOFIT_TEST_MODE") == "1": - samples_weight_threshold = None - - if samples_weight_threshold is not None: - samples_for_csv = samples_for_csv.samples_above_weight_threshold_from( - weight_threshold=samples_weight_threshold - ) - - logger.info( - f"Samples with weight less than {samples_weight_threshold} removed from samples.csv and not used to " - f"compute parameter estimates errors, etc." 
- ) - - self.paths.save_samples(samples=samples_for_csv) + self.paths.save_samples(samples=samples) self.paths.save_samples_summary(samples_summary=samples_summary) latent_variables = analysis.latent_variables From fb1cb1955d637b5d447b956a2b81d3b6b22bfb79 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 14:20:39 +0000 Subject: [PATCH 12/22] mockSamplesSummary to fix some test --- autofit/mock.py | 1 + autofit/non_linear/mock/mock_result.py | 16 +++++- autofit/non_linear/mock/mock_samples.py | 30 +---------- .../non_linear/mock/mock_samples_summary.py | 52 +++++++++++++++++++ autofit/non_linear/mock/mock_search.py | 12 +++-- autofit/non_linear/result.py | 8 ++- .../database/identifier/test_identifiers.py | 8 +-- 7 files changed, 87 insertions(+), 40 deletions(-) create mode 100644 autofit/non_linear/mock/mock_samples_summary.py diff --git a/autofit/mock.py b/autofit/mock.py index 5c01d72df..d29ae6aa2 100644 --- a/autofit/mock.py +++ b/autofit/mock.py @@ -3,6 +3,7 @@ from autofit.non_linear.mock.mock_result import MockResultGrid from autofit.non_linear.mock.mock_search import MockSearch from autofit.non_linear.mock.mock_search import MockOptimizer +from autofit.non_linear.mock.mock_samples_summary import MockSamplesSummary from autofit.non_linear.mock.mock_samples import MockSamples from autofit.non_linear.mock.mock_samples import MockSamplesNest diff --git a/autofit/non_linear/mock/mock_result.py b/autofit/non_linear/mock/mock_result.py index 44fbd51ba..db16844c9 100644 --- a/autofit/non_linear/mock/mock_result.py +++ b/autofit/non_linear/mock/mock_result.py @@ -1,24 +1,36 @@ +from typing import Union + from autofit.mapper.model import ModelInstance from autofit.mapper.model_mapper import ModelMapper from autofit.non_linear.result import Result +from autofit.non_linear.mock.mock_samples_summary import MockSamplesSummary from autofit.non_linear.mock.mock_samples import MockSamples class MockResult(Result): def __init__( self, + samples_summary 
: MockSamplesSummary, + paths=None, samples=None, instance=None, analysis=None, search=None, model=None, ): - super().__init__(samples, search_internal=None) + + super().__init__( + samples_summary=samples_summary, + paths=paths, + samples=samples, + search_internal=None + ) self._instance = instance or ModelInstance() self._samples = samples or MockSamples( - max_log_likelihood_instance=self.instance, model=model or ModelMapper() + # max_log_likelihood_instance=self.instance, + model=model or ModelMapper() ) self.prior_means = None diff --git a/autofit/non_linear/mock/mock_samples.py b/autofit/non_linear/mock/mock_samples.py index dd48c1cee..2eae0e958 100644 --- a/autofit/non_linear/mock/mock_samples.py +++ b/autofit/non_linear/mock/mock_samples.py @@ -14,9 +14,7 @@ def __init__( model=None, sample_list=None, samples_info=None, - max_log_likelihood_instance=None, log_likelihood_list=None, - prior_means=None, **kwargs, ): self._log_likelihood_list = log_likelihood_list @@ -34,19 +32,11 @@ def __init__( **kwargs, ) - self._max_log_likelihood_instance = max_log_likelihood_instance - self._prior_means = prior_means - @property def default_sample_list(self): - if self._log_likelihood_list is not None: - log_likelihood_list = self._log_likelihood_list - else: - log_likelihood_list = range(3) - return [ Sample(log_likelihood=log_likelihood, log_prior=0.0, weight=0.0) - for log_likelihood in log_likelihood_list + for log_likelihood in range(3) ] @property @@ -56,24 +46,6 @@ def log_likelihood_list(self): return self._log_likelihood_list - def max_log_likelihood(self, as_instance: bool = True): - if self._max_log_likelihood_instance is None: - try: - return super().max_log_likelihood(as_instance=as_instance) - except (KeyError, AttributeError): - pass - - if as_instance: - return self._max_log_likelihood_instance - return list(self.sample_list[0].kwargs.values()) - - @property - def prior_means(self): - if self._prior_means is None: - return super().prior_means - - return 
self._prior_means - @property def unconverged_sample_size(self): return self.samples_info["unconverged_sample_size"] diff --git a/autofit/non_linear/mock/mock_samples_summary.py b/autofit/non_linear/mock/mock_samples_summary.py new file mode 100644 index 000000000..07437c430 --- /dev/null +++ b/autofit/non_linear/mock/mock_samples_summary.py @@ -0,0 +1,52 @@ +from autofit.non_linear.samples.sample import Sample +from autofit.non_linear.samples.summary import SamplesSummary + + +class MockSamplesSummary(SamplesSummary): + def __init__( + self, + model=None, + max_log_likelihood_sample=None, + median_pdf_sample=None, + log_evidence=None, + max_log_likelihood_instance=None, + prior_means=None, + **kwargs, + ): + + super().__init__( + model=model, + max_log_likelihood_sample=max_log_likelihood_sample, + median_pdf_sample=median_pdf_sample, + log_evidence=log_evidence, + ) + + self._max_log_likelihood_instance = max_log_likelihood_instance + self._prior_means = prior_means + + @property + def max_log_likelihood_sample(self): + return Sample(log_likelihood=1.0, log_prior=0.0, weight=0.0) + + @property + def median_pdf_sample(self): + return Sample(log_likelihood=1.0, log_prior=0.0, weight=0.0) + + def max_log_likelihood(self, as_instance: bool = True): + + if self._max_log_likelihood_instance is None: + try: + return super().max_log_likelihood(as_instance=as_instance) + except (KeyError, AttributeError): + pass + + if as_instance: + return self._max_log_likelihood_instance + + @property + def prior_means(self): + if self._prior_means is None: + return super().prior_means + + return self._prior_means + diff --git a/autofit/non_linear/mock/mock_search.py b/autofit/non_linear/mock/mock_search.py index a17c57ca8..83474d053 100644 --- a/autofit/non_linear/mock/mock_search.py +++ b/autofit/non_linear/mock/mock_search.py @@ -1,6 +1,7 @@ import math from typing import Optional, Tuple +import autofit from autoconf import conf from autofit import exc from 
autofit.mapper.model_mapper import ModelMapper @@ -9,6 +10,7 @@ from autofit.non_linear.search.abstract_search import NonLinearSearch from autofit.non_linear.mock.mock_result import MockResult from autofit.non_linear.mock.mock_samples import MockSamples +from autofit.non_linear.mock.mock_samples_summary import MockSamplesSummary from autofit.non_linear.samples import Sample @@ -47,7 +49,9 @@ def __init__( self.samples = samples or MockSamples(ModelMapper()) - self.result = MockResult(samples=samples) if result is None else result + self.result = MockResult( + samples_summary=MockSamplesSummary(), + ) if result is None else result self.fit_fast = fit_fast self.sample_multiplier = sample_multiplier @@ -78,7 +82,7 @@ def __call__(self, vector): log_likelihood = analysis.log_likelihood_function(instance) if self.result.instance is None: - self.result.instance = instance + self.result.samples_summary._instance = instance # Return Chi squared return -2 * log_likelihood @@ -129,7 +133,9 @@ def _fit(self, model, analysis): self.paths.save_samples(self.samples) return analysis.make_result( - samples=samples, search_internal=None + samples_summary=samples, + samples=samples, + search_internal=None ) def perform_update(self, model, analysis, during_analysis, search_internal=None): diff --git a/autofit/non_linear/result.py b/autofit/non_linear/result.py index 2e807a45c..502c23878 100644 --- a/autofit/non_linear/result.py +++ b/autofit/non_linear/result.py @@ -47,9 +47,13 @@ class AbstractResult(ABC): def __init__(self, samples_summary, paths): - self.samples_summary = samples_summary + self._samples_summary = samples_summary self.paths = paths + @property + def samples_summary(self): + return self._samples_summary + @property @abstractmethod def samples(self): @@ -219,7 +223,7 @@ def dict(self) -> dict: """ return { "max_log_likelihood": self.samples_summary.max_log_likelihood_sample.model_dict(), - "median pdf": self.samples_summary.median_pdf.model_dict(), + "median pdf": 
self.samples_summary.median_pdf_sample.model_dict(), } @property diff --git a/test_autofit/database/identifier/test_identifiers.py b/test_autofit/database/identifier/test_identifiers.py index cd86f82d7..47300c6d1 100644 --- a/test_autofit/database/identifier/test_identifiers.py +++ b/test_autofit/database/identifier/test_identifiers.py @@ -289,14 +289,14 @@ def test__identifier_description__after_model_and_instance(): max_log_likelihood_instance = model.instance_from_prior_medians() - samples = af.m.MockSamples( + samples_summary = af.m.MockSamplesSummary( + model=model, max_log_likelihood_instance=max_log_likelihood_instance, prior_means=[1.0, 3.0, 5.0], - model=model, ) - result = af.Result( - samples=samples, + result = af.mock.MockResult( + samples_summary=samples_summary, ) model.gaussian.centre = result.model.gaussian.centre From e36dd0e398e2630349a292c441788b7136b968ff Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 14:22:42 +0000 Subject: [PATCH 13/22] fix unit tests by using MockResult --- autofit/non_linear/mock/mock_result.py | 2 +- test_autofit/analysis/test_regression.py | 2 +- test_autofit/graphical/test_unification.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autofit/non_linear/mock/mock_result.py b/autofit/non_linear/mock/mock_result.py index db16844c9..4f1e6570d 100644 --- a/autofit/non_linear/mock/mock_result.py +++ b/autofit/non_linear/mock/mock_result.py @@ -11,7 +11,7 @@ class MockResult(Result): def __init__( self, - samples_summary : MockSamplesSummary, + samples_summary : MockSamplesSummary = None, paths=None, samples=None, instance=None, diff --git a/test_autofit/analysis/test_regression.py b/test_autofit/analysis/test_regression.py index 0672da39f..401fd08a9 100644 --- a/test_autofit/analysis/test_regression.py +++ b/test_autofit/analysis/test_regression.py @@ -12,7 +12,7 @@ def test_pickle(Analysis): assert isinstance(loaded, CombinedAnalysis) -class MyResult(af.Result): +class 
MyResult(af.mock.MockResult): pass diff --git a/test_autofit/graphical/test_unification.py b/test_autofit/graphical/test_unification.py index e3975e0e3..a403a49d9 100644 --- a/test_autofit/graphical/test_unification.py +++ b/test_autofit/graphical/test_unification.py @@ -52,7 +52,7 @@ def test_projected_model(): for _ in range(100) ], ) - result = af.Result(samples=samples) + result = af.mock.MockResult(samples=samples) projected_model = result.projected_model assert projected_model.prior_count == 3 From ce0de40cae00df0d2548a231c060a1347a5da897 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 16:10:07 +0000 Subject: [PATCH 14/22] test_abstract_search uses MockSamplesSummary --- autofit/graphical/declarative/result.py | 5 +- autofit/non_linear/mock/mock_result.py | 14 ++- autofit/non_linear/mock/mock_samples.py | 2 + autofit/non_linear/mock/mock_search.py | 52 ++++----- autofit/non_linear/samples/summary.py | 2 +- test_autofit/analysis/conftest.py | 2 +- test_autofit/graphical/test_history.py | 4 +- test_autofit/mapper/test_by_path.py | 44 ++++---- .../non_linear/grid/test_result_json.py | 2 +- test_autofit/non_linear/result/test_result.py | 2 +- .../non_linear/search/test_abstract_search.py | 9 +- test_autofit/serialise/test_samples.py | 105 +++++++----------- 12 files changed, 112 insertions(+), 131 deletions(-) diff --git a/autofit/graphical/declarative/result.py b/autofit/graphical/declarative/result.py index cf70ed3cb..f1136765d 100644 --- a/autofit/graphical/declarative/result.py +++ b/autofit/graphical/declarative/result.py @@ -25,7 +25,10 @@ def __init__( results Results from hierarchical factor optimisations """ - super().__init__() + super().__init__( + samples_summary=None, + paths=None + ) self.results = results @property diff --git a/autofit/non_linear/mock/mock_result.py b/autofit/non_linear/mock/mock_result.py index 4f1e6570d..d5d925e27 100644 --- a/autofit/non_linear/mock/mock_result.py +++ 
b/autofit/non_linear/mock/mock_result.py @@ -38,11 +38,17 @@ def __init__( self.search = search self.model = model - def model_absolute(self, absolute): - return self.model + def model_absolute(self, a): + try: + return self.samples_summary.model_absolute(a) + except AttributeError: + return self.model - def model_relative(self, relative): - return self.model + def model_relative(self, r): + try: + return self.samples_summary.model_relative(r) + except AttributeError: + return self.model @property def last(self): diff --git a/autofit/non_linear/mock/mock_samples.py b/autofit/non_linear/mock/mock_samples.py index 2eae0e958..94ce68763 100644 --- a/autofit/non_linear/mock/mock_samples.py +++ b/autofit/non_linear/mock/mock_samples.py @@ -15,6 +15,7 @@ def __init__( sample_list=None, samples_info=None, log_likelihood_list=None, + prior_means=None, **kwargs, ): self._log_likelihood_list = log_likelihood_list @@ -46,6 +47,7 @@ def log_likelihood_list(self): return self._log_likelihood_list + @property def unconverged_sample_size(self): return self.samples_info["unconverged_sample_size"] diff --git a/autofit/non_linear/mock/mock_search.py b/autofit/non_linear/mock/mock_search.py index 83474d053..8874df137 100644 --- a/autofit/non_linear/mock/mock_search.py +++ b/autofit/non_linear/mock/mock_search.py @@ -1,7 +1,5 @@ -import math from typing import Optional, Tuple -import autofit from autoconf import conf from autofit import exc from autofit.mapper.model_mapper import ModelMapper @@ -36,7 +34,7 @@ class MockSearch(NonLinearSearch): def __init__( self, name="", - samples=None, + samples_summary=None, result=None, unique_tag: Optional[str] = None, fit_fast=True, @@ -47,7 +45,7 @@ def __init__( ): super().__init__(name=name, unique_tag=unique_tag, **kwargs) - self.samples = samples or MockSamples(ModelMapper()) + self.samples_summary = samples_summary self.result = MockResult( samples_summary=MockSamplesSummary(), @@ -87,7 +85,7 @@ def __call__(self, vector): # Return Chi 
squared return -2 * log_likelihood - self.paths.save_samples(self.samples) + self.paths.save_samples_summary(self.samples_summary) if self.save_for_aggregator: analysis.save_attributes(paths=self.paths) @@ -120,42 +118,40 @@ def _fit(self, model, analysis): if unit_vector[index] >= 1: raise e index = (index + 1) % model.prior_count - samples = MockSamples( - sample_list=samples_with_log_likelihood_list( - self.sample_multiplier * fit, _make_samples(model) - ), + + samples_summary = MockSamplesSummary( + # sample_list=samples_with_log_likelihood_list( + # self.sample_multiplier * fit, _make_samples(model) + # ), model=model, prior_means=[ prior.mean for prior in sorted(model.priors, key=lambda prior: prior.id) ], ) - self.paths.save_samples(self.samples) + self.paths.save_samples_summary(self.samples_summary) return analysis.make_result( - samples_summary=samples, - samples=samples, - search_internal=None + samples_summary=samples_summary, ) def perform_update(self, model, analysis, during_analysis, search_internal=None): - if self.samples is not None and not self.return_sensitivity_results: - self.paths.save_samples(self.samples) - return self.samples - - return MockSamples( - sample_list=samples_with_log_likelihood_list( - [1.0, 2.0], _make_samples(model) - ), - prior_means=[ - prior.mean for prior in sorted(model.priors, key=lambda prior: prior.id) - ], - model=model, - ) - def samples_from(self, model): - return self.samples + if self.samples_summary is not None and not self.return_sensitivity_results: + + self.paths.save_samples_summary(self.samples_summary) + +# return self.samples_summary + # return MockSamples( + # sample_list=samples_with_log_likelihood_list( + # [1.0, 2.0], _make_samples(model) + # ), + # prior_means=[ + # prior.mean for prior in sorted(model.priors, key=lambda prior: prior.id) + # ], + # model=model, + # ) class MockOptimizer(MockSearch): def __init__(self, **kwargs): diff --git a/autofit/non_linear/samples/summary.py 
b/autofit/non_linear/samples/summary.py index fdd14fff7..b19273866 100644 --- a/autofit/non_linear/samples/summary.py +++ b/autofit/non_linear/samples/summary.py @@ -16,8 +16,8 @@ class SamplesSummary(SamplesInterface): def __init__( self, - model: AbstractPriorModel, max_log_likelihood_sample: Sample, + model: AbstractPriorModel = None, median_pdf_sample : Optional[Sample] = None, log_evidence: Optional[float] = None, ): diff --git a/test_autofit/analysis/conftest.py b/test_autofit/analysis/conftest.py index 646dd2ea1..add380f79 100644 --- a/test_autofit/analysis/conftest.py +++ b/test_autofit/analysis/conftest.py @@ -3,7 +3,7 @@ import autofit as af -class Result(af.Result): +class Result(af.mock.MockResult): pass diff --git a/test_autofit/graphical/test_history.py b/test_autofit/graphical/test_history.py index c1b7de98a..9fc9839d6 100644 --- a/test_autofit/graphical/test_history.py +++ b/test_autofit/graphical/test_history.py @@ -36,7 +36,7 @@ def make_history(factor): @pytest.fixture(name="result") def make_result(model): # noinspection PyTypeChecker - return af.Result(None) + return af.mock.MockResult(None) @pytest.fixture(name="good_history") @@ -155,7 +155,7 @@ def generate_samples(model): @pytest.fixture(name="results") def make_results(hierarchical_factor): return [ - af.Result( + af.mock.MockResult( samples=generate_samples(factor.prior_model), ) for factor in hierarchical_factor.factors diff --git a/test_autofit/mapper/test_by_path.py b/test_autofit/mapper/test_by_path.py index 7b3b6eaf9..e13f956b8 100644 --- a/test_autofit/mapper/test_by_path.py +++ b/test_autofit/mapper/test_by_path.py @@ -164,22 +164,20 @@ def test_names_linked(self, linked_model): ] -@pytest.fixture(name="samples") -def make_samples(model): - return af.Samples( - model, - [ - af.Sample( - log_likelihood=1.0, - log_prior=1.0, - weight=1.0, - kwargs={ - ("gaussian", "centre"): 0.1, - ("gaussian", "normalization"): 0.2, - ("gaussian", "sigma"): 0.3, - }, - ) - ], 
+@pytest.fixture(name="samples_summary") +def make_samples_summary(model): + return af.SamplesSummary( + model=model, + max_log_likelihood_sample=af.Sample( + log_likelihood=1.0, + log_prior=1.0, + weight=1.0, + kwargs={ + ("gaussian", "centre"): 0.1, + ("gaussian", "normalization"): 0.2, + ("gaussian", "sigma"): 0.3, + }, + ) ) @@ -191,20 +189,22 @@ def make_samples(model): (("gaussian", "sigma"), [0.3]), ], ) -def test_values_for_path(samples, path, value): - assert samples.values_for_path(path) == value +def test_values_for_path(samples_summary, path, value): + assert samples_summary.values_for_path(path) == value @pytest.fixture(name="result") -def make_result(model, samples): - return af.Result(samples) +def make_result(model, samples_summary): + return af.mock.MockResult( + samples_summary=samples_summary + ) @pytest.fixture(name="modified_result") -def make_modified_result(model, samples): +def make_modified_result(model, samples_summary): model.gaussian.sigma = af.GaussianPrior(mean=0.5, sigma=1) model.gaussian.centre = af.GaussianPrior(mean=0.5, sigma=1) - return af.Result(samples) + return af.mock.MockResult(samples_summary=samples_summary) class TestFromResult: diff --git a/test_autofit/non_linear/grid/test_result_json.py b/test_autofit/non_linear/grid/test_result_json.py index d39608160..3d63fff13 100644 --- a/test_autofit/non_linear/grid/test_result_json.py +++ b/test_autofit/non_linear/grid/test_result_json.py @@ -74,7 +74,7 @@ def test_embedded_sample_model_dict(): def test_result_json(sample): model = af.Model(af.Gaussian) - result = af.Result( + result = af.mock.MockResult( samples=af.Samples( sample_list=[sample], model=model, diff --git a/test_autofit/non_linear/result/test_result.py b/test_autofit/non_linear/result/test_result.py index ee816d2df..1fd628c54 100644 --- a/test_autofit/non_linear/result/test_result.py +++ b/test_autofit/non_linear/result/test_result.py @@ -9,7 +9,7 @@ def make_result(): mapper = af.ModelMapper() mapper.component = 
af.m.MockClassx2Tuple # noinspection PyTypeChecker - return af.Result( + return af.mock.MockResult( samples=af.m.MockSamples( sample_list=[ Sample( diff --git a/test_autofit/non_linear/search/test_abstract_search.py b/test_autofit/non_linear/search/test_abstract_search.py index 117666efb..74f6a3121 100644 --- a/test_autofit/non_linear/search/test_abstract_search.py +++ b/test_autofit/non_linear/search/test_abstract_search.py @@ -22,10 +22,10 @@ def make_result(): mapper = af.ModelMapper() mapper.component = af.m.MockClassx2Tuple # noinspection PyTypeChecker - return af.Result( - samples=af.m.MockSamples( - prior_means=[0, 1], + return af.mock.MockResult( + samples_summary=af.m.MockSamplesSummary( model=mapper, + prior_means=[0, 1], ), ) @@ -43,6 +43,7 @@ def test__environment_variable_override(): class TestResult: def test_model(self, result): + component = result.model.component assert component.one_tuple.one_tuple_0.mean == 0 assert component.one_tuple.one_tuple_1.mean == 1 @@ -66,7 +67,7 @@ def test_model_relative(self, result): def test_raises(self, result): with pytest.raises(af.exc.PriorException): result.model.mapper_from_prior_means( - result.samples.prior_means, a=2.0, r=1.0 + result.samples_summary.prior_means, a=2.0, r=1.0 ) diff --git a/test_autofit/serialise/test_samples.py b/test_autofit/serialise/test_samples.py index a999e10c0..26eeb9f42 100644 --- a/test_autofit/serialise/test_samples.py +++ b/test_autofit/serialise/test_samples.py @@ -1,5 +1,3 @@ -import numpy as np - import autofit as af import pytest @@ -46,89 +44,64 @@ def make_summary(samples_pdf): def test_summary(summary, model, sample): assert summary.model is model assert summary.max_log_likelihood_sample == sample - assert isinstance(summary.covariance_matrix, np.ndarray) @pytest.fixture(name="summary_dict") def make_summary_dict(): return { - "arguments": { - "covariance_matrix": { - "array": [ - [2.0, 3.0, 3.9999999999999996], - [3.0, 4.5, 6.0], - [4.0, 6.0, 7.999999999999999], - ], - 
"dtype": "float64", - "type": "ndarray", - }, - "log_evidence": None, - "max_log_likelihood_sample": { - "arguments": { - "kwargs": { - "arguments": { - "centre": 2.0, - "normalization": 4.0, - "sigma": 6.0, - }, - "type": "dict", - }, - "log_likelihood": 4.0, - "log_prior": 5.0, - "weight": 6.0, - }, - "class_path": "autofit.non_linear.samples.sample.Sample", - "type": "instance", - }, - "model": { - "arguments": { - "centre": { - "lower_limit": 0.0, - "type": "Uniform", - "upper_limit": 1.0, - }, - "normalization": { - "lower_limit": 0.0, - "type": "Uniform", - "upper_limit": 1.0, - }, - "sigma": { - "lower_limit": 0.0, - "type": "Uniform", - "upper_limit": 1.0, - }, - }, - "class_path": "autofit.example.model.Gaussian", - "type": "model", - }, - }, - "class_path": "autofit.non_linear.samples.summary.SamplesSummary", - "type": "instance", - } + 'type': + 'instance', 'class_path': + 'autofit.non_linear.samples.summary.SamplesSummary', 'arguments': { + 'log_evidence': None, + 'model': { + 'class_path': 'autofit.example.model.Gaussian', + 'type': 'model', + 'arguments': { + 'centre': { + 'lower_limit': 0.0, 'upper_limit': 1.0, 'type': 'Uniform'}, + 'normalization': {'lower_limit': 0.0, 'upper_limit': 1.0, + 'type': 'Uniform'}, + 'sigma': {'lower_limit': 0.0, 'upper_limit': 1.0, + 'type': 'Uniform'}}}, + 'max_log_likelihood_sample': {'type': 'instance', + 'class_path': 'autofit.non_linear.samples.sample.Sample', + 'arguments': {'log_likelihood': 4.0, 'log_prior': 5.0, + 'weight': 6.0, 'kwargs': {'type': 'dict', + 'arguments': { + 'centre': 2.0, + 'normalization': 4.0, + 'sigma': 6.0}}}}, + 'median_pdf_sample': {'type': 'instance', + 'class_path': 'autofit.non_linear.samples.sample.Sample', + 'arguments': {'log_likelihood': 4.0, 'log_prior': 5.0, 'weight': 6.0, + 'kwargs': {'type': 'dict', 'arguments': {'centre': 2.0, + 'normalization': 4.0, + 'sigma': 6.0}}}}}} def test_dict(summary, summary_dict, remove_ids): + print(remove_ids(to_dict(summary))) + assert 
remove_ids(to_dict(summary)) == summary_dict def test_from_dict(summary_dict): summary = from_dict(summary_dict) assert isinstance(summary, SamplesSummary) - assert isinstance(summary.model, af.Model) - assert isinstance(summary.max_log_likelihood_sample, af.Sample) - assert isinstance(summary.covariance_matrix, np.ndarray) - assert isinstance(summary.max_log_likelihood(), af.Gaussian) + +# assert isinstance(summary.model, af.Model) +# assert isinstance(summary.max_log_likelihood_sample, af.Sample) + +# assert isinstance(summary.max_log_likelihood(), af.Gaussian) def test_generic_from_dict(summary_dict): summary = from_dict(summary_dict) assert isinstance(summary, SamplesSummary) - assert isinstance(summary.model, af.Model) + # assert isinstance(summary.model, af.Model) assert isinstance(summary.max_log_likelihood_sample, af.Sample) - assert isinstance(summary.covariance_matrix, np.ndarray) - -def test_covariance_interpolator(summary): - interpolator = af.CovarianceInterpolator([summary]) - assert interpolator[interpolator.centre == 0.5] +# def test_covariance_interpolator(summary): +# interpolator = af.CovarianceInterpolator([summary]) +# assert interpolator[interpolator.centre == 0.5] From 4e4c59dc81254a6dc4bb85f1f496f8b796f56fcd Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 16:17:38 +0000 Subject: [PATCH 15/22] fix result json --- autofit/non_linear/mock/mock_samples_summary.py | 8 ++++++++ test_autofit/non_linear/grid/test_result_json.py | 8 +++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/autofit/non_linear/mock/mock_samples_summary.py b/autofit/non_linear/mock/mock_samples_summary.py index 07437c430..16f5cc2f8 100644 --- a/autofit/non_linear/mock/mock_samples_summary.py +++ b/autofit/non_linear/mock/mock_samples_summary.py @@ -26,10 +26,18 @@ def __init__( @property def max_log_likelihood_sample(self): + + if self._max_log_likelihood_sample is not None: + return self._max_log_likelihood_sample + return 
Sample(log_likelihood=1.0, log_prior=0.0, weight=0.0) @property def median_pdf_sample(self): + + if self._median_pdf_sample is not None: + return self._median_pdf_sample + return Sample(log_likelihood=1.0, log_prior=0.0, weight=0.0) def max_log_likelihood(self, as_instance: bool = True): diff --git a/test_autofit/non_linear/grid/test_result_json.py b/test_autofit/non_linear/grid/test_result_json.py index 3d63fff13..d69b160e0 100644 --- a/test_autofit/non_linear/grid/test_result_json.py +++ b/test_autofit/non_linear/grid/test_result_json.py @@ -75,16 +75,14 @@ def test_embedded_sample_model_dict(): def test_result_json(sample): model = af.Model(af.Gaussian) result = af.mock.MockResult( - samples=af.Samples( - sample_list=[sample], + samples_summary=af.m.MockSamplesSummary( model=model, + max_log_likelihood_sample=sample, ), ) - assert result.dict() == { - "max_log_likelihood": { + assert result.dict()["max_log_likelihood"] == { "centre": 1.0, "intensity": 2.0, "sigma": 3.0, } - } From 7af128abef4741dcef65203ada344ef65d36a796 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 16:33:44 +0000 Subject: [PATCH 16/22] hack to fix emcee --- autofit/non_linear/paths/directory.py | 9 +++++++++ autofit/non_linear/result.py | 18 ++++++------------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/autofit/non_linear/paths/directory.py b/autofit/non_linear/paths/directory.py index 4f732e722..d7531d3ce 100644 --- a/autofit/non_linear/paths/directory.py +++ b/autofit/non_linear/paths/directory.py @@ -205,6 +205,15 @@ def load_search_internal(self): ------- The results of the non-linear search in its internal representation. """ + + # This is a nasty hack to load emcee backends. It will be removed once the source code is more stable. 
+ + import emcee + + backend_filename = self.search_internal_path / "search_internal.hdf" + if os.path.isfile(backend_filename): + return emcee.backends.HDFBackend(filename=str(backend_filename)) + filename = self.search_internal_path / "search_internal.dill" with open_(filename, "rb") as f: diff --git a/autofit/non_linear/result.py b/autofit/non_linear/result.py index 502c23878..846958d97 100644 --- a/autofit/non_linear/result.py +++ b/autofit/non_linear/result.py @@ -177,12 +177,12 @@ def model_bounded(self, b: float) -> AbstractPriorModel: class Result(AbstractResult): def __init__( - self, - samples_summary : SamplesSummary, - paths : AbstractPaths, - samples: Optional[Samples] = None, - search_internal : Optional[object] = None, - latent_variables=None + self, + samples_summary : SamplesSummary, + paths : AbstractPaths, + samples: Optional[Samples] = None, + search_internal : Optional[object] = None, + latent_variables=None ): """ The result of a non-linear search, which includes: @@ -246,12 +246,6 @@ def search_internal(self): if self._search_internal is not None: return self._search_internal - # - # try: - # search_internal = self.backend - # except (AttributeError, FileNotFoundError): - # search_internal = None - try: return self.paths.load_search_internal() except FileNotFoundError: From 52fd89e242c14b8f5aea4164ed40de34dbcd4c74 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Fri, 22 Mar 2024 16:41:58 +0000 Subject: [PATCH 17/22] docs --- autofit/non_linear/result.py | 79 +++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/autofit/non_linear/result.py b/autofit/non_linear/result.py index 846958d97..ca4e076d8 100644 --- a/autofit/non_linear/result.py +++ b/autofit/non_linear/result.py @@ -46,6 +46,16 @@ def summary(self): class AbstractResult(ABC): def __init__(self, samples_summary, paths): + """ + Abstract result of a non-linear search. 
+ + Parameters + ---------- + samples_summary + A summary of the most important samples of the non-linear search (e.g. maximum log likelihood, median PDF). + paths + The paths to the results of the search. + """ self._samples_summary = samples_summary self.paths = paths @@ -185,23 +195,51 @@ def __init__( latent_variables=None ): """ - The result of a non-linear search, which includes: + The result of a non-linear search. + + The default behaviour is for all key results to be in the `samples_summary` attribute, which is a concise + summary of the results of the non-linear search. The reasons for this to be the main attribute are: + + - It is concise and therefore has minimal I/O overhead, which is important because when runs are resumed + the results are loaded often, which can become very slow for large results via a `samples.csv`. + + - The `output.yaml` config files can be used to disable the output of the `samples.csv` file + and `search_internal.dill` files. This means in order for results to be loaded in a way that allows a run to + resume, the `samples_summary` must contain all results necessary to resume the run. + + For this reason, the `samples` and `search_internal` attributes are optional. On the first run of a model-fit, + they will always contain values as they are passed in via memory from the results of the search. However, if + a run is resumed they are no longer available in memory, and they will only be available if their corresponding + `samples.csv` and `search_internal.dill` files are output on disk and available to load. + + This object includes: + + - The `samples_summary` attribute, which is a summary of the results of the non-linear search. + + - The `paths` attribute, which contains the path structure to the results of the search on the hard-disk and + is used to load the samples and search internal attributes if they are required and not available in memory. - The samples of the non-linear search (E.g. 
MCMC chains, nested sampling samples) which are used to compute the maximum likelihood model, posteriors and other properties. - - The model used to fit the data, which uses the samples to create specific instances of the model (e.g. - an instance of the maximum log likelihood model). - - The non-linear search used to perform the model fit in its internal format (e.g. the Dynesty sampler used by dynesty itself as opposed to PyAutoFit abstract classes). + - The latent variables of the model-fit, which are the free parameters of the model that are not sampled + Parameters ---------- + samples_summary + A summary of the most important samples of the non-linear search (e.g. maximum log likelihood, median PDF). + paths + The paths to the results of the search, used to load the samples and search internal attributes if they are + required and not available in memory. samples - The samples of the non-linear search + The samples of the non-linear search, for example the MCMC chains or nested sampling samples. search_internal The non-linear search used to perform the model fit in its internal format. + latent_variables + The latent variables of the model-fit, which are the free parameters of the model that are not sampled. """ super().__init__( samples_summary=samples_summary, @@ -227,7 +265,23 @@ def dict(self) -> dict: } @property - def samples(self): + def samples(self) -> Samples: + """ + Returns the samples of the non-linear search, for example the MCMC chains or nested sampling samples. + + When a model-fit is run the first time, the samples are passed into the result via memory and therefore + always available. + + However, if a model-fit is resumed the samples are not available in memory and they only way to load them is + via the `samples.csv` file output on the hard-disk. This property handles the loading of the samples from + the `samples.csv` file if they are not available in memory. + + Returns + ------- + The samples of the non-linear search. 
+ """ + + # TODO : This needs to load a samples class based on the samples type. if self._samples is not None: return self._samples @@ -242,7 +296,20 @@ def samples(self): @property def search_internal(self): + """ + Returns the non-linear search used to perform the model fit in its internal sampler format. + + When a model-fit is run the first time, the search internal is passed into the result via memory and therefore + always available. + + However, if a model-fit is resumed the search internal is not available in memory and they only way to load + it is via the `search_internal.dill` file output on the hard-disk. This property handles the loading of + the search internal from the `search_internal.dill` file if it is not available in memory. + Returns + ------- + The non-linear search used to perform the model fit in its internal sampler format. + """ if self._search_internal is not None: return self._search_internal From 62b2a7140c471437f822e66dd0fe4bf4fd41887c Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Sat, 23 Mar 2024 12:51:51 +0000 Subject: [PATCH 18/22] docs refactoring --- docs/cookbooks/result.rst | 4 ++-- docs/cookbooks/search.rst | 4 ++-- docs/overview/the_basics.rst | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/cookbooks/result.rst b/docs/cookbooks/result.rst index d45e090d5..8ab288b38 100644 --- a/docs/cookbooks/result.rst +++ b/docs/cookbooks/result.rst @@ -344,8 +344,8 @@ tool ``corner.py``, which is wrapped via the ``EmceePlotter`` object. .. 
code-block:: python - search_plotter = aplt.MCMCPlotter(samples=result.samples) - search_plotter.corner() + plotter = aplt.MCMCPlotter(samples=result.samples) + plotter.corner() This plot appears as follows: diff --git a/docs/cookbooks/search.rst b/docs/cookbooks/search.rst index 26ccd77db..e62567b65 100644 --- a/docs/cookbooks/search.rst +++ b/docs/cookbooks/search.rst @@ -174,9 +174,9 @@ Checkout the ``plot`` package for a complete description of the plots that can b samples = result.samples - search_plotter = aplt.MCMCPlotter(samples=samples) + plotter = aplt.MCMCPlotter(samples=samples) - search_plotter.corner( + plotter.corner( bins=20, range=None, color="k", diff --git a/docs/overview/the_basics.rst b/docs/overview/the_basics.rst index 775c10334..3740d6de9 100644 --- a/docs/overview/the_basics.rst +++ b/docs/overview/the_basics.rst @@ -505,8 +505,8 @@ corner of the results. .. code-block:: python - search_plotter = aplt.NestPlotter(samples=result.samples) - search_plotter.corner() + plotter = aplt.NestPlotter(samples=result.samples) + plotter.corner() The plot appears as follows: From e8c90e5f64a0d4dcbcc589538f842213a3faff64 Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Sun, 24 Mar 2024 16:26:45 +0000 Subject: [PATCH 19/22] added likelihood check to fitness function --- autofit/non_linear/fitness.py | 60 ++++++++++++++++++++ autofit/non_linear/paths/abstract.py | 2 +- autofit/non_linear/search/abstract_search.py | 2 +- 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/autofit/non_linear/fitness.py b/autofit/non_linear/fitness.py index 42ae9e842..270dffe89 100644 --- a/autofit/non_linear/fitness.py +++ b/autofit/non_linear/fitness.py @@ -1,4 +1,5 @@ import numpy as np +import os from autoconf import conf @@ -25,6 +26,7 @@ def __init__( self, model, analysis, + paths = None, fom_is_log_likelihood: bool = True, resample_figure_of_merit: float = -np.inf, convert_to_chi_squared: bool = False, @@ -80,11 +82,14 @@ def __init__( self.analysis = 
analysis self.model = model + self.paths = paths self.fom_is_log_likelihood = fom_is_log_likelihood self.resample_figure_of_merit = resample_figure_of_merit self.convert_to_chi_squared = convert_to_chi_squared self._log_likelihood_function = None + self.check_log_likelihood(fitness=self) + def __getstate__(self): state = self.__dict__.copy() del state["_log_likelihood_function"] @@ -144,3 +149,58 @@ def __call__(self, parameters, *kwargs): figure_of_merit *= -2.0 return figure_of_merit + + def check_log_likelihood( + self, fitness + ): + """ + Changes to the PyAutoGalaxy source code may inadvertantly change the numerics of how a log likelihood is + computed. Equally, one may set off a model-fit that resumes from previous results, but change the settings of + the pixelization or inversion in a way that changes the log likelihood function. + + This function performs an optional sanity check, which raises an exception if the log likelihood calculation + changes, to ensure a model-fit is not resumed with a different likelihood calculation to the previous run. + + If the model-fit has not been performed before (e.g. it is not a resume) this function outputs + the `figure_of_merit` (e.g. the log likelihood) of the maximum log likelihood model at the end of the model-fit. + + If the model-fit is a resume, it loads this `figure_of_merit` and compares it against a new value computed for + the resumed run (again using the maximum log likelihood model inferred). If the two likelihoods do not agree + and therefore the log likelihood function has changed, an exception is raised and the code execution terminated. + + Parameters + ---------- + paths + The PyAutoFit paths object which manages all paths, e.g. where the non-linear search outputs are stored, + visualization, and pickled objects used by the database and aggregator. + result + The result containing the maximum log likelihood fit of the model. 
+ """ + + if os.environ.get("PYAUTOFIT_TEST_MODE") == "1": + return + + if not conf.instance["general"]["test"]["check_figure_of_merit_sanity"]: + return + + try: + samples_summary = self.paths.load_samples_summary() + except FileNotFoundError: + return + + max_log_likelihood_sample = samples_summary.max_log_likelihood_sample + log_likelihood_old = samples_summary.max_log_likelihood_sample.log_likelihood + + log_likelihood_new = fitness(parameters=max_log_likelihood_sample) + + if not np.isclose(log_likelihood_old, log_likelihood_new): + raise exc.SearchException( + f""" + Figure of merit sanity check failed. + + This means that the existing results of a model fit used a different + likelihood function compared to the one implemented now. + Old Figure of Merit = {log_likelihood_old} + New Figure of Merit = {log_likelihood_new} + """ + ) diff --git a/autofit/non_linear/paths/abstract.py b/autofit/non_linear/paths/abstract.py index f81c18862..f5460fe2c 100644 --- a/autofit/non_linear/paths/abstract.py +++ b/autofit/non_linear/paths/abstract.py @@ -427,7 +427,7 @@ def save_samples_summary(self, samples_summary : SamplesSummary): Save samples summary to the database. """ - def load_samples_summary(self): + def load_samples_summary(self) -> SamplesSummary: """ Load samples summary from the database. 
""" diff --git a/autofit/non_linear/search/abstract_search.py b/autofit/non_linear/search/abstract_search.py index 7b1ae34d0..52b5d56fa 100644 --- a/autofit/non_linear/search/abstract_search.py +++ b/autofit/non_linear/search/abstract_search.py @@ -1,6 +1,7 @@ import copy import logging import multiprocessing as mp +import numpy as np import os import time import warnings @@ -11,7 +12,6 @@ from typing import Optional, Union, Tuple, List, Dict from autoconf import conf, cached_property -from autoconf.dictable import to_dict, from_dict from autofit import exc, jax_wrapper from autofit.database.sqlalchemy_ import sa from autofit.graphical import ( From ace74cf0862cf29773cbe4a6d0c801d5264aa00e Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Sun, 24 Mar 2024 16:35:45 +0000 Subject: [PATCH 20/22] likelihood check now works --- autofit/config/general.yaml | 1 + autofit/non_linear/fitness.py | 12 +++++++----- autofit/non_linear/search/nest/nautilus/search.py | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/autofit/config/general.yaml b/autofit/config/general.yaml index 42a604312..9ecbb7b8b 100644 --- a/autofit/config/general.yaml +++ b/autofit/config/general.yaml @@ -27,6 +27,7 @@ profiling: should_profile: false # If True, the ``profile_log_likelihood_function()`` function of an analysis class is called throughout a model-fit, profiling run times. repeats: 1 # The number of repeat function calls used to measure run-times when profiling. test: + check_likelihood_function: true # if True, when a search is resumed the likelihood of a previous sample is recalculated to ensure it is consistent with the previous run. exception_override: false lh_timeout_seconds: # If a float is input, the log_likelihood_function call is timed out after this many seconds, to diagnose infinite loops. Default is None, meaning no timeout. 
parallel_profile: false diff --git a/autofit/non_linear/fitness.py b/autofit/non_linear/fitness.py index 270dffe89..57468b6db 100644 --- a/autofit/non_linear/fitness.py +++ b/autofit/non_linear/fitness.py @@ -150,9 +150,7 @@ def __call__(self, parameters, *kwargs): return figure_of_merit - def check_log_likelihood( - self, fitness - ): + def check_log_likelihood(self, fitness): """ Changes to the PyAutoGalaxy source code may inadvertantly change the numerics of how a log likelihood is computed. Equally, one may set off a model-fit that resumes from previous results, but change the settings of @@ -180,7 +178,7 @@ def check_log_likelihood( if os.environ.get("PYAUTOFIT_TEST_MODE") == "1": return - if not conf.instance["general"]["test"]["check_figure_of_merit_sanity"]: + if not conf.instance["general"]["test"]["check_likelihood_function"]: return try: @@ -191,7 +189,9 @@ def check_log_likelihood( max_log_likelihood_sample = samples_summary.max_log_likelihood_sample log_likelihood_old = samples_summary.max_log_likelihood_sample.log_likelihood - log_likelihood_new = fitness(parameters=max_log_likelihood_sample) + parameters = max_log_likelihood_sample.parameter_lists_for_model(model=self.model) + + log_likelihood_new = fitness(parameters=parameters) if not np.isclose(log_likelihood_old, log_likelihood_new): raise exc.SearchException( @@ -204,3 +204,5 @@ def check_log_likelihood( New Figure of Merit = {log_likelihood_new} """ ) + + diff --git a/autofit/non_linear/search/nest/nautilus/search.py b/autofit/non_linear/search/nest/nautilus/search.py index 73cddeaa0..f4dab2ce2 100644 --- a/autofit/non_linear/search/nest/nautilus/search.py +++ b/autofit/non_linear/search/nest/nautilus/search.py @@ -114,6 +114,7 @@ def _fit(self, model: AbstractPriorModel, analysis): fitness = Fitness( model=model, analysis=analysis, + paths=self.paths, fom_is_log_likelihood=True, resample_figure_of_merit=-1.0e99, ) From 186d3377324cf0b36bff5a87a631b551edc2f22d Mon Sep 17 00:00:00 2001 From: 
James Nightingale Date: Sun, 24 Mar 2024 16:38:45 +0000 Subject: [PATCH 21/22] update all Fitness classes with paths --- autofit/messages/transform.py | 1 + autofit/non_linear/search/mcmc/emcee/search.py | 1 + autofit/non_linear/search/mcmc/zeus/search.py | 1 + autofit/non_linear/search/nest/dynesty/search/abstract.py | 1 + autofit/non_linear/search/nest/ultranest/search.py | 1 + autofit/non_linear/search/optimize/drawer/search.py | 1 + autofit/non_linear/search/optimize/lbfgs/search.py | 1 + 7 files changed, 7 insertions(+) diff --git a/autofit/messages/transform.py b/autofit/messages/transform.py index 34132027f..1778d1d7d 100644 --- a/autofit/messages/transform.py +++ b/autofit/messages/transform.py @@ -349,6 +349,7 @@ def transform(self, p): def inv_transform(self, x): expx = np.exp(x) + print(expx) return expx / (expx.sum(axis=self.axis, keepdims=True) + 1) def jacobian(self, p): diff --git a/autofit/non_linear/search/mcmc/emcee/search.py b/autofit/non_linear/search/mcmc/emcee/search.py index 57830d33f..0f9e762da 100644 --- a/autofit/non_linear/search/mcmc/emcee/search.py +++ b/autofit/non_linear/search/mcmc/emcee/search.py @@ -103,6 +103,7 @@ def _fit(self, model: AbstractPriorModel, analysis): fitness = Fitness( model=model, analysis=analysis, + paths=self.paths, fom_is_log_likelihood=False, resample_figure_of_merit=-np.inf ) diff --git a/autofit/non_linear/search/mcmc/zeus/search.py b/autofit/non_linear/search/mcmc/zeus/search.py index 81dd60cd6..18e014152 100644 --- a/autofit/non_linear/search/mcmc/zeus/search.py +++ b/autofit/non_linear/search/mcmc/zeus/search.py @@ -124,6 +124,7 @@ def _fit(self, model: AbstractPriorModel, analysis): fitness = Fitness( model=model, analysis=analysis, + paths=self.paths, fom_is_log_likelihood=False, resample_figure_of_merit=-np.inf ) diff --git a/autofit/non_linear/search/nest/dynesty/search/abstract.py b/autofit/non_linear/search/nest/dynesty/search/abstract.py index afed6c3c6..ad0c0cb6f 100644 --- 
a/autofit/non_linear/search/nest/dynesty/search/abstract.py +++ b/autofit/non_linear/search/nest/dynesty/search/abstract.py @@ -119,6 +119,7 @@ def _fit( fitness = Fitness( model=model, analysis=analysis, + paths=self.paths, fom_is_log_likelihood=True, resample_figure_of_merit=-1.0e99, ) diff --git a/autofit/non_linear/search/nest/ultranest/search.py b/autofit/non_linear/search/nest/ultranest/search.py index dd82f5d62..ed2001b23 100644 --- a/autofit/non_linear/search/nest/ultranest/search.py +++ b/autofit/non_linear/search/nest/ultranest/search.py @@ -117,6 +117,7 @@ def _fit(self, model: AbstractPriorModel, analysis): fitness = Fitness( model=model, analysis=analysis, + paths=self.paths, fom_is_log_likelihood=True, resample_figure_of_merit=-1.0e99 ) diff --git a/autofit/non_linear/search/optimize/drawer/search.py b/autofit/non_linear/search/optimize/drawer/search.py index 0863e04ca..f4b40aba1 100644 --- a/autofit/non_linear/search/optimize/drawer/search.py +++ b/autofit/non_linear/search/optimize/drawer/search.py @@ -100,6 +100,7 @@ def _fit(self, model: AbstractPriorModel, analysis): fitness = Fitness( model=model, analysis=analysis, + paths=self.paths, fom_is_log_likelihood=False, resample_figure_of_merit=-np.inf, convert_to_chi_squared=False diff --git a/autofit/non_linear/search/optimize/lbfgs/search.py b/autofit/non_linear/search/optimize/lbfgs/search.py index c1531aa25..6875f505a 100644 --- a/autofit/non_linear/search/optimize/lbfgs/search.py +++ b/autofit/non_linear/search/optimize/lbfgs/search.py @@ -107,6 +107,7 @@ def _fit( fitness = Fitness( model=model, analysis=analysis, + paths=self.paths, fom_is_log_likelihood=False, resample_figure_of_merit=-np.inf, convert_to_chi_squared=True From 1757da67e634a5d888756925dfa5f7002ebc0bbe Mon Sep 17 00:00:00 2001 From: James Nightingale Date: Sun, 24 Mar 2024 17:09:35 +0000 Subject: [PATCH 22/22] update docstring and make paths for fitness optional --- autofit/non_linear/fitness.py | 51 
++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/autofit/non_linear/fitness.py b/autofit/non_linear/fitness.py index 57468b6db..f6e51852c 100644 --- a/autofit/non_linear/fitness.py +++ b/autofit/non_linear/fitness.py @@ -1,10 +1,15 @@ import numpy as np import os +from typing import Optional from autoconf import conf from autofit import exc +from autofit.mapper.prior_model.abstract import AbstractPriorModel +from autofit.non_linear.paths.abstract import AbstractPaths +from autofit.non_linear.analysis import Analysis + from timeout_decorator import timeout from autofit import jax_wrapper @@ -24,42 +29,42 @@ def get_timeout_seconds(): class Fitness: def __init__( self, - model, - analysis, - paths = None, + model : AbstractPriorModel, + analysis : Analysis, + paths : Optional[AbstractPaths] = None, fom_is_log_likelihood: bool = True, resample_figure_of_merit: float = -np.inf, convert_to_chi_squared: bool = False, ): """ - Interfaces with any non-linear in order to fit a model to the data and return a log likelihood via - an `Analysis` class. + Interfaces with any non-linear search to fit the model to the data and return a log likelihood via + the analysis. - The interface of a non-linear search and a fitness function can be summarized as follows: + The interface of a non-linear search and fitness function is summarized as follows: - 1) The non-linear search chooses a new set of parameters for the model, which are passed to the fitness + 1) The non-linear search samples a new set of model parameters, which are passed to the fitness function's `__call__` method. - 2) The parameter values (typically a list) are mapped to an instance of the model (via its priors if - appropriate for the non-linear search). + 2) The list of parameter values are mapped to an instance of the model. 3) The instance is passed to the analysis class's log likelihood function, which fits the model to the data and returns the log likelihood. 
4) A final figure-of-merit is computed and returned to the non-linear search, which is either the log - likelihood or log posterior depending on the type of non-linear search. + likelihood or log posterior (e.g. adding the log prior to the log likelihood). - It is common for nested sampling algorithms to require that the figure of merit returned is a log likelihood - as priors are often built into the mapping of values from a unit hyper-cube. Optimizers and MCMC methods - typically require that the figure of merit returned is a log posterior, with the prior terms added via this - fitness function. This is not a strict rule, but is a good default. + Certain searches (commonly nested samplers) require the parameters to be mapped from unit values to physical + values, which is performed internally by the fitness object in step 2. - Some methods also require a chi-squared value to be computed (which is minimized), which is the log likelihood - multiplied by -2.0. The `Fitness` class can also compute this value, if the `convert_to_chi_squared` bool is - `True`. + Certain searches require the returned figure of merit to be a log posterior (often MCMC methods) whereas + others require it to be a log likelihood (often nested samples which account for priors internally) in step 4. + Which values is returned by the `fom_is_log_likelihood` bool. - If a model-fit raises an exception of returns a `np.nan` a `resample_figure_of_merit` value is returned. The - appropriate value depends on the non-linear search, but is typically either `None`, `-np.inf` or `1.0e99`. + Some searches require a chi-squared value (which they minimized), given by the log likelihood multiplied + by -2.0. This is returned by the fitness if the `convert_to_chi_squared` bool is `True`. + + If a model-fit raises an exception or returns a `np.nan`, a `resample_figure_of_merit` value is returned + instead. The appropriate value depends on the search, but is typically either `None`, `-np.inf` or `1.0e99`. 
All values indicate to the non-linear search that the model-fit should be resampled or ignored. Parameters @@ -70,6 +75,9 @@ def __init__( model The model that is fitted to the data, which is used by the non-linear search to create instances of the model that are fitted to the data via the log likelihood function. + paths + The paths of the search, which if the search is being resumed from an old run is used to check that + the likelihood function has not changed from the previous run. fom_is_log_likelihood If `True`, the figure of merit returned by the fitness function is the log likelihood. If `False`, the figure of merit is the log posterior. @@ -88,7 +96,8 @@ def __init__( self.convert_to_chi_squared = convert_to_chi_squared self._log_likelihood_function = None - self.check_log_likelihood(fitness=self) + if self.paths is not None: + self.check_log_likelihood(fitness=self) def __getstate__(self): state = self.__dict__.copy() @@ -169,7 +178,7 @@ def check_log_likelihood(self, fitness): Parameters ---------- paths - The PyAutoFit paths object which manages all paths, e.g. where the non-linear search outputs are stored, + certain searches the non-linear search outputs are stored, visualization, and pickled objects used by the database and aggregator. result The result containing the maximum log likelihood fit of the model.