diff --git a/.idea/autora.iml b/.idea/autora.iml
index eeda8900f..7bfbf6e7a 100644
--- a/.idea/autora.iml
+++ b/.idea/autora.iml
@@ -2,7 +2,6 @@
-
diff --git a/autora/synthetic/__init__.py b/autora/synthetic/__init__.py
new file mode 100644
index 000000000..e2d0b94aa
--- /dev/null
+++ b/autora/synthetic/__init__.py
@@ -0,0 +1,77 @@
+"""
+Provides an interface for loading and saving synthetic experiments.
+
+Examples:
+    The registry is accessed using the `retrieve` function, optionally setting parameters:
+    >>> from autora.synthetic import retrieve, describe
+    >>> import numpy as np
+    >>> s = retrieve("weber_fechner", rng=np.random.default_rng(seed=180))  # the Weber-Fechner Law
+
+    Use the `describe` function to print information about the synthetic experiment:
+    >>> describe(s)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    Weber-Fechner Law...
+
+    The synthetic experiment `s` has properties like the name of the experiment:
+    >>> s.name
+    'Weber-Fechner Law'
+
+    ... a valid metadata description:
+    >>> s.metadata  # doctest: +ELLIPSIS
+    VariableCollection(...)
+
+    ... a function to generate the full domain of the data (if possible):
+    >>> x = s.domain()
+    >>> x  # doctest: +ELLIPSIS
+    array([[0...]])
+
+    ... the experiment_runner, which can be called to generate experimental results:
+    >>> y = s.experiment_runner(x)  # doctest: +ELLIPSIS
+    >>> y
+    array([[ 0.00433955],
+           [ 1.79114625],
+           [ 2.39473454],
+           ...,
+           [ 0.00397802],
+           [ 0.01922405],
+           [-0.00612883]])
+
+    ... a function to plot the ground truth:
+    >>> s.plotter()
+
+    ... against a fitted model, if one exists:
+    >>> from sklearn.linear_model import LinearRegression
+    >>> model = LinearRegression().fit(x, y)
+    >>> s.plotter(model)
+
+    These can be used to run a full experimental cycle:
+    >>> from autora.experimentalist.pipeline import make_pipeline
+    >>> from autora.experimentalist.pooler.general_pool import grid_pool
+    >>> from autora.experimentalist.sampler.random import random_sampler
+    >>> from functools import partial
+    >>> import random
+    >>> metadata = s.metadata
+    >>> pool = partial(grid_pool, ivs=metadata.independent_variables)
+    >>> random.seed(181)  # set the seed for the random sampler
+    >>> sampler = partial(random_sampler, n=20)
+    >>> experimentalist_pipeline = make_pipeline([pool, sampler])
+
+    >>> from autora.cycle import Cycle
+    >>> theorist = LinearRegression()
+
+    >>> cycle = Cycle(metadata=metadata, experimentalist=experimentalist_pipeline,
+    ...     experiment_runner=s.experiment_runner, theorist=theorist)
+
+    >>> c = cycle.run(10)
+    >>> c.data.theories[-1].coef_  # doctest: +ELLIPSIS
+    array([-0.53610647,  0.58457307])
+"""
+
+from autora.synthetic import data
+from autora.synthetic.inventory import (
+    Inventory,
+    SyntheticExperimentCollection,
+    describe,
+    register,
+    retrieve,
+)
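Importing `autora.synthetic.data` (as above) registers every bundled model, so the available IDs can be listed from the exported `Inventory` mapping. A minimal doctest-style sketch, assuming only the three models added in this diff are registered:

    >>> from autora.synthetic import Inventory
    >>> sorted(Inventory.keys())
    ['expected_value', 'prospect_theory', 'weber_fechner']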
diff --git a/autora/synthetic/data/__init__.py b/autora/synthetic/data/__init__.py
new file mode 100644
index 000000000..394d81233
--- /dev/null
+++ b/autora/synthetic/data/__init__.py
@@ -0,0 +1,2 @@
+""" Models bundled with AutoRA. """
+from . import expected_value, prospect_theory, weber_fechner
diff --git a/autora/synthetic/data/expected_value.py b/autora/synthetic/data/expected_value.py
new file mode 100644
index 000000000..a5c86f937
--- /dev/null
+++ b/autora/synthetic/data/expected_value.py
@@ -0,0 +1,184 @@
+from functools import partial
+
+import numpy as np
+
+from autora.variable import DV, IV, ValueType, VariableCollection
+
+from ..inventory import SyntheticExperimentCollection, register
+
+
+def get_metadata(minimum_value, maximum_value, resolution):
+    v_a = IV(
+        name="V_A",
+        allowed_values=np.linspace(
+            minimum_value,
+            maximum_value,
+            resolution,
+        ),
+        value_range=(minimum_value, maximum_value),
+        units="dollar",
+        variable_label="Value of Option A",
+        type=ValueType.REAL,
+    )
+
+    v_b = IV(
+        name="V_B",
+        allowed_values=np.linspace(
+            minimum_value,
+            maximum_value,
+            resolution,
+        ),
+        value_range=(minimum_value, maximum_value),
+        units="dollar",
+        variable_label="Value of Option B",
+        type=ValueType.REAL,
+    )
+
+    p_a = IV(
+        name="P_A",
+        allowed_values=np.linspace(0, 1, resolution),
+        value_range=(0, 1),
+        units="probability",
+        variable_label="Probability of Option A",
+        type=ValueType.REAL,
+    )
+
+    p_b = IV(
+        name="P_B",
+        allowed_values=np.linspace(0, 1, resolution),
+        value_range=(0, 1),
+        units="probability",
+        variable_label="Probability of Option B",
+        type=ValueType.REAL,
+    )
+
+    dv1 = DV(
+        name="choose_A",
+        value_range=(0, 1),
+        units="probability",
+        variable_label="Probability of Choosing Option A",
+        type=ValueType.PROBABILITY,
+    )
+
+    metadata_ = VariableCollection(
+        independent_variables=[v_a, p_a, v_b, p_b],
+        dependent_variables=[dv1],
+    )
+    return metadata_
+
+
+def expected_value_theory(
+    name="Expected Value Theory",
+    choice_temperature: float = 0.1,
+    value_lambda: float = 0.5,
+    resolution=10,
+    minimum_value=-1,
+    maximum_value=1,
+    added_noise: float = 0.01,
+    rng=np.random.default_rng(),
+):
+
+    params = dict(
+        name=name,
+        minimum_value=minimum_value,
+        maximum_value=maximum_value,
+        resolution=resolution,
+        choice_temperature=choice_temperature,
+        value_lambda=value_lambda,
+        added_noise=added_noise,
+        rng=rng,
+    )
+
+    metadata = get_metadata(
+        minimum_value=minimum_value, maximum_value=maximum_value, resolution=resolution
+    )
+
+    def experiment_runner(X: np.ndarray, added_noise_=added_noise):
+
+        Y = np.zeros((X.shape[0], 1))
+        for idx, x in enumerate(X):
+            value_A = value_lambda * x[0]
+            value_B = value_lambda * x[2]
+
+            probability_a = x[1]
+            probability_b = x[3]
+
+            expected_value_A = value_A * probability_a + rng.normal(0, added_noise_)
+            expected_value_B = value_B * probability_b + rng.normal(0, added_noise_)
+
+            # compute probability of choosing option A (softmax choice rule)
+            p_choose_A = np.exp(expected_value_A / choice_temperature) / (
+                np.exp(expected_value_A / choice_temperature)
+                + np.exp(expected_value_B / choice_temperature)
+            )
+
+            Y[idx] = p_choose_A
+
+        return Y
+
+    ground_truth = partial(experiment_runner, added_noise_=0.0)
+
+    def domain():
+        # unpack the per-variable value lists so meshgrid builds the full factorial grid
+        X = np.array(
+            np.meshgrid(*[x.allowed_values for x in metadata.independent_variables])
+        ).T.reshape(-1, 4)
+        return X
+
+    def plotter(model=None):
+        import matplotlib.colors as mcolors
+        import matplotlib.pyplot as plt
+
+        v_a_list = [-1, 0.5, 1]
+        v_b = 0.5
+        p_b = 0.5
+        p_a = np.linspace(0, 1, 100)
+
+        for idx, v_a in enumerate(v_a_list):
+            X = np.zeros((len(p_a), 4))
+            X[:, 0] = v_a
+            X[:, 1] = p_a
+            X[:, 2] = v_b
+            X[:, 3] = p_b
+
+            y = ground_truth(X)
+            colors = mcolors.TABLEAU_COLORS
+            col_keys = list(colors.keys())
+            plt.plot(
+                p_a, y, label=f"$V(A) = {v_a}$ (Original)", c=colors[col_keys[idx]]
+            )
+            if model is not None:
+                y = model.predict(X)
+                plt.plot(
+                    p_a,
+                    y,
+                    label=f"$V(A) = {v_a}$ (Recovered)",
+                    c=colors[col_keys[idx]],
+                    linestyle="--",
+                )
+
+        x_limit = [0, metadata.independent_variables[1].value_range[1]]
+        y_limit = [0, 1]
+        x_label = "Probability of Obtaining V(A)"
+        y_label = "Probability of Choosing Option A"
+
+        plt.xlim(x_limit)
+        plt.ylim(y_limit)
+        plt.xlabel(x_label, fontsize="large")
+        plt.ylabel(y_label, fontsize="large")
+        plt.legend(loc=2, fontsize="medium")
+        plt.title(name, fontsize="x-large")
+        plt.show()
+
+    collection = SyntheticExperimentCollection(
+        name=name,
+        metadata=metadata,
+        experiment_runner=experiment_runner,
+        ground_truth=ground_truth,
+        domain=domain,
+        plotter=plotter,
+        params=params,
+    )
+    return collection
+
+
+register("expected_value", expected_value_theory)
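The choice rule in `expected_value_theory` is a two-option softmax over the noisy expected values. A minimal standalone sketch of that computation (hypothetical helper name, same default temperature as above):

    >>> import numpy as np
    >>> def p_choose_a(ev_a, ev_b, choice_temperature=0.1):
    ...     # two-option softmax: probability of choosing option A
    ...     return np.exp(ev_a / choice_temperature) / (
    ...         np.exp(ev_a / choice_temperature) + np.exp(ev_b / choice_temperature))
    >>> round(float(p_choose_a(0.25, 0.15)), 3)  # A's expected value is higher, so P > 0.5
    0.731

With two options, this softmax reduces to a logistic function of the difference `(ev_a - ev_b) / choice_temperature`.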
diff --git a/autora/synthetic/data/prospect_theory.py b/autora/synthetic/data/prospect_theory.py
new file mode 100644
index 000000000..2344e790b
--- /dev/null
+++ b/autora/synthetic/data/prospect_theory.py
@@ -0,0 +1,198 @@
+from functools import partial
+
+import numpy as np
+
+from ..inventory import SyntheticExperimentCollection, register
+from .expected_value import get_metadata
+
+
+def prospect_theory(
+    name="Prospect Theory",
+    added_noise=0.01,
+    choice_temperature=0.1,
+    value_alpha=0.88,
+    value_beta=0.88,
+    value_lambda=2.25,
+    probability_alpha=0.61,
+    probability_beta=0.69,
+    resolution=10,
+    minimum_value=-1,
+    maximum_value=1,
+    rng=np.random.default_rng(),
+):
+    """
+    Parameters from:
+    D. Kahneman, A. Tversky, Prospect theory: An analysis of decision under risk.
+    Econometrica 47, 263–292 (1979). doi:10.2307/1914185
+
+    Power value function according to:
+    - A. Tversky, D. Kahneman, Advances in prospect theory: Cumulative representation of
+      uncertainty. J. Risk Uncertain. 5, 297–323 (1992). doi:10.1007/BF00122574
+
+    - I. Gilboa, Expected utility with purely subjective non-additive probabilities.
+      J. Math. Econ. 16, 65–88 (1987). doi:10.1016/0304-4068(87)90022-X
+
+    - D. Schmeidler, Subjective probability and expected utility without additivity.
+      Econometrica 57, 571 (1989). doi:10.2307/1911053
+
+    Probability weighting function according to:
+    A. Tversky, D. Kahneman, Advances in prospect theory: Cumulative representation of
+    uncertainty. J. Risk Uncertain. 5, 297–323 (1992). doi:10.1007/BF00122574
+    """
+
+    params = dict(
+        added_noise=added_noise,
+        choice_temperature=choice_temperature,
+        value_alpha=value_alpha,
+        value_beta=value_beta,
+        value_lambda=value_lambda,
+        probability_alpha=probability_alpha,
+        probability_beta=probability_beta,
+        resolution=resolution,
+        minimum_value=minimum_value,
+        maximum_value=maximum_value,
+        rng=rng,
+        name=name,
+    )
+
+    metadata = get_metadata(
+        minimum_value=minimum_value, maximum_value=maximum_value, resolution=resolution
+    )
+
+    def experiment_runner(X: np.ndarray, added_noise_=added_noise):
+
+        Y = np.zeros((X.shape[0], 1))
+        for idx, x in enumerate(X):
+
+            # power value function according to Tversky & Kahneman (1992), Gilboa (1987),
+            # and Schmeidler (1989); see the references in the docstring above
+
+            # compute value of option A
+            if x[0] > 0:
+                value_A = x[0] ** value_alpha
+            else:
+                value_A = -value_lambda * (-x[0]) ** value_beta
+
+            # compute value of option B
+            if x[2] > 0:
+                value_B = x[2] ** value_alpha
+            else:
+                value_B = -value_lambda * (-x[2]) ** value_beta
+
+            # probability weighting function according to Tversky & Kahneman (1992)
+
+            # compute weighted probability of option A
+            if x[0] >= 0:
+                coefficient = probability_alpha
+            else:
+                coefficient = probability_beta
+
+            probability_a = x[1] ** coefficient / (
+                x[1] ** coefficient + (1 - x[1]) ** coefficient
+            ) ** (1 / coefficient)
+
+            # compute weighted probability of option B
+            if x[2] >= 0:
+                coefficient = probability_alpha
+            else:
+                coefficient = probability_beta
+
+            probability_b = x[3] ** coefficient / (
+                x[3] ** coefficient + (1 - x[3]) ** coefficient
+            ) ** (1 / coefficient)
+
+            expected_value_A = value_A * probability_a + rng.normal(0, added_noise_)
+            expected_value_B = value_B * probability_b + rng.normal(0, added_noise_)
+
+            # compute probability of choosing option A (softmax choice rule)
+            p_choose_A = np.exp(expected_value_A / choice_temperature) / (
+                np.exp(expected_value_A / choice_temperature)
+                + np.exp(expected_value_B / choice_temperature)
+            )
+
+            Y[idx] = p_choose_A
+
+        return Y
+
+    ground_truth = partial(experiment_runner, added_noise_=0.0)
+
+    def domain():
+        v_a = metadata.independent_variables[0].allowed_values
+        p_a = metadata.independent_variables[1].allowed_values
+        v_b = metadata.independent_variables[2].allowed_values
+        p_b = metadata.independent_variables[3].allowed_values
+
+        X = np.array(np.meshgrid(v_a, p_a, v_b, p_b)).T.reshape(-1, 4)
+        return X
+
+    def plotter(model=None):
+        import matplotlib.colors as mcolors
+        import matplotlib.pyplot as plt
+
+        v_a_list = [-0.5, 0.5, 1]
+        p_a = np.linspace(0, 1, 100)
+
+        v_b = 0.5
+        p_b = 0.5
+
+        for idx, v_a in enumerate(v_a_list):
+            X = np.zeros((len(p_a), 4))
+            X[:, 0] = v_a
+            X[:, 1] = p_a
+            X[:, 2] = v_b
+            X[:, 3] = p_b
+
+            y = ground_truth(X)
+            colors = mcolors.TABLEAU_COLORS
+            col_keys = list(colors.keys())
+            plt.plot(
+                p_a, y, label=f"$V(A) = {v_a}$ (Original)", c=colors[col_keys[idx]]
+            )
+            if model is not None:
+                y = model.predict(X)
+                plt.plot(
+                    p_a,
+                    y,
+                    label=f"$V(A) = {v_a}$ (Recovered)",
+                    c=colors[col_keys[idx]],
+                    linestyle="--",
+                )
+
+        x_limit = [0, metadata.independent_variables[1].value_range[1]]
+        y_limit = [0, 1]
+        x_label = "Probability of Obtaining V(A)"
+        y_label = "Probability of Choosing Option A"
+
+        plt.xlim(x_limit)
+        plt.ylim(y_limit)
+        plt.xlabel(x_label, fontsize="large")
+        plt.ylabel(y_label, fontsize="large")
+        plt.legend(loc=2, fontsize="medium")
+        plt.title(name, fontsize="x-large")
+        plt.show()
+
+    collection = SyntheticExperimentCollection(
+        name=name,
+        params=params,
+        metadata=metadata,
+        domain=domain,
+        experiment_runner=experiment_runner,
+        ground_truth=ground_truth,
+        plotter=plotter,
+    )
+    return collection
+
+
+register("prospect_theory", prospect_theory)
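For reference, the value and probability-weighting functions coded above can be exercised in isolation. A doctest-style sketch (hypothetical helper names, Tversky & Kahneman (1992) defaults as above):

    >>> def value(x, alpha=0.88, beta=0.88, lam=2.25):
    ...     # power value function: concave for gains, steeper for losses
    ...     return x ** alpha if x > 0 else -lam * (-x) ** beta
    >>> value(0.5) + value(-0.5) < 0  # losses loom larger than gains
    True
    >>> def weight(p, gamma=0.61):
    ...     # inverse-S probability weighting
    ...     return p ** gamma / (p ** gamma + (1 - p) ** gamma) ** (1 / gamma)
    >>> round(weight(0.1), 2)  # small probabilities are overweighted
    0.19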
diff --git a/autora/synthetic/data/weber_fechner.py b/autora/synthetic/data/weber_fechner.py
new file mode 100644
index 000000000..ac5e56ab4
--- /dev/null
+++ b/autora/synthetic/data/weber_fechner.py
@@ -0,0 +1,158 @@
+from functools import partial
+
+import numpy as np
+
+from autora.variable import DV, IV, ValueType, VariableCollection
+
+from ..inventory import SyntheticExperimentCollection, register
+
+
+def weber_fechner_law(
+    name="Weber-Fechner Law",
+    resolution=100,
+    constant=1.0,
+    maximum_stimulus_intensity=5.0,
+    added_noise=0.01,
+    rng=np.random.default_rng(),
+):
+    """Weber-Fechner Law.
+
+    Args:
+        name: name of the experiment
+        resolution: number of allowed values for stimulus 1 and 2
+        constant: constant multiplier
+        maximum_stimulus_intensity: maximum value for stimulus 1 and 2
+        added_noise: standard deviation of normally distributed noise added to y-values
+        rng: `np.random` random number generator to use for generating noise
+
+    Returns:
+        a SyntheticExperimentCollection describing the experiment
+    """
+
+    params = dict(
+        added_noise=added_noise,
+        name=name,
+        resolution=resolution,
+        constant=constant,
+        maximum_stimulus_intensity=maximum_stimulus_intensity,
+        rng=rng,
+    )
+
+    iv1 = IV(
+        name="S1",
+        allowed_values=np.linspace(
+            1 / resolution, maximum_stimulus_intensity, resolution
+        ),
+        value_range=(1 / resolution, maximum_stimulus_intensity),
+        units="intensity",
+        variable_label="Stimulus 1 Intensity",
+        type=ValueType.REAL,
+    )
+
+    iv2 = IV(
+        name="S2",
+        allowed_values=np.linspace(
+            1 / resolution, maximum_stimulus_intensity, resolution
+        ),
+        value_range=(1 / resolution, maximum_stimulus_intensity),
+        units="intensity",
+        variable_label="Stimulus 2 Intensity",
+        type=ValueType.REAL,
+    )
+
+    dv1 = DV(
+        name="difference_detected",
+        value_range=(0, maximum_stimulus_intensity),
+        units="sensation",
+        variable_label="Sensation",
+        type=ValueType.REAL,
+    )
+
+    metadata = VariableCollection(
+        independent_variables=[iv1, iv2],
+        dependent_variables=[dv1],
+    )
+
+    def experiment_runner(
+        X: np.ndarray,
+        std: float = added_noise,
+    ):
+        Y = np.zeros((X.shape[0], 1))
+        for idx, x in enumerate(X):
+            y = constant * np.log(x[1] / x[0]) + rng.normal(0, std)
+            Y[idx] = y
+
+        return Y
+
+    ground_truth = partial(experiment_runner, std=0.0)
+
+    def domain():
+        s1_values = metadata.independent_variables[0].allowed_values
+        s2_values = metadata.independent_variables[1].allowed_values
+        X = np.array(np.meshgrid(s1_values, s2_values)).T.reshape(-1, 2)
+        # remove all combinations where s1 > s2
+        X = X[X[:, 0] <= X[:, 1]]
+        return X
+
+    def plotter(
+        model=None,
+    ):
+        import matplotlib.colors as mcolors
+        import matplotlib.pyplot as plt
+
+        colors = mcolors.TABLEAU_COLORS
+        col_keys = list(colors.keys())
+
+        S0_list = [1, 2, 4]
+        delta_S = np.linspace(0, 5, 100)
+
+        for idx, S0_value in enumerate(S0_list):
+            S0 = S0_value + np.zeros(delta_S.shape)
+            S1 = S0 + delta_S
+            X = np.array([S0, S1]).T
+            y = ground_truth(X)
+            plt.plot(
+                delta_S,
+                y,
+                label=f"$S_0 = {S0_value}$ (Original)",
+                c=colors[col_keys[idx]],
+            )
+            if model is not None:
+                y = model.predict(X)
+                plt.plot(
+                    delta_S,
+                    y,
+                    label=f"$S_0 = {S0_value}$ (Recovered)",
+                    c=colors[col_keys[idx]],
+                    linestyle="--",
+                )
+
+        x_limit = [0, metadata.independent_variables[0].value_range[1]]
+        y_limit = [0, 2]
+        x_label = r"Stimulus Intensity Difference $\Delta S = S_1 - S_0$"
+        y_label = "Perceived Intensity of Stimulus $S_1$"
+
+        plt.xlim(x_limit)
+        plt.ylim(y_limit)
+        plt.xlabel(x_label, fontsize="large")
+        plt.ylabel(y_label, fontsize="large")
+        plt.legend(loc=2, fontsize="medium")
+        plt.title(name, fontsize="x-large")
+        plt.show()
+
+    collection = SyntheticExperimentCollection(
+        name=name,
+        metadata=metadata,
+        experiment_runner=experiment_runner,
+        ground_truth=ground_truth,
+        domain=domain,
+        plotter=plotter,
+        params=params,
+    )
+    return collection
+
+
+register("weber_fechner", weber_fechner_law)
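The noise-free ground truth above is the logarithmic Weber-Fechner relation, y = constant * ln(S2 / S1): perceived intensity depends on the ratio of the two stimulus intensities, not on their absolute difference. A short doctest-style check at a doubling of stimulus intensity (constant=1.0, as in the default above):

    >>> import numpy as np
    >>> round(float(1.0 * np.log(2.0 / 1.0)), 3)  # S1=1, S2=2
    0.693
    >>> bool(np.log(4.0 / 2.0) == np.log(2.0 / 1.0))  # only the ratio matters
    True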
diff --git a/autora/synthetic/inventory.py b/autora/synthetic/inventory.py
new file mode 100644
index 000000000..4d75be832
--- /dev/null
+++ b/autora/synthetic/inventory.py
@@ -0,0 +1,205 @@
+"""
+Module for registering and retrieving synthetic models from an inventory.
+
+Examples:
+    To add a new model to the inventory and recover it later, we define it using a function
+    (closure).
+    We start by importing the modules we'll need:
+    >>> from functools import partial
+    >>> import matplotlib.pyplot as plt
+    >>> import numpy as np
+    >>> from autora.synthetic import register, retrieve, describe, SyntheticExperimentCollection
+    >>> from autora.variable import IV, DV, VariableCollection
+
+    Then we can define the function. We define all the arguments we want and add them to a
+    dictionary. The closure – in this case `sinusoid_experiment` – is the scope for all
+    the parameters we need.
+    >>> def sinusoid_experiment(omega=np.pi/3, delta=np.pi/2., m=0.3, resolution=1000,
+    ...                         rng=np.random.default_rng()):
+    ...     \"\"\"Shifted sinusoid experiment, combining a sinusoid and a gradient drift.
+    ...     Ground truth: y = sin((x - delta) * omega) + (x * m)
+    ...     Parameters:
+    ...         omega: angular speed in radians
+    ...         delta: offset in radians
+    ...         m: drift gradient in [radians ^ -1]
+    ...         resolution: number of x values
+    ...     \"\"\"
+    ...
+    ...     name = "Shifted Sinusoid"
+    ...
+    ...     params = dict(omega=omega, delta=delta, resolution=resolution, m=m, rng=rng)
+    ...
+    ...     x = IV(name="x", value_range=(-6 * np.pi, 6 * np.pi))
+    ...     y = DV(name="y", value_range=(-1, 1))
+    ...     metadata = VariableCollection(independent_variables=[x], dependent_variables=[y])
+    ...
+    ...     def domain():
+    ...         return np.linspace(*x.value_range, resolution).reshape(-1, 1)
+    ...
+    ...     def experiment_runner(X, std=0.1):
+    ...         return np.sin((X - delta) * omega) + (X * m) + rng.normal(0, std, X.shape)
+    ...
+    ...     def ground_truth(X):
+    ...         return experiment_runner(X, std=0.)
+    ...
+    ...     def plotter(model=None):
+    ...         plt.plot(domain(), ground_truth(domain()), label="Ground Truth")
+    ...         if model is not None:
+    ...             plt.plot(domain(), model.predict(domain()), label="Model")
+    ...         plt.title(name)
+    ...
+    ...     collection = SyntheticExperimentCollection(
+    ...         name=name,
+    ...         params=params,
+    ...         metadata=metadata,
+    ...         domain=domain,
+    ...         experiment_runner=experiment_runner,
+    ...         ground_truth=ground_truth,
+    ...         plotter=plotter,
+    ...     )
+    ...
+    ...     return collection
+
+    Then we can register the experiment. We register the function itself, rather than its
+    evaluated result:
+    >>> register("sinusoid_experiment", sinusoid_experiment)
+
+    When we want to retrieve the experiment, we can just use the default values if we like:
+    >>> s = retrieve("sinusoid_experiment")
+
+    We can retrieve the docstring of the model using the `describe` function:
+    >>> describe(s)  # doctest: +ELLIPSIS
+    Shifted sinusoid experiment, combining a sinusoid and a gradient drift.
+    Ground truth: y = sin((x - delta) * omega) + (x * m)
+    ...
+
+    ... or using its id:
+    >>> describe("sinusoid_experiment")  # doctest: +ELLIPSIS
+    Shifted sinusoid experiment, combining a sinusoid and a gradient drift.
+    Ground truth: y = sin((x - delta) * omega) + (x * m)
+    ...
+
+    ... or we can look at the closure function directly:
+    >>> describe(sinusoid_experiment)  # doctest: +ELLIPSIS
+    Shifted sinusoid experiment, combining a sinusoid and a gradient drift.
+    Ground truth: y = sin((x - delta) * omega) + (x * m)
+    ...
+
+    The object returned includes all the used parameters as a dictionary:
+    >>> s.params  # doctest: +ELLIPSIS
+    {'omega': 1.0..., 'delta': 1.5..., 'resolution': 1000, 'm': 0.3, ...}
+
+    If we need to modify the parameter values, we can pass them as arguments to the `retrieve`
+    function:
+    >>> t = retrieve("sinusoid_experiment", delta=0.2)
+    >>> t.params  # doctest: +ELLIPSIS
+    {..., 'delta': 0.2, ...}
+"""
+
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from functools import singledispatch
+from typing import Any, Callable, Dict, Optional, Protocol, runtime_checkable
+
+from autora.variable import VariableCollection
+
+
+@runtime_checkable
+class _SyntheticExperimentClosure(Protocol):
+    """A function which returns a SyntheticExperimentCollection."""
+
+    def __call__(self, *args, **kwargs) -> SyntheticExperimentCollection:
+        ...
+
+
+class _SupportsPredict(Protocol):
+    def predict(self, X) -> Any:
+        ...
+
+
+@dataclass
+class SyntheticExperimentCollection:
+    """
+    Represents a synthetic experiment.
+
+    Attributes:
+        name: the name of the synthetic experiment
+        params: a dictionary with the settable parameters of the model and their respective values
+        metadata: a VariableCollection describing the variables of the model
+        domain: a function which returns all the available X values for the model
+        experiment_runner: a function which takes X values and returns simulated y values **with
+            statistical noise**
+        ground_truth: a function which takes X values and returns simulated y values **without
+            any statistical noise**
+        plotter: a function which plots the ground truth and, optionally, a model with a
+            `predict` method (e.g. scikit-learn estimators)
+    """
+
+    name: Optional[str] = None
+    params: Optional[Dict] = None
+    metadata: Optional[VariableCollection] = None
+    domain: Optional[Callable] = None
+    experiment_runner: Optional[Callable] = None
+    ground_truth: Optional[Callable] = None
+    plotter: Optional[Callable[[Optional[_SupportsPredict]], None]] = None
+    closure: Optional[Callable] = None
+
+
+Inventory: Dict[str, _SyntheticExperimentClosure] = dict()
+""" The dictionary of registered synthetic experiment closures, keyed by ID. """
+
+
+def register(id_: str, closure: _SyntheticExperimentClosure) -> None:
+    """
+    Add a new synthetic experiment to the Inventory.
+
+    Parameters:
+        id_: the unique id for the model
+        closure: a function which returns a SyntheticExperimentCollection
+    """
+    Inventory[id_] = closure
+
+
+def retrieve(id_: str, **kwargs) -> SyntheticExperimentCollection:
+    """
+    Retrieve a synthetic experiment from the Inventory.
+
+    Parameters:
+        id_: the unique id for the model
+        **kwargs: keyword arguments for the synthetic experiment (metadata, coefficients etc.)
+
+    Returns:
+        the synthetic experiment
+    """
+    closure: _SyntheticExperimentClosure = Inventory[id_]
+    collection = closure(**kwargs)
+    collection.closure = closure
+    return collection
+
+
+@singledispatch
+def describe(arg):
+    """
+    Print the docstring for a synthetic experiment.
+
+    Args:
+        arg: the experiment's ID, an object returned from the `retrieve` function, or a closure
+            which creates a new experiment.
+    """
+    raise NotImplementedError(f"{arg=} not yet supported")
+
+
+@describe.register
+def _(closure: _SyntheticExperimentClosure):
+    print(closure.__doc__)
+
+
+@describe.register
+def _(collection: SyntheticExperimentCollection):
+    describe(collection.closure)
+
+
+@describe.register
+def _(id_: str):
+    describe(retrieve(id_))
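Because `register` stores the closure unevaluated (it is only called when `retrieve` is invoked), pre-parameterised variants can also be registered, for example with `functools.partial`. A sketch reusing the `sinusoid_experiment` closure from the module docstring above (hypothetical ID):

    >>> from functools import partial
    >>> register("sinusoid_experiment_steep", partial(sinusoid_experiment, m=1.0))
    >>> retrieve("sinusoid_experiment_steep").params["m"]
    1.0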
+ """ + raise NotImplementedError(f"{arg=} not yet supported") + + +@describe.register +def _(closure: _SyntheticExperimentClosure): + print(closure.__doc__) + + +@describe.register +def _(collection: SyntheticExperimentCollection): + describe(collection.closure) + + +@describe.register +def _(id_: str): + describe(retrieve(id_)) diff --git a/example/synthetic/inventory.ipynb b/example/synthetic/inventory.ipynb new file mode 100644 index 000000000..edd49aef2 --- /dev/null +++ b/example/synthetic/inventory.ipynb @@ -0,0 +1,57 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "\n", + "from autora.synthetic import retrieve, Inventory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "for id in Inventory.keys():\n", + " s = retrieve(id)\n", + " print(s)\n", + " X = s.domain()\n", + " y_exp = s.experiment_runner(X)\n", + " y_gt = s.ground_truth(X)\n", + " s.plotter() # without model\n", + " fitter = LinearRegression().fit(X, y_exp)\n", + " s.plotter(fitter)\n" + ], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/tests/test_synthetic_inventory.py b/tests/test_synthetic_inventory.py new file mode 100644 index 000000000..9070c6836 --- /dev/null +++ b/tests/test_synthetic_inventory.py @@ -0,0 +1,22 @@ +import autora.synthetic +from autora.synthetic.inventory import SyntheticExperimentCollection +from autora.variable import VariableCollection + + +def test_model_registration_retrieval(): + # We can register a model and retrieve it + autora.synthetic.register("empty", lambda: SyntheticExperimentCollection()) + empty = autora.synthetic.retrieve("empty") + assert empty.name is None + + # We can register another model and retrieve it as well + autora.synthetic.register( + "only_metadata", + lambda: SyntheticExperimentCollection(metadata=VariableCollection()), + ) + only_metadata = autora.synthetic.retrieve("only_metadata") + assert only_metadata.metadata is not None + + # We can still retrieve the first model, and it is equal to the first version + empty_copy = autora.synthetic.retrieve("empty") + assert empty_copy == empty