Input_file_handling_robustness (#107)
* Improve flexibility of input file handling

* Split input dict processing from yaml read

* Repair tests

* Update cfspopcon/input_file_handling.py

* Update cfspopcon/input_file_handling.py

Co-authored-by: Christoph Hasse <[email protected]>

---------

Co-authored-by: Christoph Hasse <[email protected]>
tbody-cfs and hassec authored Sep 19, 2024
1 parent 520f458 commit f4c3431
Showing 21 changed files with 181 additions and 58 deletions.
5 changes: 4 additions & 1 deletion .gitignore
@@ -141,4 +141,7 @@ radas_dir/*
 popcon_algorithms.yaml
 
 # Have an untracked folder for rough working
-untracked/
+untracked/
+# Have a cases folder for personal cases which shouldn't be added
+# to the index
+cases/*
3 changes: 2 additions & 1 deletion cfspopcon/__init__.py
@@ -8,7 +8,7 @@
 from . import file_io, formulas, named_options, shaping_and_selection
 from .algorithm_class import Algorithm, CompositeAlgorithm
 from .formulas.atomic_data import AtomicData
-from .input_file_handling import read_case
+from .input_file_handling import process_input_dictionary, read_case
 from .plotting import read_plot_style
 from .unit_handling import (
     convert_to_default_units,
@@ -23,6 +23,7 @@
"named_options",
"magnitude_in_default_units",
"convert_to_default_units",
"process_input_dictionary",
"set_default_units",
"convert_units",
"read_case",
10 changes: 10 additions & 0 deletions cfspopcon/algorithm_class.py
@@ -174,6 +174,16 @@ def function_wrapper(func: GenericFunctionType) -> GenericFunctionType:

         return function_wrapper
 
+    @classmethod
+    def empty(cls) -> Algorithm:
+        """Makes a 'do nothing' algorithm, in case you don't want to use the algorithm functionality."""
+
+        def do_nothing() -> dict[str, Any]:
+            result_dict: dict[str, Any] = {}
+            return result_dict
+
+        return cls(do_nothing, return_keys=[], name="empty", skip_registration=True)
+
     def validate_inputs(
         self, configuration: Union[dict, xr.Dataset], quiet: bool = False, raise_error_on_missing_inputs: bool = False
     ) -> bool:
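The new classmethod is exercised by `test_blank_algorithm` further down. As a minimal sketch grounded in that test (using only the `Algorithm` export from `cfspopcon`), an empty algorithm hands a dataset back unchanged:

```python
import xarray as xr

from cfspopcon import Algorithm

# Algorithm.empty() declares no inputs and no return keys, so updating a
# dataset with it is a no-op. This is the fallback used when an input file
# lists no algorithms at all.
ds = xr.Dataset(data_vars=dict(a=xr.DataArray([1, 2, 3])))

updated = Algorithm.empty().update_dataset(ds)
xr.testing.assert_allclose(ds, updated)  # dataset is returned unchanged
```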
18 changes: 13 additions & 5 deletions cfspopcon/file_io.py
@@ -24,8 +24,13 @@
 ]
 
 
-def sanitize_variable(val: xr.DataArray, key: str) -> Union[xr.DataArray, str]:
-    """Strip units and Enum values from a variable so that it can be stored in a NetCDF file."""
+def sanitize_variable(val: xr.DataArray, key: str, coord: bool = False) -> Union[xr.DataArray, str]:
+    """Strip units and Enum values from a variable so that it can be stored in a NetCDF file.
+
+    If you set coord=True and you pass in a scalar val, val is wrapped in a length-1 array to
+    circumvent an xarray issue regarding single-value coordinates.
+    See https://github.com/pydata/xarray/issues/1709.
+    """
     try:
         val = convert_to_default_units(val, key).pint.dequantify()
     except KeyError:
@@ -34,9 +39,12 @@ def sanitize_variable(val: xr.DataArray, key: str) -> Union[xr.DataArray, str]:
     if val.dtype == object:
         try:
             if val.size == 1:
-                val = val.item().name
+                if not coord:
+                    val = val.item().name
+                else:
+                    val = xr.DataArray([val.item().name])
             else:
-                val = xr.DataArray([v.name for v in val.values])
+                val = xr.DataArray([v.name for v in val.values], dims=val.dims)
         except AttributeError:
             warnings.warn(f"Cannot handle {key}. Dropping variable.", stacklevel=3)
             return "UNHANDLED"
@@ -62,7 +70,7 @@ def write_dataset_to_netcdf(

     for key in serialized_dataset.coords:  # type:ignore[assignment]
         assert isinstance(key, str)
-        serialized_dataset[key] = sanitize_variable(dataset[key], key)
+        serialized_dataset[key] = sanitize_variable(dataset[key], key, coord=True)
 
     serialized_dataset.to_netcdf(filepath, engine=netcdf_writer)

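The `coord=True` branch above is easiest to see with a zero-dimensional, Enum-valued variable, which is how switch values end up on dataset coordinates. A hedged sketch of the two code paths (the `Reaction` enum is invented for illustration; cfspopcon's real switches live in `cfspopcon.named_options`):

```python
from enum import Enum

import xarray as xr


class Reaction(Enum):  # stand-in for a cfspopcon named option
    DT = 1


val = xr.DataArray(Reaction.DT)  # 0-d object array, like a scalar coordinate

# coord=False path: unwrap to the bare Enum name, a plain Python str.
as_data_var = val.item().name  # -> "DT"

# coord=True path: wrap the name in a length-1 DataArray so it can be
# reassigned as a single-value coordinate without hitting the xarray
# issue linked in the docstring (pydata/xarray#1709).
as_coord = xr.DataArray([val.item().name])
```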
65 changes: 45 additions & 20 deletions cfspopcon/input_file_handling.py
@@ -22,43 +22,68 @@ def read_case(
     kwargs can be an arbitrary dictionary of key-value pairs that overwrite the config values.
     """
-    if kwargs is None:
-        kwargs = dict()
-    if Path(case).exists():
-        case = Path(case)
-        if case.is_dir():
-            input_file = case / "input.yaml"
-        else:
-            input_file = case
-    else:
+    case = Path(case)
+
+    if not case.exists():
         raise FileNotFoundError(f"Could not find {case}.")
+
+    if case.is_dir():
+        case_dir = case
+        input_file = case_dir / "input.yaml"
+    else:
+        case_dir = case.parent
+        input_file = case
 
     with open(input_file) as file:
-        repr_d: dict[str, Any] = yaml.load(file, Loader=yaml.FullLoader)
+        repr_d = yaml.load(file, Loader=yaml.FullLoader)
 
-    repr_d.update(kwargs)
+    if kwargs is not None:
+        repr_d.update(kwargs)
 
-    algorithms = repr_d.pop("algorithms")
-    algorithm_list = [Algorithm.get_algorithm(algorithm) for algorithm in algorithms]
+    return process_input_dictionary(repr_d, case_dir)
+
+
+def process_input_dictionary(
+    repr_d: dict[str, Any], case_dir: Path
+) -> tuple[dict[str, Any], Union[CompositeAlgorithm, Algorithm], dict[str, Any], dict[str, Path]]:
+    """Convert an input dictionary into an processed dictionary, a CompositeAlgorithm and dictionaries defining points and plots.
+
+    Several processing steps are applied, including;
+    * The `algorithms` entry is converted into a `cfspopcon.CompositeAlgorithm`. This basically gives the list of operations that we want to perform on the input data.
+    * The `points` entry is stored in a separate dictionary. This gives a set of key-value pairs of 'optimal' points (for instance, giving the point with the maximum fusion power gain).
+    * The `grids` entry is converted into an `xr.DataArray` storing a `np.linspace` or `np.logspace` of values which we scan over. We usually scan over `average_electron_density` and `average_electron_temp`, but there's nothing preventing you from scanning over other numerical input variables or having more than 2 dimensions which you scan over (n.b. this can get expensive!).
+    * Each input variable is checked to see if its name matches one of the enumerators in `cfspopcon.named_options`. These are used to store switch values, such as `cfspopcon.named_options.ReactionType.DT` which indicates that we're interested in the DT fusion reaction.
+    * Each input variable is converted into its default units. Default units are retrieved via the `cfspopcon.unit_handling.default_unit` function. This will set, for instance, the `average_electron_temp` values to have units of `keV`.
+
+    Args:
+        repr_d: Dictionary to process
+        case_dir: Relative paths specified in repr_d are interpreted as relative to this directory
+    """
+    algorithms = repr_d.pop("algorithms", dict())
+    algorithm_list: list[Union[Algorithm, CompositeAlgorithm]] = [Algorithm.get_algorithm(algorithm) for algorithm in algorithms]
 
-    # why doesn't mypy deduce the below without hint?
-    algorithm: Union[Algorithm, CompositeAlgorithm] = CompositeAlgorithm(algorithm_list) if len(algorithm_list) > 1 else algorithm_list[0]
+    if len(algorithm_list) > 1:
+        algorithm = CompositeAlgorithm(algorithm_list)
+    elif len(algorithm_list) == 1:
+        algorithm = algorithm_list[0]  # type:ignore[assignment]
+    elif len(algorithm_list) == 0:
+        algorithm = Algorithm.empty()  # type:ignore[assignment]
 
     points = repr_d.pop("points", dict())
     plots = repr_d.pop("plots", dict())
 
     process_grid_values(repr_d)
     process_named_options(repr_d)
-    process_paths(repr_d, input_file)
-    process_paths(plots, input_file)
+    process_paths(repr_d, case_dir)
+    process_paths(plots, case_dir)
     process_units(repr_d)
 
     return repr_d, algorithm, points, plots
 
 
 def process_grid_values(repr_d: dict[str, Any]):  # type:ignore[no-untyped-def]
     """Process the grid of values to run POPCON over."""
-    grid_values = repr_d.pop("grid")
+    grid_values = repr_d.pop("grid", dict())
     for key, grid_spec in grid_values.items():
         grid_spacing = grid_spec.get("spacing", "linear")
 
@@ -81,15 +106,15 @@ def process_named_options(repr_d: dict[str, Any]):  # type:ignore[no-untyped-def]
         repr_d[key] = convert_named_options(key=key, val=val)
 
 
-def process_paths(repr_d: dict[str, Any], input_file: Path):  # type:ignore[no-untyped-def]
+def process_paths(repr_d: dict[str, Any], case_dir: Path):  # type:ignore[no-untyped-def]
     """Process path tags, up to a maximum of one tag per input variable.
 
     Allowed tags are:
     * CASE_DIR: the folder that the input.yaml file is located in
     * WORKING_DIR: the current working directory that the script is being run from
     """
     path_mappings = dict(
-        CASE_DIR=input_file.parent,
+        CASE_DIR=case_dir,
         WORKING_DIR=Path("."),
     )
     if repr_d is None:
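Splitting `process_input_dictionary` out of `read_case` means a config no longer has to come from a YAML file on disk. A minimal sketch of driving the new entry point directly, using only keys that the new tests below exercise (`Q` and `algorithms`; `grid`, `points` and `plots` now all default to empty):

```python
from pathlib import Path

from cfspopcon import process_input_dictionary

# One algorithm name yields a plain Algorithm; two or more would yield a
# CompositeAlgorithm, and an empty list now falls back to Algorithm.empty().
input_dict = {
    "algorithms": ["read_atomic_data"],
    "Q": 1.0,  # plain numeric input, converted to its default units
}

# case_dir anchors any CASE_DIR path tags in the dictionary.
repr_d, algorithm, points, plots = process_input_dictionary(input_dict, Path(".").absolute())
```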
10 changes: 10 additions & 0 deletions docs/conf.py
@@ -41,6 +41,16 @@
r"https://doi.org/10.13182/FST43-67",
r"https://www.tandfonline.com/doi/full/10.13182/FST43-67",
r"https://www-internal.psfc.mit.edu/research/alcator/data/fst_cmod.pdf",
# these links in the time_independent_inductances_and_fluxes notebook are on private servers that are sometimes down
r"https://fire.pppl.gov/iaea06_ftp7_5_matsukawa.pdf",
r"https://escholarship.org/content/qt78k0v04v/qt78k0v04v_noSplash_c44c701847deffab65024dd9ceff9c59.pdf?t=p15pc5",
r"https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=585f5eb3f62f3bd76f3d667c1df357562f54c084",
r"https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=585f5eb3f62f3bd76f3d667c1df357562f54c084",
r"https://fire.pppl.gov/Snowmass_BP/FIRE.pdf",
r"https://www.ipp.mpg.de/16208/einfuehrung",
r"https://www.ipp.mpg.de/16701/jet",
r"https://iopscience.iop.org/article/10.1088/1009-0630/13/1/01",
r"https://www-internal.psfc.mit.edu/research/alcator/data/fst_cmod.pdf",
]
linkcheck_retries = 5
linkcheck_timeout = 120
1 change: 1 addition & 0 deletions pyproject.toml
@@ -19,6 +19,7 @@ classifiers = [

 [tool.poetry.scripts]
 popcon = 'cfspopcon.cli:run_popcon_cli'
+cfspopcon = 'cfspopcon.cli:run_popcon_cli'
 popcon_algorithms = 'cfspopcon.cli:write_algorithms_yaml'
 
 [tool.poetry.dependencies]
Binary file modified tests/regression_results/AUG_SepOS_result.nc
3 changes: 1 addition & 2 deletions tests/regression_results/PRD.json
@@ -21,7 +21,7 @@
"Tungsten"
],
"dims": [
"dim_0"
"dim_species"
]
}
},
@@ -1333,7 +1333,6 @@
     }
   },
   "dims": {
-    "dim_0": 5,
     "dim_rho": 50,
     "dim_species": 5
   }
Binary file modified tests/regression_results/SPARC_PRD_result.nc
10 changes: 10 additions & 0 deletions tests/test_algorithms_class.py
@@ -270,3 +270,13 @@ def test_get_algorithm():
     for key in Algorithm.algorithms():
         alg = Algorithm.get_algorithm(key)
         assert alg._name in [f"run_{key}", key, "<lambda>"]
+
+
+def test_blank_algorithm():
+    test_ds = xr.Dataset(data_vars=dict(a=xr.DataArray([1, 2, 3])))
+
+    algorithm = Algorithm.empty()
+
+    updated_ds = algorithm.update_dataset(test_ds)
+
+    xr.testing.assert_allclose(test_ds, updated_ds)
9 changes: 5 additions & 4 deletions tests/test_confinement_switch.py
@@ -1,9 +1,10 @@
-from cfspopcon.formulas.energy_confinement.switch_confinement_scaling_on_threshold import switch_to_L_mode_confinement_below_threshold
-from cfspopcon.formulas.energy_confinement.solve_for_input_power import solve_energy_confinement_scaling_for_input_power
-from cfspopcon.formulas.energy_confinement.read_energy_confinement_scalings import read_confinement_scalings
-from cfspopcon.unit_handling import ureg, magnitude_in_units
 import numpy as np
 
+from cfspopcon.formulas.energy_confinement.read_energy_confinement_scalings import read_confinement_scalings
+from cfspopcon.formulas.energy_confinement.solve_for_input_power import solve_energy_confinement_scaling_for_input_power
+from cfspopcon.formulas.energy_confinement.switch_confinement_scaling_on_threshold import switch_to_L_mode_confinement_below_threshold
+from cfspopcon.unit_handling import magnitude_in_units, ureg
 
 
 def test_switch_to_L_mode_confinement_below_threshold():
     kwargs = dict(
5 changes: 3 additions & 2 deletions tests/test_docs.py
@@ -1,14 +1,15 @@
-import pytest
 import subprocess
 import warnings
 
+import pytest
+
 pytest.importorskip("sphinx")
 from importlib.resources import files
 
 
 @pytest.mark.docs
 def test_docs():
-    "Test the Sphinx documentation."
+    """Test the Sphinx documentation."""
     popcon_directory = files("cfspopcon")
 
     doctest_output = subprocess.run(
5 changes: 3 additions & 2 deletions tests/test_for_anonymous_algorithms.py
@@ -1,7 +1,8 @@
-from cfspopcon.algorithm_class import Algorithm
-import cfspopcon
 from importlib import import_module
 
+import cfspopcon
+from cfspopcon.algorithm_class import Algorithm
+
 
 def import_all_submodules(importable, module, prefix):
     for module in module.__all__:
10 changes: 5 additions & 5 deletions tests/test_helpers.py
@@ -1,17 +1,17 @@
+import numpy as np
 import pytest
 import xarray as xr
-import numpy as np
 
 from cfspopcon import named_options
-from cfspopcon.helpers import (
-    convert_named_options,
-)
-from cfspopcon.named_options import AtomicSpecies
 from cfspopcon.formulas.impurities.impurity_array_helpers import (
     extend_impurity_concentration_array,
     make_impurity_concentration_array,
     make_impurity_concentration_array_from_kwargs,
 )
+from cfspopcon.helpers import (
+    convert_named_options,
+)
+from cfspopcon.named_options import AtomicSpecies
 
 
 def test_convert_named_options():
4 changes: 2 additions & 2 deletions tests/test_infra/test_line_selection.py
@@ -1,8 +1,8 @@
 import numpy as np
 import xarray as xr
-from cfspopcon.unit_handling import ureg, Quantity, magnitude, convert_units
 
-from cfspopcon.shaping_and_selection.line_selection import interpolate_onto_line, find_coords_of_contour
+from cfspopcon.shaping_and_selection.line_selection import find_coords_of_contour, interpolate_onto_line
+from cfspopcon.unit_handling import Quantity, convert_units, magnitude, ureg
 
 
 def test_extract_values_along_contour():
57 changes: 57 additions & 0 deletions tests/test_input_file_handling.py
@@ -0,0 +1,57 @@
+from pathlib import Path
+
+import pytest
+import yaml
+
+from cfspopcon.algorithm_class import Algorithm, CompositeAlgorithm
+from cfspopcon.input_file_handling import read_case, process_input_dictionary
+
+
+@pytest.fixture
+def test_dict():
+    return dict(Q=1.0)
+
+
+@pytest.fixture
+def case_dir():
+    return Path(".").absolute()
+
+
+def test_blank_dictionary(test_dict, case_dir):
+    process_input_dictionary(test_dict, case_dir)
+
+
+def test_blank_file(test_dict, tmp_path):
+    with open(tmp_path / "input.yaml", "w") as file:
+        yaml.dump(test_dict, file)
+
+    read_case(tmp_path)
+
+
+def test_blank_file_with_another_suffix(test_dict, tmp_path):
+    with open(tmp_path / "another.filetype", "w") as file:
+        yaml.dump(test_dict, file)
+
+    read_case(tmp_path / "another.filetype")
+
+
+def test_algorithm_read_single_from_input_file(case_dir):
+    test_dict = dict(algorithms=["read_atomic_data"])
+
+    repr_d, algorithm, points, plots = process_input_dictionary(test_dict, case_dir)
+
+    assert isinstance(algorithm, Algorithm)
+
+
+def test_algorithm_read_multiple_from_input_file(case_dir):
+    test_dict = dict(algorithms=["read_atomic_data", "set_up_impurity_concentration_array"])
+
+    repr_d, algorithm, points, plots = process_input_dictionary(test_dict, case_dir)
+
+    assert isinstance(algorithm, CompositeAlgorithm)
+
+
+def test_read_example_input_file():
+    example_case = Path(__file__).parents[1] / "example_cases" / "SPARC_PRD" / "input.yaml"
+
+    read_case(example_case)
