Input_file_handling_robustness (#107)
* Improve flexibility of input file handling

* Split input dict processing from yaml read

* Repair tests

* Update cfspopcon/input_file_handling.py

* Update cfspopcon/input_file_handling.py

Co-authored-by: Christoph Hasse <[email protected]>

---------

Co-authored-by: Christoph Hasse <[email protected]>
tbody-cfs and hassec authored Sep 19, 2024
1 parent 520f458 commit f4c3431
Showing 21 changed files with 181 additions and 58 deletions.
5 changes: 4 additions & 1 deletion .gitignore
@@ -141,4 +141,7 @@ radas_dir/*
 popcon_algorithms.yaml
 
 # Have an untracked folder for rough working
-untracked/
+untracked/
+# Have a cases folder for personal cases which shouldn't be added
+# to the index
+cases/*
3 changes: 2 additions & 1 deletion cfspopcon/__init__.py
@@ -8,7 +8,7 @@
 from . import file_io, formulas, named_options, shaping_and_selection
 from .algorithm_class import Algorithm, CompositeAlgorithm
 from .formulas.atomic_data import AtomicData
-from .input_file_handling import read_case
+from .input_file_handling import process_input_dictionary, read_case
 from .plotting import read_plot_style
 from .unit_handling import (
     convert_to_default_units,
@@ -23,6 +23,7 @@
"named_options",
"magnitude_in_default_units",
"convert_to_default_units",
"process_input_dictionary",
"set_default_units",
"convert_units",
"read_case",
10 changes: 10 additions & 0 deletions cfspopcon/algorithm_class.py
@@ -174,6 +174,16 @@ def function_wrapper(func: GenericFunctionType) -> GenericFunctionType:

         return function_wrapper
 
+    @classmethod
+    def empty(cls) -> Algorithm:
+        """Makes a 'do nothing' algorithm, in case you don't want to use the algorithm functionality."""
+
+        def do_nothing() -> dict[str, Any]:
+            result_dict: dict[str, Any] = {}
+            return result_dict
+
+        return cls(do_nothing, return_keys=[], name="empty", skip_registration=True)
+
     def validate_inputs(
         self, configuration: Union[dict, xr.Dataset], quiet: bool = False, raise_error_on_missing_inputs: bool = False
     ) -> bool:
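The new classmethod is exercised by `test_blank_algorithm` further down. As a minimal sketch grounded in that test (using only the `Algorithm` export from `cfspopcon`), an empty algorithm hands a dataset back unchanged:

```python
import xarray as xr

from cfspopcon import Algorithm

# Algorithm.empty() declares no inputs and no return keys, so updating a
# dataset with it is a no-op. This is the fallback used when an input file
# lists no algorithms at all.
ds = xr.Dataset(data_vars=dict(a=xr.DataArray([1, 2, 3])))

updated = Algorithm.empty().update_dataset(ds)
xr.testing.assert_allclose(ds, updated)  # dataset is returned unchanged
```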
18 changes: 13 additions & 5 deletions cfspopcon/file_io.py
@@ -24,8 +24,13 @@
 ]
 
 
-def sanitize_variable(val: xr.DataArray, key: str) -> Union[xr.DataArray, str]:
-    """Strip units and Enum values from a variable so that it can be stored in a NetCDF file."""
+def sanitize_variable(val: xr.DataArray, key: str, coord: bool = False) -> Union[xr.DataArray, str]:
+    """Strip units and Enum values from a variable so that it can be stored in a NetCDF file.
+
+    If you set coord=True and you pass in a scalar val, val is wrapped in a length-1 array to
+    circumvent an xarray issue regarding single-value coordinates.
+    See https://github.com/pydata/xarray/issues/1709.
+    """
     try:
         val = convert_to_default_units(val, key).pint.dequantify()
     except KeyError:
@@ -34,9 +39,12 @@ def sanitize_variable(val: xr.DataArray, key: str) -> Union[xr.DataArray, str]:
     if val.dtype == object:
         try:
             if val.size == 1:
-                val = val.item().name
+                if not coord:
+                    val = val.item().name
+                else:
+                    val = xr.DataArray([val.item().name])
             else:
-                val = xr.DataArray([v.name for v in val.values])
+                val = xr.DataArray([v.name for v in val.values], dims=val.dims)
         except AttributeError:
             warnings.warn(f"Cannot handle {key}. Dropping variable.", stacklevel=3)
             return "UNHANDLED"
@@ -62,7 +70,7 @@ def write_dataset_to_netcdf(

     for key in serialized_dataset.coords:  # type:ignore[assignment]
         assert isinstance(key, str)
-        serialized_dataset[key] = sanitize_variable(dataset[key], key)
+        serialized_dataset[key] = sanitize_variable(dataset[key], key, coord=True)
 
     serialized_dataset.to_netcdf(filepath, engine=netcdf_writer)

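The `coord=True` branch above is easiest to see with a zero-dimensional, Enum-valued variable, which is how switch values end up on dataset coordinates. A hedged sketch of the two code paths (the `Reaction` enum is invented for illustration; cfspopcon's real switches live in `cfspopcon.named_options`):

```python
from enum import Enum

import xarray as xr


class Reaction(Enum):  # stand-in for a cfspopcon named option
    DT = 1


val = xr.DataArray(Reaction.DT)  # 0-d object array, like a scalar coordinate

# coord=False path: unwrap to the bare Enum name, a plain Python str.
as_data_var = val.item().name  # -> "DT"

# coord=True path: wrap the name in a length-1 DataArray so it can be
# reassigned as a single-value coordinate without hitting the xarray
# issue linked in the docstring (pydata/xarray#1709).
as_coord = xr.DataArray([val.item().name])
```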
65 changes: 45 additions & 20 deletions cfspopcon/input_file_handling.py
@@ -22,43 +22,68 @@ def read_case(
     kwargs can be an arbitrary dictionary of key-value pairs that overwrite the config values.
     """
-    if kwargs is None:
-        kwargs = dict()
-    if Path(case).exists():
-        case = Path(case)
-        if case.is_dir():
-            input_file = case / "input.yaml"
-        else:
-            input_file = case
-    else:
+    case = Path(case)
+
+    if not case.exists():
         raise FileNotFoundError(f"Could not find {case}.")
+
+    if case.is_dir():
+        case_dir = case
+        input_file = case_dir / "input.yaml"
+    else:
+        case_dir = case.parent
+        input_file = case
 
     with open(input_file) as file:
-        repr_d: dict[str, Any] = yaml.load(file, Loader=yaml.FullLoader)
+        repr_d = yaml.load(file, Loader=yaml.FullLoader)
 
-    repr_d.update(kwargs)
+    if kwargs is not None:
+        repr_d.update(kwargs)
 
-    algorithms = repr_d.pop("algorithms")
-    algorithm_list = [Algorithm.get_algorithm(algorithm) for algorithm in algorithms]
+    return process_input_dictionary(repr_d, case_dir)
+
+
+def process_input_dictionary(
+    repr_d: dict[str, Any], case_dir: Path
+) -> tuple[dict[str, Any], Union[CompositeAlgorithm, Algorithm], dict[str, Any], dict[str, Path]]:
+    """Convert an input dictionary into an processed dictionary, a CompositeAlgorithm and dictionaries defining points and plots.
+
+    Several processing steps are applied, including;
+    * The `algorithms` entry is converted into a `cfspopcon.CompositeAlgorithm`. This basically gives the list of operations that we want to perform on the input data.
+    * The `points` entry is stored in a separate dictionary. This gives a set of key-value pairs of 'optimal' points (for instance, giving the point with the maximum fusion power gain).
+    * The `grids` entry is converted into an `xr.DataArray` storing a `np.linspace` or `np.logspace` of values which we scan over. We usually scan over `average_electron_density` and `average_electron_temp`, but there's nothing preventing you from scanning over other numerical input variables or having more than 2 dimensions which you scan over (n.b. this can get expensive!).
+    * Each input variable is checked to see if its name matches one of the enumerators in `cfspopcon.named_options`. These are used to store switch values, such as `cfspopcon.named_options.ReactionType.DT` which indicates that we're interested in the DT fusion reaction.
+    * Each input variable is converted into its default units. Default units are retrieved via the `cfspopcon.unit_handling.default_unit` function. This will set, for instance, the `average_electron_temp` values to have units of `keV`.
+
+    Args:
+        repr_d: Dictionary to process
+        case_dir: Relative paths specified in repr_d are interpreted as relative to this directory
+    """
+    algorithms = repr_d.pop("algorithms", dict())
+    algorithm_list: list[Union[Algorithm, CompositeAlgorithm]] = [Algorithm.get_algorithm(algorithm) for algorithm in algorithms]
 
-    # why doesn't mypy deduce the below without hint?
-    algorithm: Union[Algorithm, CompositeAlgorithm] = CompositeAlgorithm(algorithm_list) if len(algorithm_list) > 1 else algorithm_list[0]
+    if len(algorithm_list) > 1:
+        algorithm = CompositeAlgorithm(algorithm_list)
+    elif len(algorithm_list) == 1:
+        algorithm = algorithm_list[0]  # type:ignore[assignment]
+    elif len(algorithm_list) == 0:
+        algorithm = Algorithm.empty()  # type:ignore[assignment]
 
     points = repr_d.pop("points", dict())
     plots = repr_d.pop("plots", dict())
 
     process_grid_values(repr_d)
     process_named_options(repr_d)
-    process_paths(repr_d, input_file)
-    process_paths(plots, input_file)
+    process_paths(repr_d, case_dir)
+    process_paths(plots, case_dir)
     process_units(repr_d)
 
     return repr_d, algorithm, points, plots
 
 
 def process_grid_values(repr_d: dict[str, Any]):  # type:ignore[no-untyped-def]
     """Process the grid of values to run POPCON over."""
-    grid_values = repr_d.pop("grid")
+    grid_values = repr_d.pop("grid", dict())
     for key, grid_spec in grid_values.items():
         grid_spacing = grid_spec.get("spacing", "linear")
 
@@ -81,15 +106,15 @@ def process_named_options(repr_d: dict[str, Any]):  # type:ignore[no-untyped-def]
         repr_d[key] = convert_named_options(key=key, val=val)
 
 
-def process_paths(repr_d: dict[str, Any], input_file: Path):  # type:ignore[no-untyped-def]
+def process_paths(repr_d: dict[str, Any], case_dir: Path):  # type:ignore[no-untyped-def]
     """Process path tags, up to a maximum of one tag per input variable.
 
     Allowed tags are:
     * CASE_DIR: the folder that the input.yaml file is located in
     * WORKING_DIR: the current working directory that the script is being run from
     """
     path_mappings = dict(
-        CASE_DIR=input_file.parent,
+        CASE_DIR=case_dir,
         WORKING_DIR=Path("."),
     )
     if repr_d is None:
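Splitting `process_input_dictionary` out of `read_case` means a config no longer has to come from a YAML file on disk. A minimal sketch of driving the new entry point directly, using only keys that the new tests below exercise (`Q` and `algorithms`; `grid`, `points` and `plots` now all default to empty):

```python
from pathlib import Path

from cfspopcon import process_input_dictionary

# One algorithm name yields a plain Algorithm; two or more would yield a
# CompositeAlgorithm, and an empty list now falls back to Algorithm.empty().
input_dict = {
    "algorithms": ["read_atomic_data"],
    "Q": 1.0,  # plain numeric input, converted to its default units
}

# case_dir anchors any CASE_DIR path tags in the dictionary.
repr_d, algorithm, points, plots = process_input_dictionary(input_dict, Path(".").absolute())
```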
10 changes: 10 additions & 0 deletions docs/conf.py
@@ -41,6 +41,16 @@
r"https://doi.org/10.13182/FST43-67",
r"https://www.tandfonline.com/doi/full/10.13182/FST43-67",
r"https://www-internal.psfc.mit.edu/research/alcator/data/fst_cmod.pdf",
# these links in the time_independent_inductances_and_fluxes notebook are on private servers that are sometimes down
r"https://fire.pppl.gov/iaea06_ftp7_5_matsukawa.pdf",
r"https://escholarship.org/content/qt78k0v04v/qt78k0v04v_noSplash_c44c701847deffab65024dd9ceff9c59.pdf?t=p15pc5",
r"https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=585f5eb3f62f3bd76f3d667c1df357562f54c084",
r"https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=585f5eb3f62f3bd76f3d667c1df357562f54c084",
r"https://fire.pppl.gov/Snowmass_BP/FIRE.pdf",
r"https://www.ipp.mpg.de/16208/einfuehrung",
r"https://www.ipp.mpg.de/16701/jet",
r"https://iopscience.iop.org/article/10.1088/1009-0630/13/1/01",
r"https://www-internal.psfc.mit.edu/research/alcator/data/fst_cmod.pdf",
]
linkcheck_retries = 5
linkcheck_timeout = 120
1 change: 1 addition & 0 deletions pyproject.toml
@@ -19,6 +19,7 @@ classifiers = [

 [tool.poetry.scripts]
 popcon = 'cfspopcon.cli:run_popcon_cli'
+cfspopcon = 'cfspopcon.cli:run_popcon_cli'
 popcon_algorithms = 'cfspopcon.cli:write_algorithms_yaml'
 
 [tool.poetry.dependencies]
Binary file modified tests/regression_results/AUG_SepOS_result.nc
3 changes: 1 addition & 2 deletions tests/regression_results/PRD.json
@@ -21,7 +21,7 @@
"Tungsten"
],
"dims": [
"dim_0"
"dim_species"
]
}
},
@@ -1333,7 +1333,6 @@
     }
   },
   "dims": {
-    "dim_0": 5,
     "dim_rho": 50,
     "dim_species": 5
   }
Binary file modified tests/regression_results/SPARC_PRD_result.nc
10 changes: 10 additions & 0 deletions tests/test_algorithms_class.py
@@ -270,3 +270,13 @@ def test_get_algorithm():
     for key in Algorithm.algorithms():
         alg = Algorithm.get_algorithm(key)
         assert alg._name in [f"run_{key}", key, "<lambda>"]
+
+
+def test_blank_algorithm():
+    test_ds = xr.Dataset(data_vars=dict(a=xr.DataArray([1, 2, 3])))
+
+    algorithm = Algorithm.empty()
+
+    updated_ds = algorithm.update_dataset(test_ds)
+
+    xr.testing.assert_allclose(test_ds, updated_ds)
9 changes: 5 additions & 4 deletions tests/test_confinement_switch.py
@@ -1,9 +1,10 @@
-from cfspopcon.formulas.energy_confinement.switch_confinement_scaling_on_threshold import switch_to_L_mode_confinement_below_threshold
-from cfspopcon.formulas.energy_confinement.solve_for_input_power import solve_energy_confinement_scaling_for_input_power
-from cfspopcon.formulas.energy_confinement.read_energy_confinement_scalings import read_confinement_scalings
-from cfspopcon.unit_handling import ureg, magnitude_in_units
 import numpy as np
 
+from cfspopcon.formulas.energy_confinement.read_energy_confinement_scalings import read_confinement_scalings
+from cfspopcon.formulas.energy_confinement.solve_for_input_power import solve_energy_confinement_scaling_for_input_power
+from cfspopcon.formulas.energy_confinement.switch_confinement_scaling_on_threshold import switch_to_L_mode_confinement_below_threshold
+from cfspopcon.unit_handling import magnitude_in_units, ureg
 
 
 def test_switch_to_L_mode_confinement_below_threshold():
     kwargs = dict(
5 changes: 3 additions & 2 deletions tests/test_docs.py
@@ -1,14 +1,15 @@
-import pytest
 import subprocess
 import warnings
 
+import pytest
+
 pytest.importorskip("sphinx")
 from importlib.resources import files
 
 
 @pytest.mark.docs
 def test_docs():
-    "Test the Sphinx documentation."
+    """Test the Sphinx documentation."""
     popcon_directory = files("cfspopcon")
 
     doctest_output = subprocess.run(
5 changes: 3 additions & 2 deletions tests/test_for_anonymous_algorithms.py
@@ -1,7 +1,8 @@
-from cfspopcon.algorithm_class import Algorithm
-import cfspopcon
 from importlib import import_module
 
+import cfspopcon
+from cfspopcon.algorithm_class import Algorithm
+
 
 def import_all_submodules(importable, module, prefix):
     for module in module.__all__:
10 changes: 5 additions & 5 deletions tests/test_helpers.py
@@ -1,17 +1,17 @@
+import numpy as np
 import pytest
 import xarray as xr
-import numpy as np
 
 from cfspopcon import named_options
-from cfspopcon.helpers import (
-    convert_named_options,
-)
-from cfspopcon.named_options import AtomicSpecies
 from cfspopcon.formulas.impurities.impurity_array_helpers import (
     extend_impurity_concentration_array,
     make_impurity_concentration_array,
     make_impurity_concentration_array_from_kwargs,
 )
+from cfspopcon.helpers import (
+    convert_named_options,
+)
+from cfspopcon.named_options import AtomicSpecies
 
 
 def test_convert_named_options():
4 changes: 2 additions & 2 deletions tests/test_infra/test_line_selection.py
@@ -1,8 +1,8 @@
 import numpy as np
 import xarray as xr
-from cfspopcon.unit_handling import ureg, Quantity, magnitude, convert_units
 
-from cfspopcon.shaping_and_selection.line_selection import interpolate_onto_line, find_coords_of_contour
+from cfspopcon.shaping_and_selection.line_selection import find_coords_of_contour, interpolate_onto_line
+from cfspopcon.unit_handling import Quantity, convert_units, magnitude, ureg
 
 
 def test_extract_values_along_contour():
57 changes: 57 additions & 0 deletions tests/test_input_file_handling.py
@@ -0,0 +1,57 @@
+from pathlib import Path
+
+import pytest
+import yaml
+
+from cfspopcon.algorithm_class import Algorithm, CompositeAlgorithm
+from cfspopcon.input_file_handling import read_case, process_input_dictionary
+
+
+@pytest.fixture
+def test_dict():
+    return dict(Q=1.0)
+
+
+@pytest.fixture
+def case_dir():
+    return Path(".").absolute()
+
+
+def test_blank_dictionary(test_dict, case_dir):
+    process_input_dictionary(test_dict, case_dir)
+
+
+def test_blank_file(test_dict, tmp_path):
+    with open(tmp_path / "input.yaml", "w") as file:
+        yaml.dump(test_dict, file)
+
+    read_case(tmp_path)
+
+
+def test_blank_file_with_another_suffix(test_dict, tmp_path):
+    with open(tmp_path / "another.filetype", "w") as file:
+        yaml.dump(test_dict, file)
+
+    read_case(tmp_path / "another.filetype")
+
+
+def test_algorithm_read_single_from_input_file(case_dir):
+    test_dict = dict(algorithms=["read_atomic_data"])
+
+    repr_d, algorithm, points, plots = process_input_dictionary(test_dict, case_dir)
+
+    assert isinstance(algorithm, Algorithm)
+
+
+def test_algorithm_read_multiple_from_input_file(case_dir):
+    test_dict = dict(algorithms=["read_atomic_data", "set_up_impurity_concentration_array"])
+
+    repr_d, algorithm, points, plots = process_input_dictionary(test_dict, case_dir)
+
+    assert isinstance(algorithm, CompositeAlgorithm)
+
+
+def test_read_example_input_file():
+    example_case = Path(__file__).parents[1] / "example_cases" / "SPARC_PRD" / "input.yaml"
+
+    read_case(example_case)
