Skip to content

Commit

Permalink
Allow quick exclusion from run_methods (#368)
Browse files Browse the repository at this point in the history
* Add more descriptive name specification with ~ as prefix

* Remove run_tags; Update behavior of run_methods and run_columns

* Remove tags from new simple example

* Fix & clean should_run logic

* Improve test

* Fix default example

* Change to inline comments to be consistent with get_shot_data

* Shorten inline comments

* Remove inline comments

* revamp comments in defaults.py

---------

Co-authored-by: Gregorio L. Trevisan <[email protected]>
Co-authored-by: Amos Decker <[email protected]>
Co-authored-by: yumouwei <[email protected]>
  • Loading branch information
4 people authored Jan 23, 2025
1 parent b16e3dc commit ce0f27d
Show file tree
Hide file tree
Showing 16 changed files with 123 additions and 80 deletions.
8 changes: 1 addition & 7 deletions disruption_py/core/physics_method/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
def physics_method(
cache: bool = True,
tokamak: Union[Tokamak, List[Tokamak]] = None,
tags: List[str] = None,
columns: Union[List[str], Callable] = None,
) -> Callable:
"""
Expand All @@ -23,7 +22,7 @@ def physics_method(
The decorated method calculates disruption parameters and returns a Pandas
DataFrame. All decorated methods must take the single argument params of type
`PhysicsMethodParams`. The decorated method will be run if designated by the
`run_methods`, `run_tags`, or `run_columns` attributes of the `RetrievalSettings`
`run_methods` or `run_columns` attributes of the `RetrievalSettings`
class, and if included inside of the `custom_physics_methods` argument of the
`retrieval_settings` or in the built-in method list. If run, the result of the
decorated method will be output to the `output_setting`.
Expand All @@ -38,10 +37,6 @@ def physics_method(
tokamak : Union['Tokamak', List['Tokamak']], optional
A list of Tokamak objects that represent which tokamak this method may
be used for, by default None, allows the method to be run for any tokamak.
tags : List[str], optional
The list of tags to help identify this method. Tags can be used when calling
`get_shots_data` to have disruption_py use multiple functions at the same
time. Default is ["all"].
columns : Union[List[str], Callable], optional
The columns that are in the DataFrame returned by the method. Alternatively,
you can pass a method that returns the names of used trees at runtime. Default
Expand All @@ -64,7 +59,6 @@ def outer_wrapper(method: Callable) -> Callable:
cache=cache,
tokamaks=tokamak,
columns=columns,
tags=tags,
)

wrapper.method_metadata = method_metadata
Expand Down
2 changes: 0 additions & 2 deletions disruption_py/core/physics_method/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,13 @@ class MethodMetadata:
cache: bool
tokamaks: Union[Tokamak, List[Tokamak]]
columns: Union[List[str], Callable]
tags: List[str]

ALLOWED_UNRESOLVED = [
"columns",
"tokamaks",
]

def __post_init__(self):
object.__setattr__(self, "tags", self.tags or ["all"])
object.__setattr__(self, "columns", self.columns or [])


Expand Down
45 changes: 31 additions & 14 deletions disruption_py/core/physics_method/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def filter_methods_to_run(
all_bound_method_metadata : list[BoundMethodMetadata]
A list of bound method metadata instances.
retrieval_settings : RetrievalSettings
The settings that dictate which methods should be run based on tags, methods,
The settings that dictate which methods should be run based on methods
and columns.
physics_method_params : PhysicsMethodParams
The parameters that will be passed to the methods that are run.
Expand All @@ -154,10 +154,12 @@ def filter_methods_to_run(
list
A list of bound method metadata instances that are eligible to run.
"""
tags = retrieval_settings.run_tags
methods = retrieval_settings.run_methods
columns = REQUIRED_COLS.union(retrieval_settings.run_columns)

if retrieval_settings.run_columns is not None:
columns = REQUIRED_COLS.union(retrieval_settings.run_columns)
else:
columns = None
only_excluded_methods_specified = all("~" in method for method in methods or [])
methods_to_run = []
for bound_method_metadata in all_bound_method_metadata:
# exclude if tokamak does not match
Expand All @@ -171,15 +173,27 @@ def filter_methods_to_run(
):
continue

if tags is not None and bool(
set(bound_method_metadata.tags).intersection(tags)
):
methods_to_run.append(bound_method_metadata)
elif methods is not None and bound_method_metadata.name in methods:
methods_to_run.append(bound_method_metadata)
elif columns is not None and bool(
both_none = methods is None and columns is None
method_specified = methods is not None and bound_method_metadata.name in methods
column_speficied = columns is not None and bool(
set(bound_method_metadata.columns).intersection(columns)
):
)
is_not_excluded = (
only_excluded_methods_specified
and not columns
and methods
and (("~" + bound_method_metadata.name) not in methods)
)
should_run = (
both_none or method_specified or column_speficied or is_not_excluded
)

# reasons that methods should be excluded even if should_run is true
should_not_run = (
methods is not None and ("~" + bound_method_metadata.name) in methods
)

if should_run and not should_not_run:
methods_to_run.append(bound_method_metadata)
else:
physics_method_params.logger.debug(
Expand Down Expand Up @@ -256,7 +270,7 @@ def populate_shot(
This function executes the physics methods included through the
`custom_physics_methods` property of retrieval_settings or in the built-in list
of methods. It selects methods based on run_methods, run_tags, and run_columns
of methods. It selects methods based on run_methods and run_columns
in retrieval_settings.
Parameters
Expand Down Expand Up @@ -375,7 +389,10 @@ def populate_shot(

local_data = pd.concat([pre_filled_shot_data] + filtered_methods, axis=1)
local_data = local_data.loc[:, ~local_data.columns.duplicated()]
if retrieval_settings.only_requested_columns:
if (
retrieval_settings.only_requested_columns
and retrieval_settings.run_columns is not None
):
include_columns = list(
REQUIRED_COLS.union(
set(retrieval_settings.run_columns).intersection(
Expand Down
41 changes: 15 additions & 26 deletions disruption_py/settings/retrieval_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,6 @@
)


def default_tags():
"""Return the default tag 'all'."""
return ["all"]


class InterpolationMethod(Enum):
"""Enum for specifying interpolation methods."""

Expand Down Expand Up @@ -56,29 +51,23 @@ class RetrievalSettings:
efit_nickname_setting : NicknameSetting, optional
Nickname setting for retrieving efit tree data (default is "disruption").
run_methods : list of str, optional
List of physics methods to run (default is an empty list). Named methods
will be run when retrieving data from MDSplus for the shot. Named methods
must have the physics_method decorator and either be passed in the
`custom_physics_methods` argument or included in the built-in list. Defaults
to an empty list.
run_tags : list of str, optional
List of method tags to run (default is ["all"]). Methods used for retrieving
data from MDSplus can be tagged with the physics_method decorator and can
either be passed in the `custom_physics_methods` argument or included in
the built-in list. All methods with at least one included tag will be run.
List of physics methods to run (default is None). If None, and run_columns
is None, all methods will be run. Named methods will be run when retrieving
data from MDSplus for the shot. Named methods must have the physics_method
decorator and either be passed in the `custom_physics_methods` argument
or included in the built-in method holders. A name prefixed with `~` excludes
that method; if every entry is an exclusion and run_columns is None, all
other methods will be run.
run_columns : list of str, optional
List of columns to retrieve (default is an empty list). All methods with
the physics_method decorator referenced as containing an included column
will be run and all columns returned by those methods will be used. Methods
can either be passed in the `custom_physics_methods` argument or included
in the built-in list. If you wish to only return the requested columns,
set only_requested_columns to true in the retrieval_settings.
List of columns to retrieve (default is None). If None, and run_methods is
None, all methods will be run. If specified, all methods with the physics_method
decorator referencing the specified column will be run and all columns returned
by those methods will be used. If you wish to only return the requested columns,
set only_requested_columns to True in the retrieval_settings.
only_requested_columns : bool, optional
Whether to only include requested columns in the result (default is False).
custom_physics_methods : list, optional
List of custom physics methods (default is an empty list). The methods are
collected and run when retrieving data from MDSplus if the method is included
through either the run_methods, run_tags, run_columns setting.
through either the run_methods or run_columns setting.
time_setting : TimeSetting, optional
Time setting for the shot (default is "disruption_warning"). The retrieved
data will be interpolated to this timebase. Can pass any `TimeSettingType`
Expand All @@ -100,9 +89,8 @@ class RetrievalSettings:
efit_nickname_setting: NicknameSetting = "disruption"

# Shot run settings
run_methods: List[str] = field(default_factory=list)
run_tags: List[str] = field(default_factory=default_tags)
run_columns: List[str] = field(default_factory=list)
run_methods: List[str] | None = None
run_columns: List[str] | None = None
only_requested_columns: bool = False
custom_physics_methods: list = field(default_factory=list)

Expand Down Expand Up @@ -170,4 +158,5 @@ def resolve(self):
self.time_setting, CacheTimeSetting
):
self.time_setting = CacheTimeSetting(self.time_setting)
self.run_columns = [col.lower() for col in self.run_columns]
if self.run_columns is not None:
self.run_columns = [col.lower() for col in self.run_columns]
3 changes: 1 addition & 2 deletions docs/quickstart/usage_quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@ For a simple way to get started, check out [`simple.py`](https://github.com/MIT-
```python
from disruption_py.settings.retrieval_settings import RetrievalSettings

# Run all available methods by default
retrieval_settings = RetrievalSettings(
# Use the efit timebase when returning data
time_setting="efit",
# Run all available methods
run_tags=["all"],
)
```

Expand Down
4 changes: 2 additions & 2 deletions docs/usage/physics_methods/physics_method_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ For a physics method to be run after calling [`get_shots_data`][disruption_py.wo

2. The method must have the `physics_method` decorator with its `tokamak` parameter either not set or set to the tokamak that you are retrieving data from.

3. The method is included to run via either the `run_methods`, `run_tags`, or `run_columns` parameters of the shot settings.
3. The method is included to run via either the `run_methods` or `run_columns` parameters of the shot settings.
- To be included via `run_methods`, the method name must be listed inside of `run_methods`
- To be included via `run_tags`, the method must have a tag listed in the `tags` parameter of the `physics_method` decorator that is included in `run_tags`
- To be included via `run_columns`, the method must have a column listed in the `columns` parameter of the `physics_method` decorator that is included in `run_columns`
- If neither `run_methods` nor `run_columns` is specified, all built-in methods will be run


Once all designated methods have been collected, DisruptionPy optimizes their execution order to minimize resource usage by using the information supplied in the `physics_method` decorator. Once reordering is complete, the methods are run.
Expand Down
2 changes: 0 additions & 2 deletions drafts/machine/cmod/physics.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ def get_edge_parameters(times, p_Te, p_ne, edge_rho_min=0.85, edge_rho_max=0.95)

@staticmethod
@physics_method(
tags=["experimental"],
columns=["te_edge", "ne_edge"],
tokamak=Tokamak.CMOD,
)
Expand Down Expand Up @@ -205,7 +204,6 @@ def get_H98():
# TODO: Finish
@staticmethod
@physics_method(
tags=["experimental"],
columns=["h98", "wmhd", "btor", "dwmhd_dt", "p_input"],
tokamak=Tokamak.CMOD,
)
Expand Down
1 change: 0 additions & 1 deletion drafts/machine/d3d/physics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ class D3DDraftPhysicsMethods:

@staticmethod
@physics_method(
tags=["experimental"],
tokamak=Tokamak.D3D,
columns=[
"te_core",
Expand Down
1 change: 0 additions & 1 deletion drafts/plots/peaking_factors_ece.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
retrieval_settings = RetrievalSettings(
time_setting="disruption_warning", # use the set efit's timebase
efit_nickname_setting="efit18", # set the efit
run_tags=[],
run_methods=["_get_te_profile_params_ece"],
run_columns=signals,
only_requested_columns=True,
Expand Down
27 changes: 16 additions & 11 deletions examples/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,41 @@
from disruption_py.workflow import get_shots_data

retrieval_settings = RetrievalSettings(
# data settings
cache_setting=None,
efit_nickname_setting="disruption",
# method selection
run_methods=[],
run_tags=["all"],
run_columns=[],
# method/column selection
# default None: all methods/columns
run_methods=None,
run_columns=None,
only_requested_columns=False,
custom_physics_methods=[],
# timebase settings
time_setting="disruption_warning",
domain_setting="full",
use_cache_setting_timebase=False,
# not yet implemented
interpolation_method="linear",
)

shot_data = get_shots_data(
shotlist_setting=[], # required
tokamak=None, # defaults to detect from environment
database_initializer=None, # defaults to SQL connection for tokamak
mds_connection_initializer=None, # defaults to MDSplus server string for tokamak
# required argument
shotlist_setting=[],
# default None: detect from environment
tokamak=None,
# default None: standard SQL/MDSplus connection
database_initializer=None,
mds_connection_initializer=None,
retrieval_settings=retrieval_settings,
output_setting="dataframe",
num_processes=1,
log_settings=LogSettings(
log_file_path=None, # defaults to "output.log" in tmp folder for the session
# default None: "output.log" in temporary session folder
log_file_path=None,
file_log_level="DEBUG",
log_file_write_mode="w",
log_to_console=True,
console_log_level=None, # defaults to VERBOSE but varies based on number of shots
# default None: VERBOSE, or higher based on number of shots
console_log_level=None,
use_custom_logging=False,
),
)
1 change: 0 additions & 1 deletion examples/efit.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def main():
print(f"Initialized for tokamak: {tokamak.value}")

retrieval_settings = RetrievalSettings(
run_tags=[],
run_methods=run_methods,
efit_nickname_setting="default",
)
Expand Down
4 changes: 2 additions & 2 deletions examples/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ def main(tokamak, methods, shots, processes, log_level):
tokamak = resolve_tokamak_from_environment()
if not shots:
shots, *_ = get_tokamak_test_shotlist(tokamak)
tags = [] if methods else ["all"]
methods = methods or None

out = get_shots_data(
tokamak=tokamak,
shotlist_setting=shots,
retrieval_settings=RetrievalSettings(run_methods=methods, run_tags=tags),
retrieval_settings=RetrievalSettings(run_methods=methods),
num_processes=processes,
log_settings=log_level,
output_setting="dataframe",
Expand Down
1 change: 0 additions & 1 deletion tests/test_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def my_physics_method(params: PhysicsMethodParams):
return {col_name: np.ones(shape=len(params.times))}

retrieval_settings = RetrievalSettings(
run_tags=[],
run_columns=[col_name],
only_requested_columns=True,
custom_physics_methods=[my_physics_method],
Expand Down
2 changes: 0 additions & 2 deletions tests/test_output_setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ def initial_mdsplus_data_fixture(shotlist, tokamak, test_file_path_f) -> Dict:
retrieval_settings = RetrievalSettings(
efit_nickname_setting="disruption",
run_columns=FIRST_ITERATION_COLUMNS,
run_tags=[],
only_requested_columns=True,
)
all_outputs = get_shots_data(
Expand Down Expand Up @@ -121,7 +120,6 @@ def test_sql_output_setting(
retrieval_settings = RetrievalSettings(
efit_nickname_setting="disruption",
run_columns=ALL_ITERATION_COLUMNS,
run_tags=[],
only_requested_columns=True,
)
shot_data = get_shots_data(
Expand Down
Loading

0 comments on commit ce0f27d

Please sign in to comment.