Allow quick exclusion from run_methods (#368)

* Add more descriptive name specification with ~ as prefix * Remove run_tags; Update behavior of run_methods and run_columns * Remove tags from new simple example * Fix & clean should_run logic * Improve test * Fix default example * Change to inline comments to be consistent with get_shot_data * Shorten inline comments * Remove inline comments * revamp comments in defaults.py --------- Co-authored-by: Gregorio L. Trevisan <[email protected]> Co-authored-by: Amos Decker <[email protected]> Co-authored-by: yumouwei <[email protected]>
MIT-PSFC · Jan 23, 2025 · ce0f27d · ce0f27d
1 parent b16e3dc
commit ce0f27d
Show file tree

Hide file tree

Showing 16 changed files with 123 additions and 80 deletions.
diff --git a/disruption_py/core/physics_method/decorator.py b/disruption_py/core/physics_method/decorator.py
@@ -14,7 +14,6 @@
 def physics_method(
     cache: bool = True,
     tokamak: Union[Tokamak, List[Tokamak]] = None,
-    tags: List[str] = None,
     columns: Union[List[str], Callable] = None,
 ) -> Callable:
     """
@@ -23,7 +22,7 @@ def physics_method(
     The decorated method calculates disruption parameters and returns a Pandas
     DataFrame. All decorated methods must take the single argument params of type
     `PhysicsMethodParams`. The decorated method will be run if designated by the
-    `run_methods`, `run_tags`, or `run_columns` attributes of the `RetrievalSettings`
+    `run_methods` or `run_columns` attributes of the `RetrievalSettings`
     class, and if included inside of the `custom_physics_methods` argument of the
     `retrieval_settings` or in the built-in method list. If run, the result of the
     decorated method will be output to the `output_setting`.
@@ -38,10 +37,6 @@ def physics_method(
     tokamak : Union['Tokamak', List['Tokamak']], optional
         A list of Tokamak objects that represent which tokamak this method may
         be used for, by default None, allows the method to be run for any tokamak.
-    tags : List[str], optional
-        The list of tags to help identify this method. Tags can be used when calling
-        `get_shots_data` to have disruption_py use multiple functions at the same
-        time. Default is ["all"].
     columns : Union[List[str], Callable], optional
         The columns that are in the DataFrame returned by the method. Alternatively,
         you can pass a method that returns the names of used trees at runtime. Default
@@ -64,7 +59,6 @@ def outer_wrapper(method: Callable) -> Callable:
             cache=cache,
             tokamaks=tokamak,
             columns=columns,
-            tags=tags,
         )
 
         wrapper.method_metadata = method_metadata

diff --git a/disruption_py/core/physics_method/metadata.py b/disruption_py/core/physics_method/metadata.py
@@ -22,15 +22,13 @@ class MethodMetadata:
     cache: bool
     tokamaks: Union[Tokamak, List[Tokamak]]
     columns: Union[List[str], Callable]
-    tags: List[str]
 
     ALLOWED_UNRESOLVED = [
         "columns",
         "tokamaks",
     ]
 
     def __post_init__(self):
-        object.__setattr__(self, "tags", self.tags or ["all"])
         object.__setattr__(self, "columns", self.columns or [])
 
 

diff --git a/disruption_py/core/physics_method/runner.py b/disruption_py/core/physics_method/runner.py
@@ -144,7 +144,7 @@ def filter_methods_to_run(
     all_bound_method_metadata : list[BoundMethodMetadata]
         A list of bound method metadata instances.
     retrieval_settings : RetrievalSettings
-        The settings that dictate which methods should be run based on tags, methods,
+        The settings that dictate which methods should be run based on methods
         and columns.
     physics_method_params : PhysicsMethodParams
         The parameters that will be passed to the methods that are run.
@@ -154,10 +154,12 @@ def filter_methods_to_run(
     list
         A list of bound method metadata instances that are eligible to run.
     """
-    tags = retrieval_settings.run_tags
     methods = retrieval_settings.run_methods
-    columns = REQUIRED_COLS.union(retrieval_settings.run_columns)
-
+    if retrieval_settings.run_columns is not None:
+        columns = REQUIRED_COLS.union(retrieval_settings.run_columns)
+    else:
+        columns = None
+    only_excluded_methods_specified = all("~" in method for method in methods or [])
     methods_to_run = []
     for bound_method_metadata in all_bound_method_metadata:
         # exclude if tokamak does not match
@@ -171,15 +173,27 @@ def filter_methods_to_run(
         ):
             continue
 
-        if tags is not None and bool(
-            set(bound_method_metadata.tags).intersection(tags)
-        ):
-            methods_to_run.append(bound_method_metadata)
-        elif methods is not None and bound_method_metadata.name in methods:
-            methods_to_run.append(bound_method_metadata)
-        elif columns is not None and bool(
+        both_none = methods is None and columns is None
+        method_specified = methods is not None and bound_method_metadata.name in methods
+        column_speficied = columns is not None and bool(
             set(bound_method_metadata.columns).intersection(columns)
-        ):
+        )
+        is_not_excluded = (
+            only_excluded_methods_specified
+            and not columns
+            and methods
+            and (("~" + bound_method_metadata.name) not in methods)
+        )
+        should_run = (
+            both_none or method_specified or column_speficied or is_not_excluded
+        )
+
+        # reasons that methods should be excluded even if should_run is true
+        should_not_run = (
+            methods is not None and ("~" + bound_method_metadata.name) in methods
+        )
+
+        if should_run and not should_not_run:
             methods_to_run.append(bound_method_metadata)
         else:
             physics_method_params.logger.debug(
@@ -256,7 +270,7 @@ def populate_shot(
 
     This function executes the physics methods included through the
     `custom_physics_methods` property of retrieval_settings or in the built-in list
-    of methods. It selects methods based on run_methods, run_tags, and run_columns
+    of methods. It selects methods based on run_methods and run_columns
     in retrieval_settings.
 
     Parameters
@@ -375,7 +389,10 @@ def populate_shot(
 
     local_data = pd.concat([pre_filled_shot_data] + filtered_methods, axis=1)
     local_data = local_data.loc[:, ~local_data.columns.duplicated()]
-    if retrieval_settings.only_requested_columns:
+    if (
+        retrieval_settings.only_requested_columns
+        and retrieval_settings.run_columns is not None
+    ):
         include_columns = list(
             REQUIRED_COLS.union(
                 set(retrieval_settings.run_columns).intersection(

diff --git a/disruption_py/settings/retrieval_settings.py b/disruption_py/settings/retrieval_settings.py
@@ -24,11 +24,6 @@
 )
 
 
-def default_tags():
-    """Return the default tag 'all'."""
-    return ["all"]
-
-
 class InterpolationMethod(Enum):
     """Enum for specifying interpolation methods."""
 
@@ -56,29 +51,23 @@ class RetrievalSettings:
     efit_nickname_setting : NicknameSetting, optional
         Nickname setting for retrieving efit tree data (default is "disruption").
     run_methods : list of str, optional
-        List of physics methods to run (default is an empty list). Named methods
-        will be run when retrieving data from  MDSplus for the shot. Named methods
-        must have the physics_method decorator and either be passed in the
-        `custom_physics_methods` argument or included in the built-in list. Defaults
-        to an empty list.
-    run_tags : list of str, optional
-        List of method tags to run (default is ["all"]). Methods used for retrieving
-        data from MDSplus can be tagged with the physics_method decorator and can
-        either be passed in the `custom_physics_methods` argument or included in
-        the built-in list. All methods with at least one included tag will be run.
+        List of physics methods to run (default is None). If None, and run_columns
+        is None, all methods will be run. Named methods will be run when retrieving
+        data from MDSplus for the shot. Named methods must have the physics_method
+        decorator and either be passed in the `custom_physics_methods` argument
+        or included in the built-in method holders.
     run_columns : list of str, optional
-        List of columns to retrieve (default is an empty list). All methods with
-        the physics_method decorator referenced as containing an included column
-        will be run and all columns returned by those methods will be used. Methods
-        can either be passed in the `custom_physics_methods` argument or included
-        in the built-in list. If you wish to only return the requested columns,
-        set only_requested_columns to true in the retrieval_settings.
+        List of columns to retrieve (default is None). If None, and run_methods is
+        None, all methods will be run. If specified, all methods with the physics_method
+        decorator referencing the specified column will be run and all columns returned
+        by those methods will be used. If you wish to only return the requested columns,
+        set only_requested_columns to True in the retrieval_settings.
     only_requested_columns : bool, optional
         Whether to only include requested columns in the result (default is False).
     custom_physics_methods : list, optional
         List of custom physics methods (default is an empty list). The Methods are
         collected and run when retrieving data from MDSplus if the method is included
-        through either the run_methods, run_tags, run_columns setting.
+        through either the run_methods or run_columns setting.
     time_setting : TimeSetting, optional
         Time setting for the shot (default is "disruption_warning"). The retrieved
         data will be interpolated to this timebase. Can pass any `TimeSettingType`
@@ -100,9 +89,8 @@ class RetrievalSettings:
     efit_nickname_setting: NicknameSetting = "disruption"
 
     # Shot run settings
-    run_methods: List[str] = field(default_factory=list)
-    run_tags: List[str] = field(default_factory=default_tags)
-    run_columns: List[str] = field(default_factory=list)
+    run_methods: List[str] | None = None
+    run_columns: List[str] | None = None
     only_requested_columns: bool = False
     custom_physics_methods: list = field(default_factory=list)
 
@@ -170,4 +158,5 @@ def resolve(self):
             self.time_setting, CacheTimeSetting
         ):
             self.time_setting = CacheTimeSetting(self.time_setting)
-        self.run_columns = [col.lower() for col in self.run_columns]
+        if self.run_columns is not None:
+            self.run_columns = [col.lower() for col in self.run_columns]
diff --git a/docs/quickstart/usage_quickstart.md b/docs/quickstart/usage_quickstart.md
@@ -12,11 +12,10 @@ For a simple way to get started, check out [`simple.py`](https://github.com/MIT-
 	```python
 	from disruption_py.settings.retrieval_settings import RetrievalSettings
 
+	# Run all available methods by default
 	retrieval_settings = RetrievalSettings(
 		# Use the efit timebase when returning data
 		time_setting="efit",
-		# Run all available methods
-		run_tags=["all"],
 	)
 	```
 

diff --git a/docs/usage/physics_methods/physics_method_reference.md b/docs/usage/physics_methods/physics_method_reference.md
@@ -63,10 +63,10 @@ For a physics method to be run after calling [`get_shots_data`][disruption_py.wo
 
 2. The method must have the `physics_method` decorator with its `tokamak` parameter either not set or set to the tokamak that you are retrieving data from.
 
-3. The method is included to run via either the `run_methods`, `run_tags`, or `run_columns` parameters of the shot settings.
+3. The method is included to run via either the `run_methods` or `run_columns` parameters of the shot settings.
     - To be included via `run_methods`, the method name must be listed inside of `run_methods`
-	- To be included via `run_tags`, the method must have a tag listed in the `tags` parameter of the `physics_method` decorator that is included in `run_tags`
 	- To be included via `run_columns`, the method must have a column list in the `columns` parameter of the `physics_method` decorator that is included in `run_columns`
+	- If neither `run_methods` nor `run_columns` is specified, all built-in methods will be run
 
 
 Once all designated methods have been collected, DisruptionPy optimizes their execution order to minimize resource usage by using the information supplied in the `physics_method` decorator. Once reordering is complete, the methods are run.

diff --git a/drafts/machine/cmod/physics.py b/drafts/machine/cmod/physics.py
@@ -112,7 +112,6 @@ def get_edge_parameters(times, p_Te, p_ne, edge_rho_min=0.85, edge_rho_max=0.95)
 
     @staticmethod
     @physics_method(
-        tags=["experimental"],
         columns=["te_edge", "ne_edge"],
         tokamak=Tokamak.CMOD,
     )
@@ -205,7 +204,6 @@ def get_H98():
     # TODO: Finish
     @staticmethod
     @physics_method(
-        tags=["experimental"],
         columns=["h98", "wmhd", "btor", "dwmhd_dt", "p_input"],
         tokamak=Tokamak.CMOD,
     )

diff --git a/drafts/machine/d3d/physics.py b/drafts/machine/d3d/physics.py
@@ -13,7 +13,6 @@ class D3DDraftPhysicsMethods:
 
     @staticmethod
     @physics_method(
-        tags=["experimental"],
         tokamak=Tokamak.D3D,
         columns=[
             "te_core",

diff --git a/drafts/plots/peaking_factors_ece.py b/drafts/plots/peaking_factors_ece.py
@@ -30,7 +30,6 @@
 retrieval_settings = RetrievalSettings(
     time_setting="disruption_warning",  # use the set efit's timebase
     efit_nickname_setting="efit18",  # set the efit
-    run_tags=[],
     run_methods=["_get_te_profile_params_ece"],
     run_columns=signals,
     only_requested_columns=True,

diff --git a/examples/defaults.py b/examples/defaults.py
@@ -8,36 +8,41 @@
 from disruption_py.workflow import get_shots_data
 
 retrieval_settings = RetrievalSettings(
-    # data settings
     cache_setting=None,
     efit_nickname_setting="disruption",
-    # method selection
-    run_methods=[],
-    run_tags=["all"],
-    run_columns=[],
+    # method/column selection
+    # default None: all methods/columns
+    run_methods=None,
+    run_columns=None,
     only_requested_columns=False,
     custom_physics_methods=[],
     # timebase settings
     time_setting="disruption_warning",
     domain_setting="full",
     use_cache_setting_timebase=False,
+    # not yet implemented
     interpolation_method="linear",
 )
 
 shot_data = get_shots_data(
-    shotlist_setting=[],  # required
-    tokamak=None,  # defaults to detect from environment
-    database_initializer=None,  # defaults to SQL connection for tokamak
-    mds_connection_initializer=None,  # defaults to MDSplus server string for tokamak
+    # required argument
+    shotlist_setting=[],
+    # default None: detect from environment
+    tokamak=None,
+    # default None: standard SQL/MDSplus connection
+    database_initializer=None,
+    mds_connection_initializer=None,
     retrieval_settings=retrieval_settings,
     output_setting="dataframe",
     num_processes=1,
     log_settings=LogSettings(
-        log_file_path=None,  # defaults to "output.log" in tmp folder for the session
+        # default None: "output.log" in temporary session folder
+        log_file_path=None,
         file_log_level="DEBUG",
         log_file_write_mode="w",
         log_to_console=True,
-        console_log_level=None,  # defaults to VERBOSE but varies based on number of shots
+        # default None: VERBOSE, or higher based on number of shots
+        console_log_level=None,
         use_custom_logging=False,
     ),
 )
diff --git a/examples/efit.py b/examples/efit.py
@@ -29,7 +29,6 @@ def main():
     print(f"Initialized for tokamak: {tokamak.value}")
 
     retrieval_settings = RetrievalSettings(
-        run_tags=[],
         run_methods=run_methods,
         efit_nickname_setting="default",
     )

diff --git a/examples/simple.py b/examples/simple.py
@@ -25,12 +25,12 @@ def main(tokamak, methods, shots, processes, log_level):
         tokamak = resolve_tokamak_from_environment()
     if not shots:
         shots, *_ = get_tokamak_test_shotlist(tokamak)
-    tags = [] if methods else ["all"]
+    methods = methods or None
 
     out = get_shots_data(
         tokamak=tokamak,
         shotlist_setting=shots,
-        retrieval_settings=RetrievalSettings(run_methods=methods, run_tags=tags),
+        retrieval_settings=RetrievalSettings(run_methods=methods),
         num_processes=processes,
         log_settings=log_level,
         output_setting="dataframe",

diff --git a/tests/test_decorator.py b/tests/test_decorator.py
@@ -34,7 +34,6 @@ def my_physics_method(params: PhysicsMethodParams):
         return {col_name: np.ones(shape=len(params.times))}
 
     retrieval_settings = RetrievalSettings(
-        run_tags=[],
         run_columns=[col_name],
         only_requested_columns=True,
         custom_physics_methods=[my_physics_method],

diff --git a/tests/test_output_setting.py b/tests/test_output_setting.py
@@ -61,7 +61,6 @@ def initial_mdsplus_data_fixture(shotlist, tokamak, test_file_path_f) -> Dict:
     retrieval_settings = RetrievalSettings(
         efit_nickname_setting="disruption",
         run_columns=FIRST_ITERATION_COLUMNS,
-        run_tags=[],
         only_requested_columns=True,
     )
     all_outputs = get_shots_data(
@@ -121,7 +120,6 @@ def test_sql_output_setting(
     retrieval_settings = RetrievalSettings(
         efit_nickname_setting="disruption",
         run_columns=ALL_ITERATION_COLUMNS,
-        run_tags=[],
         only_requested_columns=True,
     )
     shot_data = get_shots_data(