ray-project · krfricke · May 16, 2023 · May 8, 2023 · May 10, 2023 · May 10, 2023
@@ -53,6 +53,7 @@ parts:
               - file: ray-air/computer-vision
               - file: ray-air/examples/serving_guide
               - file: ray-air/deployment
+              - file: ray-air/experimental-features
           - file: ray-air/examples/index
             sections:
               - file: ray-air/examples/opt_deepspeed_batch_inference

@@ -0,0 +1,133 @@
+.. _air-experimental-features:
+
+================================
+Experimental features in Ray AIR
+================================
+
+The Ray Team is testing a number of experimental features in Ray AIR.
+
+During development, the features
+are disabled by default. You can opt in by setting a
+feature-specific environment variable.
+
+After some time, the Ray Team enables the feature by default to gather
+more feedback from the community. In that case, you can still
+disable the feature using the same environment variable to
+fully revert to the old behavior.
+
+If you run into issues with experimental features,
+`open an issue <https://github.com/ray-project/ray/issues/>`_
+on GitHub. The Ray Team considers feedback before removing
+the old implementation and making the new implementation the
+default.
+
+.. note::
+
+    Experimental features can undergo frequent changes,
+    especially on the master branch and the nightly wheels.
+
+.. _air-experimental-new-output:
+
+Context-aware progress reporting
+--------------------------------
+
+.. note::
+
+    This feature is *disabled by default* in Ray 2.5.
+
+    To enable, set the environment variable ``RAY_AIR_NEW_OUTPUT=1``.
+
+A context-aware output engine is available for Ray Train and Ray Tune runs.
+
+This output engine affects how the training progress
+is printed in the console. The output changes depending on the execution
+context: Ray Tune runs are displayed differently from Ray Train runs.
+
+The features include:
+
+- Ray Train runs report status relevant to the single training run.
+  It does not use the default Ray Tune table layout from previous versions.
+- The table format has been updated.
+- The format of reporting configurations and observed metrics is different from previous versions.
+- The console output for runs (e.g., RLlib runs) displays significantly fewer metrics by default.
+- The output is decluttered to improve readability.
+
+
+This output feature only works for the regular console.
+It is automatically disabled when you use Jupyter Notebooks
+or Ray client.
+
+
+.. _air-experimental-rich:
+
+Rich layout (sticky status)
+---------------------------
+
+.. note::
+
+    This feature is *disabled by default*.
+
+    To enable, set the environment variable ``RAY_AIR_RICH_LAYOUT=1``.
+
+The :ref:`context-aware output engine <air-experimental-new-output>`
+exposes an advanced layout using the
+`rich <https://github.com/Textualize/rich>`_ library.
+
+The *rich* layout provides a sticky
+status table: The regular console logs are still printed
+as before, but the trial overview table (in Ray Tune) is stuck to the bottom of the
+screen and periodically updated.
+
+This feature is still in development. You can opt-in to try
+it out.
+
+To opt-in, set the ``RAY_AIR_RICH_LAYOUT=1`` environment variable
+and install rich (``pip install rich``).
+
+.. figure:: images/rich-sticky-status.png
+
+
+.. _air-experimental-execution:
+
+Event-based trial execution engine
+----------------------------------
+
+.. note::
+
+    This feature is *enabled by default* starting Ray 2.5.
+
+    To disable, set the environment variable ``TUNE_NEW_EXECUTION=0``.
+
+
+Ray Tune has an updated trial execution engine.
+Since Ray Tune is also the execution backend for
+Ray Train, the updated engine affects both tuning and training runs.
+
+The update is a refactor of the :ref:`TrialRunner <trialrunner-docstring>`
+which uses a generic Ray actor and future manager instead of
+the previous ``RayTrialExecutor``. This manager exposes an
+interface to react to scheduling and task execution events, which makes
+it easier to maintain and develop.
+
+This is a drop-in replacement of an internal class, and you shouldn't see
+any change to the previous behavior.
+
+However, if you notice any odd behavior, you can opt out of
+the event-based execution engine and see if it resolves your problem.
+
+In that case, please `open an issue <https://github.com/ray-project/ray/issues/>`_
+on GitHub, ideally with a reproducible script.
+
+Things to look out for:
+
+- Fewer trials are running in parallel than before
+- Starting new trials takes much longer (or is much faster) than before
+- The tuning run finishes, but the script does not exit
+- The end-to-end runtime is much slower than before
+- The CPU load on the head node is high,
+  even though the training jobs don't
+  require many resources or don't run on the head node
+- Exceptions are raised that indicate an error in starting or
+  stopping trials or the experiment
+
+Note that some edge cases may not be captured in the regression tests. Your feedback is welcome.
@@ -110,6 +110,11 @@ Please also see the :ref:`Ray Tune environment variables <tune-env-vars>`.
 - **RAY_AIR_FULL_TRACEBACKS**: If set to 1, will print full tracebacks for training functions,
   including internal code paths. Otherwise, abbreviated tracebacks that only show user code
   are printed. Defaults to 0 (disabled).
+- **RAY_AIR_NEW_OUTPUT**: If set to 1, this enables
+  the :ref:`experimental new console output <air-experimental-new-output>`. Defaults to 0 (disabled).
+- **RAY_AIR_RICH_LAYOUT**: If set to 1, this enables
+  the :ref:`sticky table layout <air-experimental-rich>`
+  (only available for Ray Tune).
 
 .. _air-multi-tenancy:
 
@@ -125,3 +130,20 @@ If you still want to do this, refer to
 the
 :ref:`Ray Tune multi-tenancy docs <tune-multi-tenancy>`
 for potential pitfalls.
+
+.. _air-experimental-overview:
+
+Experimental features in Ray 2.5+
+---------------------------------
+Starting in Ray 2.5, some experimental
+features are enabled by default.
+
+Experimental features are enabled to allow for feedback
+from users. Every experimental feature can be disabled
+by setting an environment variable. Some features are
+not ready for general testing and can only be *enabled* using an
+environment variable.
+
+Please see the :ref:`experimental features <air-experimental-features>`
+page for more details on the current features and how to enable
+or disable them.
@@ -21,6 +21,7 @@ These are the environment variables Ray Tune currently considers:
 * **TUNE_DISABLE_DATED_SUBDIR**: Ray Tune automatically adds a date string to experiment
   directories when the name is not specified explicitly or the trainable isn't passed
   as a string. Setting this environment variable to ``1`` disables adding these date strings.
+* **TUNE_NEW_EXECUTION**: Set to ``0`` to disable :ref:`Ray Tune's new execution engine <air-experimental-execution>`, which is enabled by default.
 * **TUNE_DISABLE_STRICT_METRIC_CHECKING**: When you report metrics to Tune via
   ``session.report()`` and passed a ``metric`` parameter to ``Tuner()``, a scheduler,
   or a search algorithm, Tune will error

diff --git a/python/ray/air/config.py b/python/ray/air/config.py
@@ -31,6 +31,7 @@
     from ray.tune.search.sample import Domain
     from ray.tune.stopper import Stopper
     from ray.tune.syncer import SyncConfig
+    from ray.tune.experimental.output import AirVerbosity
     from ray.tune.utils.log import Verbosity
     from ray.tune.execution.placement_groups import PlacementGroupFactory
 
@@ -726,9 +727,12 @@ class RunConfig:
             intermediate experiment progress. Defaults to CLIReporter if
             running in command-line, or JupyterNotebookReporter if running in
             a Jupyter notebook.
-        verbose: 0, 1, 2, or 3. Verbosity mode.
+        verbose: 0, 1, or 2. Verbosity mode.
+            0 = silent, 1 = default, 2 = verbose. Defaults to 1.
+            If the ``RAY_AIR_NEW_OUTPUT=0`` environment variable is set,
+            uses the old verbosity settings:
             0 = silent, 1 = only status updates, 2 = status and brief
-            results, 3 = status and detailed results. Defaults to 2.
+            results, 3 = status and detailed results.
         log_to_file: Log stdout and stderr to files in
             trial directories. If this is `False` (default), no files
             are written. If `true`, outputs are written to `trialdir/stdout`
@@ -748,7 +752,7 @@ class RunConfig:
     sync_config: Optional["SyncConfig"] = None
     checkpoint_config: Optional[CheckpointConfig] = None
     progress_reporter: Optional["ProgressReporter"] = None
-    verbose: Union[int, "Verbosity"] = 3
+    verbose: Optional[Union[int, "AirVerbosity", "Verbosity"]] = None
     log_to_file: Union[bool, str, Tuple[str, str]] = False
 
     # Deprecated
@@ -757,6 +761,7 @@ class RunConfig:
     def __post_init__(self):
         from ray.tune.syncer import SyncConfig, Syncer
         from ray.tune.utils.util import _resolve_storage_path
+        from ray.tune.experimental.output import AirVerbosity, get_air_verbosity
 
         if not self.failure_config:
             self.failure_config = FailureConfig()
@@ -822,6 +827,13 @@ def __post_init__(self):
                 "Must specify a remote `storage_path` to use a custom `syncer`."
             )
 
+        if self.verbose is None:
+            # Default `verbose` value. For new output engine,
+            # this is AirVerbosity.DEFAULT.
+            # For old output engine, this is Verbosity.V3_TRIAL_DETAILS
+            # Todo (krfricke): Currently uses number to pass test_configs::test_repr
+            self.verbose = get_air_verbosity(AirVerbosity.DEFAULT) or 3
+
     def __repr__(self):
         from ray.tune.syncer import SyncConfig
 

diff --git a/python/ray/air/constants.py b/python/ray/air/constants.py
@@ -66,4 +66,7 @@
 AIR_ENV_VARS = {
     COPY_DIRECTORY_CHECKPOINTS_INSTEAD_OF_MOVING_ENV,
     DISABLE_LAZY_CHECKPOINTING_ENV,
+    "RAY_AIR_FULL_TRACEBACKS",
+    "RAY_AIR_NEW_OUTPUT",
+    "RAY_AIR_RICH_LAYOUT",
 }
@@ -5,11 +5,11 @@
 # NOTE: When adding a new environment variable, please track it in this list.
 TUNE_ENV_VARS = {
     "RAY_AIR_LOCAL_CACHE_DIR",
-    "RAY_AIR_FULL_TRACEBACKS",
     "TUNE_DISABLE_AUTO_CALLBACK_LOGGERS",
     "TUNE_DISABLE_AUTO_CALLBACK_SYNCER",
     "TUNE_DISABLE_AUTO_INIT",
     "TUNE_DISABLE_DATED_SUBDIR",
+    "TUNE_NEW_EXECUTION",
     "TUNE_DISABLE_STRICT_METRIC_CHECKING",
     "TUNE_DISABLE_SIGINT_HANDLER",
     "TUNE_FALLBACK_TO_LATEST_CHECKPOINT",

@@ -1,5 +1,15 @@
 import sys
-from typing import Any, Collection, Dict, Iterable, List, Optional, Tuple, TYPE_CHECKING
+from typing import (
+    Any,
+    Collection,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    TYPE_CHECKING,
+)
 
 import contextlib
 import collections
@@ -15,6 +25,8 @@
 import textwrap
 import time
 
+from ray.tune.utils.log import Verbosity
+
 try:
     import rich
     import rich.layout
@@ -90,10 +102,21 @@ class AirVerbosity(IntEnum):
 IS_NOTEBOOK = ray.widgets.util.in_notebook()
 
 
-def get_air_verbosity() -> Optional[AirVerbosity]:
-    verbosity = os.environ.get("AIR_VERBOSITY", None)
-    if verbosity:
-        return AirVerbosity(int(verbosity)) if verbosity else None
+def get_air_verbosity(
+    verbose: Union[int, AirVerbosity, Verbosity]
+) -> Optional[AirVerbosity]:
+    if os.environ.get("RAY_AIR_NEW_OUTPUT", "0") == "0":
+        return None
+
+    if isinstance(verbose, AirVerbosity):
+        return verbose
+
+    verbose_int = verbose if isinstance(verbose, int) else verbose.value
+
+    # Verbosity 2 and 3 both map to AirVerbosity 2
+    verbose_int = min(2, verbose_int)
+
+    return AirVerbosity(verbose_int)
 
 
 def _get_time_str(start_time: float, current_time: float) -> Tuple[str, str]:
@@ -520,7 +543,7 @@ def _detect_reporter(
     mode: Optional[str] = None,
 ):
     # TODO: Add JupyterNotebook and Ray Client case later.
-    rich_enabled = "ENABLE_RICH" in os.environ
+    rich_enabled = bool(int(os.environ.get("RAY_AIR_RICH_LAYOUT", "0")))
     if num_samples and num_samples > 1:
         if rich_enabled:
             if not rich:
@@ -530,7 +553,7 @@ def _detect_reporter(
             reporter = TuneTerminalReporter(verbosity, num_samples, metric, mode)
     else:
         if rich_enabled:
-            logger.warning("`ENABLE_RICH` is only effective with Tune usecase.")
+            logger.warning("`RAY_AIR_RICH_LAYOUT` is only effective with Tune usecase.")
         reporter = TrainReporter(verbosity)
     return reporter