Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

batch system -> job runner #3992

Merged
merged 18 commits into from
Dec 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions cylc/flow/cfgspec/globalcfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,11 +319,15 @@

with Conf('platforms'):
with Conf('<platform name>') as Platform:
Conf('batch system', VDR.V_STRING, 'background')
Conf('batch submit command template', VDR.V_STRING)
Conf('job runner', VDR.V_STRING, 'background', desc='''
The batch system/job submit method used to run jobs on the
platform, e.g., ``background``, ``at``, ``slurm``,
``loadleveler``...
''')
Conf('job runner command template', VDR.V_STRING)
Conf('shell', VDR.V_STRING, '/bin/bash')
Conf('run directory', VDR.V_STRING, '$HOME/cylc-run', desc='''
The number of old run directory trees to retain at start-up.
The directory in which to install workflows.
''')
Conf('work directory', VDR.V_STRING, '$HOME/cylc-run', desc='''
The top level for suite work and share directories. Can contain
Expand Down Expand Up @@ -386,7 +390,7 @@
Conf('execution time limit polling intervals',
VDR.V_INTERVAL_LIST, desc='''
The intervals between polling after a task job (submitted to
the relevant batch system on the relevant host) exceeds its
the relevant job runner on the relevant host) exceeds its
execution time limit. The default setting is PT1M, PT2M, PT7M.
The accumulated times (in minutes) for these intervals will be
roughly 1, 1 + 2 = 3 and 1 + 2 + 7 = 10 after a task job
Expand Down Expand Up @@ -525,7 +529,7 @@
qcat -o %(job_id)s
''')
Conf('job name length maximum', VDR.V_INTEGER, desc='''
The maximum length for job name acceptable by a batch system on
The maximum length for job name acceptable by a job runner on
a given host. Currently, this setting is only meaningful for
PBS jobs. For example, PBS 12 or older will fail a job submit
if the job name has more than 15 characters; whereas PBS 13
Expand Down
59 changes: 45 additions & 14 deletions cylc/flow/cfgspec/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from cylc.flow.parsec.validate import (
DurationFloat, CylcConfigValidator as VDR, cylc_config_validate)
from cylc.flow.platforms import get_platform
from cylc.flow.task_events_mgr import EventData

# Regex to check whether a string is a command
REC_COMMAND = re.compile(r'(`|\$\()\s*(.*)\s*([`)])$')
Expand Down Expand Up @@ -995,10 +996,11 @@
Try number
``%(id)s``
Task ID (i.e. %(name)s.%(point)s)
``%(batch_sys_name)s``
Batch system name
``%(batch_sys_job_id)``
Batch system job ID
``%(job_runner_name)s``
Job runner name (previously ``%(batch_sys_name)s``)
``%(job_id)s``
Job ID in the job runner
(previously ``%(batch_sys_job_id)s``)
``%(submit_time)s``
Date-time when task job is submitted
``%(start_time)s``
Expand Down Expand Up @@ -1213,10 +1215,10 @@

with Conf('directives', desc='''
Batch queue scheduler directives. Whether or not these are
used depends on the batch system. For the built-in methods
that support directives (``loadleveler``, ``lsf``, ``pbs``,
``sge``, ``slurm``, ``slurm_packjob``, ``moab``), directives
are written to the
used depends on the batch system/job runner. For the built-in
methods that support directives (``loadleveler``, ``lsf``,
``pbs``, ``sge``, ``slurm``, ``slurm_packjob``, ``moab``),
directives are written to the
top of the task job script in the correct format for the
method. Specifying directives individually like this allows
use of default directives that can be individually overridden
Expand All @@ -1225,7 +1227,7 @@
Conf('<directive>', VDR.V_STRING, desc='''
e.g. ``class = parallel``.

Example directives for the built-in batch system handlers
Example directives for the built-in job runner handlers
are shown in :ref:`AvailableMethods`.
''')

Expand Down Expand Up @@ -1382,11 +1384,8 @@ def upg(cfg, descr):
upgrade_graph_section(cfg, descr)
upgrade_param_env_templates(cfg, descr)

if 'runtime' in cfg:
for task_name, task_cfg in cfg['runtime'].items():
platform = get_platform(task_cfg, task_name, warn_only=True)
if type(platform) == str:
LOG.warning(platform)
warn_about_depr_platform(cfg)
warn_about_depr_event_handler_tmpl(cfg)


def upgrade_graph_section(cfg, descr):
Expand Down Expand Up @@ -1460,6 +1459,38 @@ def upgrade_param_env_templates(cfg, descr):
task_items.pop('parameter environment templates')


def warn_about_depr_platform(cfg):
    """Warn if deprecated host or batch system appear in config.

    For every task under ``cfg['runtime']``, ``get_platform`` is called
    with ``warn_only=True``; whenever it returns a string, that string is
    logged as a warning.

    Args:
        cfg: The suite configuration mapping. Nothing is done when it has
            no ``'runtime'`` section.
    """
    if 'runtime' not in cfg:
        return
    for task_name, task_cfg in cfg['runtime'].items():
        platform = get_platform(task_cfg, task_name, warn_only=True)
        # NOTE(review): with warn_only=True a str result is presumably the
        # deprecation message rather than a platform - confirm against
        # get_platform.
        if isinstance(platform, str):
            LOG.warning(platform)


def warn_about_depr_event_handler_tmpl(cfg):
    """Warn if deprecated template strings appear in event handlers.

    Each value in every task's ``events`` section is searched for the old
    job ID and job runner name template variables; one warning is logged
    per deprecated variable found in a value, naming its replacement.

    Args:
        cfg: The suite configuration mapping. Nothing is done when it has
            no ``'runtime'`` section; tasks without an ``'events'``
            section are skipped.
    """
    if 'runtime' not in cfg:
        return
    deprecation_msg = (
        'The event handler template variable "%({0})s" is deprecated - '
        'use "%({1})s" instead.')
    runtime = cfg['runtime']
    for task in runtime:
        task_cfg = runtime[task]
        if 'events' not in task_cfg:
            continue
        # Only the setting values are inspected; the event names are not
        # needed.
        for handler in task_cfg['events'].values():
            # (deprecated variable, replacement), checked in the same
            # order as before: job ID first, then job runner name.
            for old, new in (
                (EventData.JobID_old, EventData.JobID),
                (EventData.JobRunnerName_old, EventData.JobRunnerName),
            ):
                # Match on the opening "%(name)" only, so any conversion
                # type after the closing bracket is caught.
                if f'%({old.value})' in handler:
                    LOG.warning(
                        deprecation_msg.format(old.value, new.value))


class RawSuiteConfig(ParsecConfig):
"""Raw suite configuration."""

Expand Down
4 changes: 0 additions & 4 deletions cylc/flow/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,6 @@ class SuiteConfig:
"""Class for suite configuration items and derived quantities."""

Q_DEFAULT = 'default'
TASK_EVENT_TMPL_KEYS = (
'event', 'suite', 'suite_uuid', 'point', 'name', 'submit_num', 'id',
'message', 'batch_sys_name', 'batch_sys_job_id', 'submit_time',
'start_time', 'finish_time', 'platform_name', 'try_num')

def __init__(
self,
Expand Down
4 changes: 2 additions & 2 deletions cylc/flow/data_messages.proto
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ message PbJob {
optional string submitted_time = 6;
optional string started_time = 7;
optional string finished_time = 8;
optional string batch_sys_job_id = 9;
optional string batch_sys_name = 10;
optional string job_id = 9;
optional string job_runner_name = 10;
Comment on lines -111 to +112
Copy link
Member Author

@MetRonnie MetRonnie Dec 11, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dwsutherland Can I get a quick check from you that the GraphQL and protobuf stuff has been done correctly? (this commit b64822c and this one 7a23e2f#diff-3c027c5232fb05de824ca58eb160f34cd3e253500824026046cbf56fc526431a)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good to me, as long as the instructions at the top were followed, the binary used to generate the module is the same version protobuf, and tests pass 👍

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The diff in the generated module looks appropriate.. Will re-generate on review and see if there are any changes 👍

optional string env_script = 11;
optional string err_script = 12;
optional string exit_script = 13;
Expand Down
Loading