Skip to content

Commit

Permalink
workaround: cylc#2799 (comment)
Browse files Browse the repository at this point in the history
  • Loading branch information
oliver-sanders committed Nov 8, 2018
1 parent ddb6334 commit b46982d
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 1 deletion.
4 changes: 3 additions & 1 deletion lib/cylc/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1454,7 +1454,9 @@ def can_auto_restart(self):
('pool_hold_point', self.pool_hold_point),
('run_mode', self.run_mode != 'live'),
('stop_clock_time', self.stop_clock_time),
('stop_point', self.stop_point),
('stop_point', (self.stop_point and
self.stop_point != self.final_point)),
# ^ https://github.com/cylc/cylc/issues/2799#issuecomment-436720805
('stop_task', self.stop_task)
] if value]

Expand Down
117 changes: 117 additions & 0 deletions tests/restart/42-auto-restart-ping-pong.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#!/bin/bash
# THIS FILE IS PART OF THE CYLC SUITE ENGINE.
# Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#-------------------------------------------------------------------------------
# Play a game of Cylc suite ping pong: bounce a suite back and forth between
# two servers by condemning each host in turn, to see if anything breaks.
# Load the shared test helpers from the directory containing this script.
. "$(dirname "$0")/test_header"
# CLOWNS starts out as the remote host (with a shared filesystem) named in the
# global config; stderr from the lookup is discarded.
# NOTE(review): `export VAR=$(...)` hides the exit status of the command, so
# a failed lookup is only detected through the empty-string check below.
export CLOWNS=$( \
cylc get-global-config -i '[test battery]remote host with shared fs' \
2>'/dev/null')
# Without a second host there is nothing to bounce between: skip every test.
if [[ -z "${CLOWNS}" ]]; then
skip_all '"[test battery]remote host with shared fs": not defined'
fi
# JOKERS starts out as the local host. The two variables are swapped by
# stuck_in_the_middle each time it is called.
export JOKERS="$(hostname)"

# Global configuration text shared by every generated global config:
# stuck_in_the_middle prepends it to the "[suite servers]" section it writes.
BASE_GLOBALRC='
[cylc]
health check interval = PT5S
[[events]]
abort on inactivity = True
abort on timeout = True
inactivity = PT2M
timeout = PT2M
'

# Register the test suite, feeding its definition to init_suite on stdin via
# a here-string; TEST_DIR is supplied as a per-command assignment.
# The suite cycles yearly from 2000 with each "foo" depending on the previous
# cycle's "foo", and every task just sleeps for 5 seconds. The final cycle
# point is set explicitly to exercise cylc issue 2799 (see inline comment).
TEST_DIR="$HOME/cylc-run/" init_suite "${TEST_NAME_BASE}" <<< '
[cylc]
abort if any task fails = True
[scheduling]
initial cycle point = 2000
final cycle point = 9999 # test https://github.com/cylc/cylc/issues/2799
[[dependencies]]
[[[P1Y]]]
graph = foo[-P1Y] => foo
[runtime]
[[root]]
script = sleep 5
'

stuck_in_the_middle() {
    # Exchange the roles of the two hosts and write a new global config in
    # which the host now held in CLOWNS is condemned, forcing the suite to
    # jump ship to the host now held in JOKERS.
    #
    # Globals: JOKERS, CLOWNS (swapped); BASE_GLOBALRC, CYLC_CONF_PATH (read).
    # Outputs: the "*.rc" files under CYLC_CONF_PATH are dumped to stderr.
    local previous_jokers="${JOKERS}"
    JOKERS="${CLOWNS}"
    CLOWNS="${previous_jokers}"

    # Assemble the config text first, then hand it to the test helper.
    local globalrc="
${BASE_GLOBALRC}
[suite servers]
run hosts = ${JOKERS}, ${CLOWNS}
condemned hosts = ${CLOWNS}
"
    create_test_globalrc '' "${globalrc}"

    # Echo the resulting configuration for debugging.
    cat "${CYLC_CONF_PATH}"/*.rc >&2
}

kill_suite() {
    # Tear the suite down: request an immediate stop, poll on the suite's
    # contact file, then purge the suite.
    #
    # Globals: SUITE_NAME, SUITE_RUN_DIR (read).
    # NOTE(review): the poll presumably waits for the contact file to
    # disappear - confirm against the definition of poll in test_header.
    local contact_file="${SUITE_RUN_DIR}/.service/contact"

    cylc stop "${SUITE_NAME}" --now --now
    poll ! test -f "${contact_file}"
    purge_suite "${SUITE_NAME}"
}

log_scan2() {
    # log_scan wrapper implementing "abort if any test fails = True".
    #
    # Arguments: forwarded unchanged to log_scan. Everything after the first
    #            four arguments is counted against NO_TESTS.
    # Globals:   NO_TESTS (read and written).
    # Exits:     with status 1 if log_scan returns non-zero, after skipping
    #            the remaining NO_TESTS tests and killing the suite.
    local n_scanned=$(( $# - 4 ))
    NO_TESTS=$(( NO_TESTS - n_scanned ))

    if log_scan "$@"; then
        return 0
    fi

    skip "${NO_TESTS}"  # skip remaining tests
    kill_suite
    exit 1
}

# Each bounce passes four patterns to the "-stop" log scan and one to the
# "-restart" log scan in the loop below, hence five tests per bounce.
# log_scan2 decrements NO_TESTS as scans are performed so that it knows how
# many tests remain to be skipped on failure.
EARS=5 # number of times to bounce the suite between hosts
NO_TESTS="$(( EARS * 5 ))"
set_test_number "${NO_TESTS}"

# run the suite
# The first swap puts the remote host in JOKERS and condemns the local host,
# so the suite is started on the remote host.
stuck_in_the_middle
cylc run "${SUITE_NAME}" --host="${JOKERS}"
# Poll on the suite's contact file, then give the suite a moment to settle.
poll test -f "${SUITE_RUN_DIR}/.service/contact"
sleep 1

# get the log file
# readlink -f resolves the reported path to the underlying file
# (presumably `cat-log -m p` prints the log path - confirm).
FILE=$(cylc cat-log "${SUITE_NAME}" -m p |xargs readlink -f)
#-------------------------------------------------------------------------------
# Bounce the suite EARS times. Each iteration condemns the host the suite is
# currently on, checks the old log for the shutdown/hand-over messages, then
# checks the new log for the start-up message on the other host.
# The numeric arguments after the log file are passed straight through to
# log_scan (presumably retry count and delay in seconds - confirm).
for (( ear = 1; ear <= EARS; ear++ )); do
    stuck_in_the_middle  # swap the condemned host

    # test the shutdown procedure
    # (no line continuation after the final pattern: a trailing backslash
    # here would silently absorb whatever line came next)
    log_scan2 "${TEST_NAME_BASE}-${ear}-stop" "${FILE}" 40 1 \
        'The Cylc suite host will soon become un-available' \
        'Suite shutting down - REQUEST(NOW-NOW)' \
        "Attempting to restart on \"${JOKERS}\"" \
        "Suite now running on \"${JOKERS}\""

    poll test -f "${SUITE_RUN_DIR}/.service/contact"
    sleep 1

    # test the restart procedure
    # The restarted suite writes a new log, so look its path up again.
    FILE="$(cylc cat-log "${SUITE_NAME}" -m p | xargs readlink -f)"
    log_scan2 "${TEST_NAME_BASE}-${ear}-restart" "${FILE}" 20 1 \
        "Suite starting: server=$(ssh "${JOKERS}" hostname -f)"
    sleep 2
done

# All bounces passed: stop the suite and clean it up.
kill_suite

# A bare exit returns the status of the last command executed.
exit

0 comments on commit b46982d

Please sign in to comment.