diff --git a/.github/workflows/crmsh-ci.yml b/.github/workflows/crmsh-ci.yml
index 4a758cceb..fa10187d9 100644
--- a/.github/workflows/crmsh-ci.yml
+++ b/.github/workflows/crmsh-ci.yml
@@ -450,6 +450,21 @@ jobs:
           token: ${{ secrets.CODECOV_TOKEN }}
           flags: integration
 
+  functional_test_blocking_ssh:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 40
+    steps:
+      - uses: actions/checkout@v4
+      - name: functional test for blocking ssh
+        run: |
+          echo '{ "exec-opts": ["native.cgroupdriver=systemd"] }' | sudo tee /etc/docker/daemon.json
+          sudo systemctl restart docker.service
+          $DOCKER_SCRIPT `$GET_INDEX_OF cluster_blocking_ssh`
+      - uses: codecov/codecov-action@v4
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          flags: integration
+
   original_regression_test:
     runs-on: ubuntu-24.04
     timeout-minutes: 40
diff --git a/codecov.yml b/codecov.yml
index 8810859ee..47000a652 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -8,7 +8,7 @@ coverage:
       threshold: 0.35%
 codecov:
   notify:
-    after_n_builds: 27
+    after_n_builds: 28
 comment:
-  after_n_builds: 27
+  after_n_builds: 28
   layout: "condensed_header, flags, files, condensed_footer"
diff --git a/crmsh/config.py b/crmsh/config.py
index eafe76d93..21f86e299 100644
--- a/crmsh/config.py
+++ b/crmsh/config.py
@@ -258,6 +258,7 @@ def get(self, value):
         'lock_timeout': opt_string('120'),
         'add_advised_op_values': opt_boolean('yes'),
         'OCF_1_1_SUPPORT': opt_boolean('yes'),
+        'no_ssh': opt_boolean('no'),
         'obscure_pattern': opt_string('passw*')
     },
     'path': {
diff --git a/crmsh/constants.py b/crmsh/constants.py
index c49a69b2e..4bdb6704f 100644
--- a/crmsh/constants.py
+++ b/crmsh/constants.py
@@ -448,4 +448,6 @@
 
 # Commands that are deprecated and hidden from UI
 HIDDEN_COMMANDS = {'ms'}
+
+NO_SSH_ERROR_MSG = "ssh-related operations are disabled. crmsh works in local mode."
 # vim:ts=4:sw=4:et:
diff --git a/crmsh/main.py b/crmsh/main.py
index a2ba69e57..a3a929779 100644
--- a/crmsh/main.py
+++ b/crmsh/main.py
@@ -369,6 +369,9 @@ def run():
         else:
             upgradeutil.upgrade_if_needed()
             return main_input_loop(context, user_args)
+    except utils.NoSSHError as msg:
+        logger.error('%s', msg)
+        sys.exit(1)
     except KeyboardInterrupt:
         if config.core.debug:
             raise
diff --git a/crmsh/report/core.py b/crmsh/report/core.py
index 1563e925c..1d4b7ec3d 100644
--- a/crmsh/report/core.py
+++ b/crmsh/report/core.py
@@ -20,6 +20,7 @@
 import crmsh.report.sh
 import crmsh.user_of_host
 from crmsh import utils as crmutils
+from crmsh import constants as crmconstants
 from crmsh import config, log, tmpfiles, ui_cluster
 from crmsh.sh import ShellUtils
 
@@ -331,13 +332,20 @@ def process_dest(context: Context) -> None:
     context.dest_path = f"{context.dest_dir}/{context.dest}.tar{context.compress_suffix}"
 
 
+def local_mode(context: Context) -> bool:
+    return context.single or (len(context.node_list) == 1 and context.node_list[0] == context.me)
+
+
 def process_node_list(context: Context) -> None:
-    if not context.node_list:
+    if not local_mode(context) and config.core.no_ssh:
+        logger.error('%s', crmconstants.NO_SSH_ERROR_MSG)
+        context.single = True
+    if context.single:
+        context.node_list = [context.me]
+    elif not context.node_list:
         context.node_list = crmutils.list_cluster_nodes()
         if not context.node_list:
             raise utils.ReportGenericError("Could not figure out a list of nodes; is this a cluster node?")
-    if context.single:
-        context.node_list = [context.me]
 
     for node in context.node_list[:]:
         if node == context.me:
diff --git a/crmsh/ui_cluster.py b/crmsh/ui_cluster.py
index 97ab97bf1..d51d563be 100644
--- a/crmsh/ui_cluster.py
+++ b/crmsh/ui_cluster.py
@@ -180,10 +180,15 @@ def do_start(self, context, *args):
             service_check_list.append("corosync-qdevice.service")
 
         service_manager = ServiceManager()
-        for node in node_list[:]:
-            if all([service_manager.service_is_active(srv, remote_addr=node) for srv in service_check_list]):
-                logger.info("The cluster stack already started on {}".format(node))
-                node_list.remove(node)
+        try:
+            for node in node_list[:]:
+                if all([service_manager.service_is_active(srv, remote_addr=node) for srv in service_check_list]):
+                    logger.info("The cluster stack already started on {}".format(node))
+                    node_list.remove(node)
+        except utils.NoSSHError as msg:
+            logger.error('%s', msg)
+            logger.info("Please try 'crm cluster start' on each node")
+            return
         if not node_list:
             return
 
@@ -240,7 +245,12 @@ def do_stop(self, context, *args):
         Stops the cluster stack on all nodes or specific node(s)
         '''
         node_list = parse_option_for_nodes(context, *args)
-        node_list = [n for n in node_list if self._node_ready_to_stop_cluster_service(n)]
+        try:
+            node_list = [n for n in node_list if self._node_ready_to_stop_cluster_service(n)]
+        except utils.NoSSHError as msg:
+            logger.error('%s', msg)
+            logger.info("Please try 'crm cluster stop' on each node")
+            return
         if not node_list:
             return
         logger.debug(f"stop node list: {node_list}")
diff --git a/crmsh/upgradeutil.py b/crmsh/upgradeutil.py
index 7267a1449..ed08283ac 100644
--- a/crmsh/upgradeutil.py
+++ b/crmsh/upgradeutil.py
@@ -8,6 +8,8 @@
 import crmsh.parallax
 import crmsh.utils
 from crmsh import user_of_host
+import crmsh.config
+import crmsh.constants
 from crmsh.prun import prun
 
 
@@ -152,6 +154,8 @@ def ask(msg: str):
 
 
 def upgrade_if_needed():
+    if crmsh.config.core.no_ssh:
+        return
     if os.geteuid() != 0:
         return
     if not crmsh.utils.can_ask(background_wait=False):
diff --git a/crmsh/user_of_host.py b/crmsh/user_of_host.py
index 9c990daec..3067a0f81 100644
--- a/crmsh/user_of_host.py
+++ b/crmsh/user_of_host.py
@@ -45,6 +45,9 @@ def user_of(self, host):
 
     def user_pair_for_ssh(self, host: str) -> typing.Tuple[str, str]:
         """Return (local_user, remote_user) pair for ssh connection"""
+        if config.core.no_ssh:
+            from .utils import NoSSHError
+            raise NoSSHError(constants.NO_SSH_ERROR_MSG)
         local_user = None
         remote_user = None
         try:
diff --git a/crmsh/utils.py b/crmsh/utils.py
index 54ddc5ef2..73ac65f77 100644
--- a/crmsh/utils.py
+++ b/crmsh/utils.py
@@ -3155,4 +3155,20 @@ def ansible_facts(module_name) -> dict:
     out = out[bracket_pos:]
     json_tree = json.loads(out)
     return json_tree['ansible_facts']
+
+
+class NoSSHError(Exception):
+    pass
+
+
+def ssh_command():
+    """
+    Wrapper function for ssh command
+
+    When ssh between cluster nodes is blocked, core.no_ssh
+    should be set to 'yes', then this function will raise NoSSHError
+    """
+    if config.core.no_ssh:
+        raise NoSSHError(constants.NO_SSH_ERROR_MSG)
+    return "ssh"
 # vim:ts=4:sw=4:et:
diff --git a/data-manifest b/data-manifest
index a56836fae..b1535dd2b 100644
--- a/data-manifest
+++ b/data-manifest
@@ -71,6 +71,7 @@ test/features/bootstrap_options.feature
 test/features/bootstrap_sbd_delay.feature
 test/features/bootstrap_sbd_normal.feature
 test/features/cluster_api.feature
+test/features/cluster_blocking_ssh.feature
 test/features/configure_bugs.feature
 test/features/constraints_bugs.feature
 test/features/corosync_ui.feature
diff --git a/etc/crm.conf.in b/etc/crm.conf.in
index ddff3d2b7..1bbf6a18a 100644
--- a/etc/crm.conf.in
+++ b/etc/crm.conf.in
@@ -21,6 +21,7 @@
 ; report_tool_options =
 ; lock_timeout = 120
 ; add_advised_op_values = yes
+; no_ssh = no
 ; set OCF_1_1_SUPPORT to yes is to fully turn on OCF 1.1 feature once the corresponding CIB detected.
 ; OCF_1_1_SUPPORT = yes
 
diff --git a/test/features/cluster_blocking_ssh.feature b/test/features/cluster_blocking_ssh.feature
new file mode 100644
index 000000000..b7a26b74e
--- /dev/null
+++ b/test/features/cluster_blocking_ssh.feature
@@ -0,0 +1,77 @@
+Feature: cluster testing with ssh blocked
+
+  Need nodes: hanode1 hanode2
+
+  Scenario: Cluster testing with ssh blocked (bsc#1228899)
+    Given   Cluster service is "stopped" on "hanode1"
+    And     Cluster service is "stopped" on "hanode2"
+    # without bootstrap, write corosync.conf and authkey directly
+    When    Run "dd if=/dev/urandom of=/etc/corosync/authkey bs=1 count=256" on "hanode1"
+    And     Write multi lines to file "/etc/corosync/corosync.conf" on "hanode1"
+      """
+      totem {
+        version: 2
+        cluster_name: hacluster
+        transport: knet
+        token: 5000
+        join: 60
+        max_messages: 20
+        token_retransmits_before_loss_const: 10
+        crypto_hash: sha1
+        crypto_cipher: aes256
+      }
+
+      quorum {
+        provider: corosync_votequorum
+        two_node: 1
+      }
+
+      logging {
+        to_logfile: yes
+        logfile: /var/log/cluster/corosync.log
+        to_syslog: yes
+        timestamp: on
+      }
+
+      nodelist {
+        node {
+          ring0_addr: @hanode1.ip.default
+          name: hanode1
+          nodeid: 1
+        }
+
+        node {
+          ring0_addr: @hanode2.ip.default
+          name: hanode2
+          nodeid: 2
+        }
+
+      }
+      """
+    And     Run "scp /etc/corosync/authkey /etc/corosync/corosync.conf hanode2:/etc/corosync/" on "hanode1"
+    And     Run "systemctl start pacemaker" on "hanode1"
+    And     Run "systemctl start pacemaker" on "hanode2"
+    And     Wait for DC
+    Then    Cluster service is "started" on "hanode1"
+    And     Cluster service is "started" on "hanode2"
+    And     Online nodes are "hanode1 hanode2"
+    # block ssh between nodes
+    When    Run "systemctl start firewalld" on "hanode2"
+    And     Run "firewall-cmd --zone=public --add-rich-rule='rule port port=22 protocol=tcp drop' --permanent && firewall-cmd --reload" on "hanode2"
+    And     Try "ssh -o ConnectTimeout=5 hanode2" on "hanode1"
+    Then    Except "ssh: connect to host hanode2 port 22: Connection timed out" in stderr
+    When    Run "timeout 5s crm report || echo "timeout"" on "hanode1"
+    Then    Expected "timeout" in stdout
+    When    Write multi lines to file "/etc/crm/crm.conf" on "hanode1"
+      """
+      [core]
+      no_ssh = yes
+      """
+    When    Run "crm configure property stonith-enabled=false" on "hanode1"
+    And     Run "crm report -d /tmp/report" on "hanode1"
+    Then    Directory "/tmp/report/hanode1" created
+    Then    Directory "/tmp/report/hanode2" not created
+    Then    Expected "ERROR: ssh-related operations are disabled. crmsh works in local mode." in stderr
+    Then    Run "crm status" OK on "hanode1"
+    When    Run "crm cluster stop --all" on "hanode1"
+    Then    Expected "ERROR: ssh-related operations are disabled. crmsh works in local mode." in stderr
diff --git a/test/unittests/test_report_core.py b/test/unittests/test_report_core.py
index fe39ad1e2..3be9fd67e 100644
--- a/test/unittests/test_report_core.py
+++ b/test/unittests/test_report_core.py
@@ -455,23 +455,30 @@ def test_process_arguments_value_error(self):
             core.process_arguments(mock_ctx_inst)
         self.assertEqual("The start time must be before the finish time", str(err.exception))
 
+    @mock.patch('crmsh.report.core.local_mode')
     @mock.patch('crmsh.utils.list_cluster_nodes')
-    def test_process_node_list_exception(self, mock_list_nodes):
-        mock_ctx_inst = mock.Mock(node_list=[])
+    def test_process_node_list_exception(self, mock_list_nodes, mock_local_mode):
+        mock_local_mode.return_value = False
+        config.core.no_ssh = False
+        mock_ctx_inst = mock.Mock(node_list=[], single=False)
         mock_list_nodes.return_value = []
         with self.assertRaises(utils.ReportGenericError) as err:
             core.process_node_list(mock_ctx_inst)
         self.assertEqual("Could not figure out a list of nodes; is this a cluster node?", str(err.exception))
 
+    @mock.patch('crmsh.report.core.local_mode')
     @mock.patch('crmsh.utils.list_cluster_nodes')
-    def test_process_node_list_single(self, mock_list_nodes):
+    def test_process_node_list_single(self, mock_list_nodes, mock_local_mode):
+        mock_local_mode.return_value = True
         mock_ctx_inst = mock.Mock(node_list=["node1", "node2"], single=True, me="node1")
         core.process_node_list(mock_ctx_inst)
 
+    @mock.patch('crmsh.report.core.local_mode')
     @mock.patch('logging.Logger.error')
     @mock.patch('crmsh.utils.ping_node')
     @mock.patch('crmsh.utils.list_cluster_nodes')
-    def test_process_node_list(self, mock_list_nodes, mock_ping, mock_error):
+    def test_process_node_list(self, mock_list_nodes, mock_ping, mock_error, mock_local_mode):
+        mock_local_mode.return_value = False
         mock_ctx_inst = mock.Mock(node_list=["node1", "node2"], single=False, me="node1")
         mock_ping.side_effect = ValueError("error")
         core.process_node_list(mock_ctx_inst)
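
---
Usage note (not part of the patch): a minimal sketch of how a caller is expected to consume the new utils.ssh_command() wrapper and utils.NoSSHError introduced above, assuming core.no_ssh has been loaded from /etc/crm/crm.conf ([core] section, no_ssh = yes). The helper build_command and its fallback behavior are hypothetical illustrations, not crmsh API.

    # Hypothetical caller, for illustration only.
    # Assumes /etc/crm/crm.conf contains:
    #   [core]
    #   no_ssh = yes
    from crmsh import utils

    def build_command(node: str, cmd: str) -> str:
        """Prefix cmd for remote execution, degrading to local mode."""
        try:
            # ssh_command() raises NoSSHError when core.no_ssh is set
            ssh = utils.ssh_command()
        except utils.NoSSHError as msg:
            # Mirrors the pattern used by 'crm report' and
            # 'crm cluster start/stop' in this patch: report the
            # error, then continue with local-only work.
            print(f"ERROR: {msg}")
            return cmd
        return f"{ssh} {node} {cmd}"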