From 47d38185ac70f2c394030c4c6606f4985ad6f1c4 Mon Sep 17 00:00:00 2001 From: Erik Moeller Date: Mon, 13 Apr 2020 18:24:40 -0700 Subject: [PATCH] Use poweroff + poll instead of qvm-kill for forced shutdowns Towards #498 --- launcher/sdw_updater_gui/Updater.py | 94 +++++++++++++++++++++++++---- launcher/tests/test_updater.py | 69 +++------------------ 2 files changed, 92 insertions(+), 71 deletions(-) diff --git a/launcher/sdw_updater_gui/Updater.py b/launcher/sdw_updater_gui/Updater.py index 03fa2bf3b..63806181d 100644 --- a/launcher/sdw_updater_gui/Updater.py +++ b/launcher/sdw_updater_gui/Updater.py @@ -11,9 +11,19 @@ import logging import os import subprocess +import time from datetime import datetime, timedelta from enum import Enum +# Used for VM checks when run in Qubes dom0 +try: + import qubesadmin + + qubes = qubesadmin.Qubes() +except ImportError: + qubes = None + + DATE_FORMAT = "%Y-%m-%d %H:%M:%S" DEFAULT_HOME = ".securedrop_launcher" FLAG_FILE_STATUS_SD_APP = "/home/user/.securedrop_client/sdw-update-status" @@ -403,8 +413,8 @@ def shutdown_and_start_vms(): on, for example. All system AppVMs (sys-net, sys-firewall and sys-usb) need to be restarted. - We use qvm-kill for sys-firewall and sys-net, because a shutdown may fail - if they are currently in use as NetVMs by any of the user's other VMs. + We use a forced shutdown for sys-net and sys-firewall because a regular + shutdown command will return an error if they are in use as NetVMs. """ sdw_vms_in_order = [ @@ -433,17 +443,10 @@ def shutdown_and_start_vms(): sdlog.info("Safely shutting down system VM: {}".format(vm)) _safely_shutdown_vm(vm) - # TODO: Use of qvm-kill should be considered unsafe and may have unexpected - # side effects. We should aim for a more graceful shutdown strategy. unsafe_sys_vms_in_order = ["sys-firewall", "sys-net"] for vm in unsafe_sys_vms_in_order: - sdlog.info("Killing system VM: {}".format(vm)) - try: - subprocess.check_output(["qvm-kill", vm], stderr=subprocess.PIPE) - except subprocess.CalledProcessError as e: - sdlog.error("Error while killing system VM: {}".format(vm)) - sdlog.error(str(e)) - sdlog.error(str(e.stderr)) + sdlog.info("Forcing shutdown of system VM: {}".format(vm)) + _force_shutdown_vm(vm) all_sys_vms_in_order = safe_sys_vms_in_order + unsafe_sys_vms_in_order sdlog.info("Starting fedora-based system VMs after updates") @@ -465,6 +468,75 @@ def _safely_shutdown_vm(vm): return UpdateStatus.UPDATES_FAILED +def _force_shutdown_vm(vm): + """ + Qubes does not yet provide an option to force shutdown for VMs that act as + NetVMs. Issuing a poweroff command and polling for the VM to stop running + is the recommended workaround until then. + + Return value: + - True - if VM was successfully shut down or is not running + - False - if there was a problem shutting down the VM (logged as error) + """ + if vm not in qubes.domains: + sdlog.error( + "Error shutting down VM '{}'. No VM with this name found.".format(vm) + ) + return False + + if qubes.domains[vm].is_running() is False: + sdlog.info("VM '{}' is not running. No shutdown necessary.".format(vm)) + return True + + if qubes.domains[vm].is_paused(): + sdlog.info("VM '{}' is paused. Unpausing before shutdown.".format(vm)) + qubes.domains[vm].unpause() + + try: + qubes.domains[vm].run("poweroff", user="root") + except subprocess.CalledProcessError as e: + # Exit codes 1 and 143 may occur with successful shutdown; log others + if e.returncode != 1 and e.returncode != 143: + sdlog.error( + "Error shutting down VM '{}'. " + "poweroff command returned unexpected exit code {}.".format( + vm, e.returncode + ) + ) + return False + + return _wait_for_is_running(vm, False) + + +def _wait_for_is_running(vm, expected, timeout=60, interval=0.2): + """ + Poll for a VM to enter the given is_running state, and give up after a + timeout is reached. + + Return value: + - True if the VM reached the expeted state + - False if it did not + """ + start_time = time.time() + stop_time = start_time + timeout + while time.time() < stop_time: + state = qubes.domains[vm].is_running() + elapsed = time.time() - start_time + if state == expected: + sdlog.info( + "VM '{}' entered expected state (is_running() is {}) " + "after {:.2f} seconds of polling.".format(vm, expected, elapsed) + ) + return True + time.sleep(interval) + + sdlog.error( + "VM '{}' did not enter expected state (is_running() is {}) " + "in the provided timeout of {} seconds.".format(vm, expected, timeout) + ) + return False + + def _safely_start_vm(vm): try: running_vms = subprocess.check_output( diff --git a/launcher/tests/test_updater.py b/launcher/tests/test_updater.py index f886a546f..afd752085 100644 --- a/launcher/tests/test_updater.py +++ b/launcher/tests/test_updater.py @@ -668,15 +668,18 @@ def test_safely_shutdown_fails(mocked_info, mocked_error, mocked_call, vm): @mock.patch("subprocess.check_output") @mock.patch("Updater._safely_start_vm") @mock.patch("Updater._safely_shutdown_vm") +@mock.patch("Updater._force_shutdown_vm") @mock.patch("Updater.sdlog.error") @mock.patch("Updater.sdlog.info") def test_shutdown_and_start_vms( - mocked_info, mocked_error, mocked_shutdown, mocked_start, mocked_output + mocked_info, + mocked_error, + mocked_force_shutdown, + mocked_shutdown, + mocked_start, + mocked_output, ): - sys_vm_kill_calls = [ - call(["qvm-kill", "sys-firewall"], stderr=-1), - call(["qvm-kill", "sys-net"], stderr=-1), - ] + force_shutdown_calls = [call("sys-firewall"), call("sys-net")] sys_vm_shutdown_calls = [ call("sys-usb"), call("sys-whonix"), @@ -705,7 +708,7 @@ def test_shutdown_and_start_vms( call("sd-log"), ] updater.shutdown_and_start_vms() - mocked_output.assert_has_calls(sys_vm_kill_calls) + mocked_force_shutdown.assert_has_calls(force_shutdown_calls) mocked_shutdown.assert_has_calls( template_vm_calls + app_vm_calls + sys_vm_shutdown_calls ) @@ -714,60 +717,6 @@ def test_shutdown_and_start_vms( assert not mocked_error.called -@mock.patch( - "subprocess.check_output", - side_effect=subprocess.CalledProcessError(1, "check_output"), -) -@mock.patch("Updater._safely_start_vm") -@mock.patch("Updater._safely_shutdown_vm") -@mock.patch("Updater.sdlog.error") -@mock.patch("Updater.sdlog.info") -def test_shutdown_and_start_vms_sysvm_fail( - mocked_info, mocked_error, mocked_shutdown, mocked_start, mocked_output -): - sys_vm_kill_calls = [ - call(["qvm-kill", "sys-firewall"], stderr=-1), - call(["qvm-kill", "sys-net"], stderr=-1), - ] - sys_vm_start_calls = [ - call("sys-net"), - call("sys-firewall"), - call("sys-whonix"), - call("sys-usb"), - ] - app_vm_calls = [ - call("sd-app"), - call("sd-proxy"), - call("sd-whonix"), - call("sd-gpg"), - call("sd-log"), - ] - template_vm_calls = [ - call("fedora-30"), - call("sd-viewer-buster-template"), - call("sd-app-buster-template"), - call("sd-log-buster-template"), - call("sd-devices-buster-template"), - call("sd-proxy-buster-template"), - call("whonix-gw-15"), - call("securedrop-workstation-buster"), - ] - error_calls = [ - call("Error while killing system VM: sys-firewall"), - call("Command 'check_output' returned non-zero exit status 1."), - call("None"), - call("Error while killing system VM: sys-net"), - call("Command 'check_output' returned non-zero exit status 1."), - call("None"), - ] - updater.shutdown_and_start_vms() - mocked_output.assert_has_calls(sys_vm_kill_calls) - mocked_shutdown.assert_has_calls(template_vm_calls + app_vm_calls) - app_vm_calls_reversed = list(reversed(app_vm_calls)) - mocked_start.assert_has_calls(sys_vm_start_calls + app_vm_calls_reversed) - mocked_error.assert_has_calls(error_calls) - - @pytest.mark.parametrize("status", UpdateStatus) @mock.patch("subprocess.check_call") @mock.patch("os.path.expanduser", return_value=temp_dir)