Skip to content

Commit

Permalink
Fix: sbd: bootstrap parameters better cope with Azure for sbd and cor…
Browse files Browse the repository at this point in the history
…osync(bsc#1175896)
  • Loading branch information
liangxin1300 committed Jul 7, 2021
1 parent b2ba54f commit 2f678d7
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 2 deletions.
28 changes: 27 additions & 1 deletion crmsh/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from . import term
from . import lock
from . import userdir
from .constants import SSH_OPTION, QDEVICE_HELP_INFO
from .constants import SSH_OPTION, QDEVICE_HELP_INFO, PROFILES_DICT
from . import ocfs2
from . import qdevice

Expand Down Expand Up @@ -118,6 +118,7 @@ def __init__(self):
self.interfaces_inst = None
self.with_other_user = True
self.cluster_is_running = None
self.profiles_dict = {}
self.default_nic_list = []
self.default_ip_list = []
self.local_ip_list = []
Expand Down Expand Up @@ -186,6 +187,17 @@ def init_sbd_manager(self):
from .sbd import SBDManager
self.sbd_manager = SBDManager(self)

def load_profiles(self):
    """
    Select the profile entry that matches the detected cloud environment

    self.profiles_dict is left untouched when no known environment is
    detected.
    """
    # Azure is the only environment checked for here
    env_name = "Azure" if utils.is_in_azure() else None
    if env_name is None:
        return
    self.profiles_dict = PROFILES_DICT[env_name]


_context = None

Expand Down Expand Up @@ -1164,6 +1176,18 @@ def gen_mcastaddr():
csync2_update(corosync.conf())


def profiles_adjust_for_corosync():
    """
    Apply the loaded profile's totem settings to the corosync configuration

    Does nothing when the context holds no profile data.
    """
    if not _context.profiles_dict:
        return
    # (corosync option path, profile key), applied in this exact order
    totem_settings = (
        ("totem.token", "corosync_token"),
        ("totem.consensus", "corosync_consensus"),
        ("totem.max_messages", "corosync_max_messages"),
        ("totem.token_retransmits_before_loss_const", "corosync_token_retransmits_before_loss_const"),
    )
    for option_path, profile_key in totem_settings:
        corosync.set_value(option_path, _context.profiles_dict[profile_key])


def init_corosync():
"""
Configure corosync (unicast or multicast, encrypted?)
Expand All @@ -1184,6 +1208,7 @@ def requires_unicast():
init_corosync_unicast()
else:
init_corosync_multicast()
profiles_adjust_for_corosync()


def init_sbd():
Expand Down Expand Up @@ -2016,6 +2041,7 @@ def bootstrap_init(context):

_context.initialize_qdevice()
_context.validate_option()
_context.load_profiles()
_context.init_sbd_manager()

# Need hostname resolution to work, want NTP (but don't block ssh_remote or csync2_remote)
Expand Down
12 changes: 12 additions & 0 deletions crmsh/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,4 +490,16 @@


SSH_OPTION = "-o StrictHostKeyChecking=no"


# Per-environment tuning values, keyed by environment name.
# The "corosync_*" entries are written to the matching "totem.*" corosync
# options by bootstrap; "sbd_msgwait" is passed to "sbd ... create" via "-4"
# and "sbd_watchdog_timeout" is used as the SBD watchdog timeout for
# disk-based SBD.
# NOTE(review): units are not stated here - presumably milliseconds for the
# corosync timers and seconds for the sbd values; confirm against
# corosync.conf(5) and sbd(8).
PROFILES_DICT = {
"Azure": {
"corosync_token": 30000,
"corosync_consensus": 36000,
"corosync_max_messages": 20,
"corosync_token_retransmits_before_loss_const": 10,
"sbd_watchdog_timeout": 60,
"sbd_msgwait": 120
}
}
# vim:ts=4:sw=4:et:
32 changes: 31 additions & 1 deletion crmsh/sbd.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def __init__(self, context):
self._sbd_watchdog_timeout = 0
self._is_s390 = "390" in os.uname().machine
self._context = context
self._sbd_msgwait = None

@staticmethod
def _get_device_uuid(dev, node=None):
Expand Down Expand Up @@ -150,8 +151,12 @@ def _initialize_sbd(self):
"""
if self.diskless_sbd:
return
opt = ""
if self._context.profiles_dict:
self._sbd_msgwait = self._context.profiles_dict["sbd_msgwait"]
opt = "-4 {}".format(self._sbd_msgwait)
for dev in self._sbd_devices:
rc, _, err = bootstrap.invoke("sbd -d {} create".format(dev))
rc, _, err = bootstrap.invoke("sbd {} -d {} create".format(opt, dev))
if not rc:
bootstrap.error("Failed to initialize SBD device {}: {}".format(dev, err))

Expand Down Expand Up @@ -179,6 +184,8 @@ def _determine_sbd_watchdog_timeout(self):
When using diskless SBD, determine value of SBD_WATCHDOG_TIMEOUT
"""
if not self.diskless_sbd:
if self._context.profiles_dict:
self._sbd_watchdog_timeout = self._context.profiles_dict["sbd_watchdog_timeout"]
return
# add sbd after qdevice started
if utils.is_qdevice_configured() and utils.service_is_active("corosync-qdevice.service"):
Expand Down Expand Up @@ -241,6 +248,28 @@ def _enable_sbd_service(self):
# in init process
bootstrap.invoke("systemctl enable sbd.service")

def _adjust_systemd(self):
"""
Adjust start timeout for sbd when has profiles data
"""
if not self._context.profiles_dict:
return

# TimeoutStartUSec default is 1min 30s, need to parse as seconds
cmd = "systemctl show service -p TimeoutStartUSec|\
sed -e 's/min/ 60/; s/s//g; s/.*=//g'|\
awk '{if (NF == 3) a=$1*$2+$3; else if (NF == 1) a=$1;print a}'"
default_start_timeout = int(utils.get_stdout_or_raise_error(cmd))
if self._sbd_msgwait <= default_start_timeout:
return

systemd_sbd_dir = "/etc/systemd/system/sbd.service.d"
utils.mkdirp(systemd_sbd_dir)
systemd_timeout_sec = self._sbd_msgwait * 1.2
cmd = 'echo -e "[Service]\nTimeoutSec={}" | tee {}/sbd_delay_start.conf'.format(systemd_timeout_sec, systemd_sbd_dir)
utils.get_stdout_or_raise_error(cmd)
utils.get_stdout_or_raise_error("systemctl daemon-reload")

def _warn_diskless_sbd(self, peer=None):
"""
Give warning when configuring diskless sbd
Expand Down Expand Up @@ -278,6 +307,7 @@ def sbd_init(self):
self._update_configuration()
self._determine_stonith_watchdog_timeout()
self._enable_sbd_service()
self._adjust_systemd()

def configure_sbd_resource(self):
"""
Expand Down
7 changes: 7 additions & 0 deletions crmsh/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2067,6 +2067,13 @@ def detect_cloud():
return None


def is_in_azure():
    """
    Tell whether detect_cloud() reports the "microsoft-azure" platform
    """
    cloud_name = detect_cloud()
    return cloud_name == "microsoft-azure"


def debug_timestamp():
    """
    Return the current time formatted as YYYY/MM/DD HH:MM:SS
    """
    now = datetime.datetime.now()
    return now.strftime('%Y/%m/%d %H:%M:%S')

Expand Down

0 comments on commit 2f678d7

Please sign in to comment.