Skip to content

Commit

Permalink
[#24758] YSQL: Remove time sync service requirement from the pre-reqs
Browse files Browse the repository at this point in the history
Summary:
The issue was initially discovered because of the undefined function has_aws_time_sync_service.

However, we plan to remove clockbound as a pre-req until the feature is Generally Available.

Fixes #24758.
Jira: DB-13848

Test Plan: Jenkins

Reviewers: nikhil, sanketh, sgarg-yb

Reviewed By: sgarg-yb

Subscribers: ybase, hsunder

Differential Revision: https://phorge.dev.yugabyte.com/D39473
  • Loading branch information
pao214 committed Nov 1, 2024
1 parent 35356b7 commit 2248409
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 39 deletions.
61 changes: 23 additions & 38 deletions bin/yugabyted
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ PREREQS_ERROR_MSGS = {
' please free the port and restart the node.',
'ycql_metric_port': 'YCQL metrics port {} is already in use. For accessing the YCQL metrics,' \
' please free the port and restart the node.',
'clockbound': 'Clockbound is recommended on AWS clusters. It can reduce read restart errors' \
' significantly in concurrent workloads.' \
'clockbound': 'Clockbound is recommended on AWS/Azure/GCP clusters.' \
' It can reduce read restart errors significantly in concurrent workloads.' \
' Relevant flag: --enhance_time_sync_via_clockbound.',
}
QUICK_START_LINKS = {
Expand Down Expand Up @@ -671,31 +671,19 @@ def using_time_sync_service():
allow_list = ['169.254.169.123', 'metadata.google.internal', 'PHC',
'aws.com', 'google.com']

try:
# Run the chronyc sources command and capture the output
result = subprocess.run(['chronyc', 'sources'], capture_output=True, text=True, timeout=1)

# Check if any allowed source is in the output
if result.returncode == 0:
for source in allow_list:
if source in result.stdout:
return True
except (subprocess.TimeoutExpired, FileNotFoundError):
return False
cmd = ['chronyc', 'sources']
out, err, ret_code = run_process(cmd, timeout=1, log_cmd=True)
if ret_code == 0:
for source in allow_list:
if source in out:
return True

return False

def is_phc_configured():
try:
# Run the chronyc sources command and capture the output
result = subprocess.run(['systemctl', 'status', 'clockbound'],
capture_output=True, text=True, timeout=1)

# Check if PHC is in the output
if result.returncode == 0 and 'PHC' in result.stdout:
return True
except (subprocess.TimeoutExpired, FileNotFoundError):
return False
cmd = ['systemctl', 'status', 'clockbound']
out, err, retcode = run_process(cmd, timeout=1, log_cmd=True)
return retcode == 0 and 'PHC' in out

class ControlScript(object):
def __init__(self):
Expand Down Expand Up @@ -2800,11 +2788,12 @@ class ControlScript(object):
prereqs_warn.add('ntp/chrony')
prereqs_warn_flag = True

# Configuring clockbound is strongly recommended for AWS clusters.
if has_aws_time_sync_service() and not self.configs.temp_data[
"enhance_time_sync_via_clockbound"]:
prereqs_warn.add('clockbound')
prereqs_warn_flag = True
# TODO: Uncomment this block when clockbound becomes GA.
# # Configuring clockbound is strongly recommended for AWS clusters.
# if using_time_sync_service() and not self.configs.temp_data[
# "enhance_time_sync_via_clockbound"]:
# prereqs_warn.add('clockbound')
# prereqs_warn_flag = True

(failed_ports, warning_ports, mandatory_port_available,
recommended_port_available) = self.check_ports()
Expand Down Expand Up @@ -4091,18 +4080,14 @@ class ControlScript(object):
Output.init_animation("Validating system config for clockbound...")
configure_clockbound_path = find_binary_location("configure_clockbound.sh")
cmd = ["bash", configure_clockbound_path, "--validate"]
try:
subprocess.check_call(cmd)
Output.update_animation("Clockbound configured successfully.")
except subprocess.CalledProcessError as e:
exit_code = e.returncode
Output.update_animation("Failed to validate clockbound configuration.",
out, err, retcode = run_process(cmd)
if retcode == 0:
Output.update_animation("System configured for clockbound.")
else:
Output.update_animation("Failed to validate system configuration for clockbound.",
status=Output.ANIMATION_FAIL)
Output.log_error_and_exit(
Output.make_red("ERROR") + f": Exit code: {exit_code}."
" Did you run configure_clockbound.sh script?"
)

Output.make_red("ERROR") + ": Did you run configure_clockbound.sh script?")

# Runs post_install script for linux computers.
def post_install_yb(self):
Expand Down
2 changes: 1 addition & 1 deletion src/yb/server/clockbound_clock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ extern "C" {
static constexpr auto kAutoConfigNumClockboundCtxs = 0;

// There are multiple levels of time synchronization in increasing order
// of accuracy:
// of accuracy.
//
// 1. Random NTP servers for time synchronization:
// If the cluster nodes use this method for time sync, do NOT use
Expand Down

0 comments on commit 2248409

Please sign in to comment.