
Commit

Merge pull request #125 from hnousiainen/htn_pglookout_failover_priorities

pglookout: support explicit failover priorities
rdunklau authored Jan 13, 2025

2 parents 60f65b2 + 56fc154 commit f486efe
Showing 5 changed files with 280 additions and 157 deletions.
8 changes: 8 additions & 0 deletions README.rst
@@ -295,6 +295,14 @@ over_warning_limit_command and to create a warning file.

Shell command to execute in case the node has deemed itself in need of promotion

``failover_priorities`` (default ``{}``)

Define the priority of nodes for promotion when there are multiple candidates
with the same replication position. This makes it possible to ensure that all
pglookout instances elect the same standby for promotion, while still allowing
topologies with e.g. less preferred standbys in secondary network locations.
By default, pglookout uses the remote connection ids for the same selection
purpose.
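
For example, to prefer a standby in a primary location over one in a
secondary location when their replication positions are tied (a hypothetical
sketch; instance names and priority values are placeholders)::

    {
        "failover_priorities": {
            "standby-dc1": 10,
            "standby-dc2": 0
        }
    }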

``known_gone_nodes`` (default ``[]``)

Lists nodes that are explicitly known to have left the cluster. If the old
33 changes: 25 additions & 8 deletions pglookout/pglookout.py
@@ -7,6 +7,7 @@
This file is under the Apache License, Version 2.0.
See the file `LICENSE` for details.
"""

from . import logutil, statsd, version
from .cluster_monitor import ClusterMonitor
from .common import convert_xlog_location_to_offset, get_iso_timestamp, parse_iso_datetime
@@ -643,19 +644,35 @@ def do_failover_decision(self, standby_nodes):
if not known_replication_positions:
self.log.warning("No known replication positions, canceling failover consideration")
return
# If there are multiple nodes with the same replication positions pick the one with the "highest" name
# to make sure pglookouts running on all standbys make the same decision. The rationale for picking
# the "highest" node is that there's no obvious way for pglookout to decide which of the nodes is
# "best" beyond looking at replication positions, but picking the highest id supports environments
where nodes are assigned identifiers from an incrementing sequence and where we want to
# promote the latest and greatest node. In static environments node identifiers can be priority
# numbers, with the highest number being the one that should be preferred.
furthest_along_instance = max(known_replication_positions[max(known_replication_positions)])

# Find the instance that is furthest along.
# If there are multiple nodes with the same replication positions, try to identify one to promote either
# via explicit failover priority configuration or pick the one with the "highest" name by sort order.
# The rationale of this logic is to ensure all participating pglookouts running on all standbys make
# the same decision. The "highest" name works well in environments where nodes are assigned identifiers
# from an incrementing sequence and where we want to promote the latest and greatest node.

# First, find the list of instances that share the most recent replication position
furthest_along_instances = known_replication_positions[max(known_replication_positions)]
# Second, sort them by "instance name"
furthest_along_instances = sorted(furthest_along_instances, reverse=True)
# Third, if we have explicit failover priorities, use those to select the instance to promote
if "failover_priorities" in self.config:
highest_priority = max(
self.config["failover_priorities"].get(instance, 0) for instance in furthest_along_instances
)
furthest_along_instances = [
instance
for instance in furthest_along_instances
if self.config["failover_priorities"].get(instance) == highest_priority
]
furthest_along_instance = furthest_along_instances[0]
self.log.warning(
"Node that is furthest along is: %r, all replication positions were: %r",
furthest_along_instance,
sorted(known_replication_positions),
)

total_observers = len(self.connected_observer_nodes) + len(self.disconnected_observer_nodes)
# +1 in the calculation comes from the master node
total_amount_of_nodes = len(standby_nodes) + 1 - len(self.never_promote_these_nodes) + total_observers
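
To make the tie-break concrete, the following standalone sketch mirrors the
selection logic merged above (instance names and priority values are
hypothetical; the priorities dict stands in for the "failover_priorities"
configuration key):

    # Instances sharing the most recent replication position.
    tied = ["standby-dc3", "standby-dc2", "standby-dc1"]

    # Sort by name, highest first, so every pglookout arrives at the same order.
    tied = sorted(tied, reverse=True)

    # Keep only the instances with the highest configured priority (default 0).
    priorities = {"standby-dc1": 10, "standby-dc2": 10, "standby-dc3": 0}
    highest = max(priorities.get(instance, 0) for instance in tied)
    tied = [instance for instance in tied if priorities.get(instance, 0) == highest]

    # The first remaining instance wins the tie-break.
    print(tied[0])  # "standby-dc2": priority 10, and sorts above "standby-dc1"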
1 change: 1 addition & 0 deletions pyproject.toml
@@ -47,6 +47,7 @@ exclude = [
'test/test_lookout.py',
'test/test_pgutil.py',
'test/test_webserver.py',
'test/utils.py',
# Other.
'setup.py',
'version.py',
309 changes: 160 additions & 149 deletions test/test_lookout.py

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions test/utils.py
@@ -0,0 +1,86 @@
"""
Utilities for pglookout tests
Copyright (c) 2015 Ohmu Ltd
Copyright (c) 2014 F-Secure
This file is under the Apache License, Version 2.0.
See the file `LICENSE` for details.
"""

from pglookout.common import get_iso_timestamp


def add_to_observer_state(
pgl,
observer_name,
db_name,
pg_last_xlog_receive_location=None,
pg_is_in_recovery=True,
connection=True,
replication_time_lag=None,
fetch_time=None,
db_time=None,
):
    """Record observer_name's view of db_name in pgl.observer_state."""
db_node_state = create_db_node_state(
pg_last_xlog_receive_location,
pg_is_in_recovery,
connection,
replication_time_lag,
fetch_time=fetch_time,
db_time=db_time,
)
update_dict = {
"fetch_time": get_iso_timestamp(), # type: ignore[no-untyped-call]
"connection": True,
db_name: db_node_state,
}
if observer_name in pgl.observer_state:
pgl.observer_state[observer_name].update(update_dict)
else:
pgl.observer_state[observer_name] = update_dict


def create_db_node_state(
pg_last_xlog_receive_location=None,
pg_is_in_recovery=True,
connection=True,
replication_time_lag=None,
fetch_time=None,
db_time=None,
):
    """Build a db node state dict mimicking what pglookout's cluster monitor collects."""
return {
"connection": connection,
"db_time": get_iso_timestamp(db_time),
"fetch_time": get_iso_timestamp(fetch_time),
"pg_is_in_recovery": pg_is_in_recovery,
"pg_last_xact_replay_timestamp": None,
"pg_last_xlog_receive_location": pg_last_xlog_receive_location,
"pg_last_xlog_replay_location": None,
"replication_time_lag": replication_time_lag,
"min_replication_time_lag": 0, # simulate that we've been in sync once
}


def set_instance_cluster_state(
pgl,
*,
instance,
pg_last_xlog_receive_location=None,
pg_is_in_recovery=True,
connection=True,
replication_time_lag=None,
fetch_time=None,
db_time=None,
conn_info=None,
):
    """Set this pglookout's own view of instance and register its connection info."""
db_node_state = create_db_node_state(
pg_last_xlog_receive_location,
pg_is_in_recovery,
connection,
replication_time_lag,
fetch_time=fetch_time,
db_time=db_time,
)
pgl.cluster_state[instance] = db_node_state
pgl.config["remote_conns"][instance] = conn_info or {"host": instance}
