Skip to content

Commit

Permalink
RoundRobinLoadBalancer: increase visibility into health-checking (#…
Browse files Browse the repository at this point in the history
…2182)

Motivation:

Currently, health-checking events are logged at `DEBUG` level and the
failure cause is not propagated. Users have to change the logging level
and reproduce the failure in order to understand the root cause.

Modifications:

- Log each failed attempt to create a connection at `INFO` level for
visibility;
- Log at `WARN` level when health-checking is triggered;
- Retain the last cause that triggered health-checking inside the
`HealthCheck` state so that `toString()` can include it.

Result:

Users can see when a connectivity issue occurs: the cause is logged and
propagated to subsequent selection attempts if there are no other
`ACTIVE` hosts.
  • Loading branch information
idelpivnitskiy authored Apr 8, 2022
1 parent 6fd32bb commit fce6b3e
Showing 1 changed file with 13 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -578,8 +578,8 @@ void markUnhealthy(final Throwable cause, final ConnectionFactory<Addr, ? extend
final ActiveState nextState = previousState.forNextFailedConnection();
if (connStateUpdater.compareAndSet(this, previous,
new ConnState(previous.connections, nextState))) {
LOGGER.debug("Load balancer for {}: failed to open a new connection to the host on address {}" +
" {} time(s) ({} consecutive failures will trigger health check).",
LOGGER.info("Load balancer for {}: failed to open a new connection to the host on address {}" +
" {} time(s) ({} consecutive failures will trigger health-checking).",
targetResource, address, nextState.failedConnections,
healthCheckConfig.failedThreshold, cause);
break;
Expand All @@ -588,11 +588,12 @@ void markUnhealthy(final Throwable cause, final ConnectionFactory<Addr, ? extend
continue;
}

final HealthCheck<Addr, C> healthCheck = new HealthCheck<>(connectionFactory, this);
final HealthCheck<Addr, C> healthCheck = new HealthCheck<>(connectionFactory, this, cause);
final ConnState nextState = new ConnState(previous.connections, healthCheck);
if (connStateUpdater.compareAndSet(this, previous, nextState)) {
LOGGER.debug("Load balancer for {}: failed to open a new connection to the host on address {}" +
" {} time(s). Threshold reached, triggering health check for this host.",
LOGGER.warn("Load balancer for {}: failed to open a new connection to the host on address {} " +
"{} time(s) in a row. Error counting threshold reached, marking this host as " +
"UNHEALTHY for the selection algorithm and triggering background health-checking.",
targetResource, address, healthCheckConfig.failedThreshold, cause);
healthCheck.schedule(cause);
break;
Expand Down Expand Up @@ -757,11 +758,13 @@ private static final class HealthCheck<ResolvedAddress, C extends LoadBalancedCo
extends DelayedCancellable {
private final ConnectionFactory<ResolvedAddress, ? extends C> connectionFactory;
private final Host<ResolvedAddress, C> host;
private final Throwable lastError;

private HealthCheck(final ConnectionFactory<ResolvedAddress, ? extends C> connectionFactory,
final Host<ResolvedAddress, C> host) {
final Host<ResolvedAddress, C> host, final Throwable lastError) {
this.connectionFactory = connectionFactory;
this.host = host;
this.lastError = lastError;
}

public void schedule(final Throwable originalCause) {
Expand All @@ -787,12 +790,13 @@ public void schedule(final Throwable originalCause) {
.flatMapCompletable(newCnx -> {
if (host.addConnection(newCnx)) {
host.markHealthy(this);
LOGGER.debug("Load balancer for {}: health check passed for {}.",
LOGGER.info("Load balancer for {}: health check passed for {}, marking this " +
"host as ACTIVE for the selection algorithm.",
host.targetResource, host);
return completed();
} else {
// This happens only if the host is closed, no need to mark as healthy.
LOGGER.debug("Load balancer for {}: health check finished for {}, but the " +
LOGGER.debug("Load balancer for {}: health check passed for {}, but the " +
"host rejected a new connection {}. Closing it now.",
host.targetResource, host, newCnx);
return newCnx.closeAsync();
Expand All @@ -812,7 +816,7 @@ public void schedule(final Throwable originalCause) {

@Override
public String toString() {
return "UNHEALTHY";
return "UNHEALTHY(" + lastError + ')';
}
}

Expand Down

0 comments on commit fce6b3e

Please sign in to comment.