Skip to content

Commit

Permalink
AsyncTimer monitoring: measure overshoot only when waiting for timer …
Browse files Browse the repository at this point in the history
…to fire (#8103)
  • Loading branch information
ReubenBond authored Nov 7, 2022
1 parent 0d434d1 commit 997e49e
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 55 deletions.
47 changes: 27 additions & 20 deletions src/Orleans.Runtime/GrainDirectory/ClientDirectory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ private void UpdateRoutingTable(ImmutableDictionary<SiloAddress, (ImmutableHashS
/// </summary>
private (ImmutableHashSet<GrainId> Clients, long Version) GetConnectedClients(ImmutableHashSet<GrainId> previousClients, long previousVersion)
{
var connectedClientsVersion = _connectedClients.Version;
var connectedClientsVersion = _connectedClients.Version;
if (connectedClientsVersion <= _observedConnectedClientsVersion)
{
return (previousClients, previousVersion);
Expand Down Expand Up @@ -354,35 +354,42 @@ private async Task Run()

while (true)
{
membershipTask ??= membershipUpdates.MoveNextAsync().AsTask();
timerTask ??= _refreshTimer.NextTick();
try
{
membershipTask ??= membershipUpdates.MoveNextAsync().AsTask();
timerTask ??= _refreshTimer.NextTick();

// Wait for either of the tasks to complete.
await Task.WhenAny(membershipTask, timerTask);
// Wait for either of the tasks to complete.
await Task.WhenAny(membershipTask, timerTask);

if (timerTask.IsCompleted)
{
if (!await timerTask)
if (timerTask.IsCompleted)
{
break;
}
if (!await timerTask)
{
break;
}

timerTask = null;
}
timerTask = null;
}

if (membershipTask.IsCompleted)
{
if (!await membershipTask)
if (membershipTask.IsCompleted)
{
break;
if (!await membershipTask)
{
break;
}

membershipTask = null;
}

membershipTask = null;
if (ShouldPublish())
{
await PublishUpdates();
}
}

if (ShouldPublish())
catch (Exception exception)
{
await PublishUpdates();
_logger.LogError(exception, "Exception publishing client routing table");
}
}
}
Expand Down
19 changes: 6 additions & 13 deletions src/Orleans.Runtime/MembershipService/LocalSiloHealthMonitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,19 +142,12 @@ private int CheckThreadPoolQueueDelay(DateTime checkTime, List<string>? complain

if ((int)threadPoolDelaySeconds >= 1)
{
if ((int)threadPoolDelaySeconds >= 10)
{
// Log as an error if the delay is massive.
_log.LogError(
".NET Thread Pool is exhibiting delays of {ThreadPoolQueueDelaySeconds}s. This can indicate .NET Thread Pool starvation, very long .NET GC pauses, or other runtime or machine pauses.",
threadPoolDelaySeconds);
}
else
{
_log.LogWarning(
".NET Thread Pool is exhibiting delays of {ThreadPoolQueueDelaySeconds}s. This can indicate .NET Thread Pool starvation, very long .NET GC pauses, or other runtime or machine pauses.",
threadPoolDelaySeconds);
}
// Log as an error if the delay is massive.
var logLevel = (int)threadPoolDelaySeconds >= 10 ? LogLevel.Error : LogLevel.Warning;
_log.Log(
logLevel,
".NET Thread Pool is exhibiting delays of {ThreadPoolQueueDelaySeconds}s. This can indicate .NET Thread Pool starvation, very long .NET GC pauses, or other runtime or machine pauses.",
threadPoolDelaySeconds);

complaints?.Add(
$".NET Thread Pool is exhibiting delays of {threadPoolDelaySeconds}s. This can indicate .NET Thread Pool starvation, very long .NET GC pauses, or other runtime or machine pauses.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ private async Task CleanupDefunctSilos()
var dateLimit = DateTime.UtcNow - this.clusterMembershipOptions.DefunctSiloExpiration;
await this.membershipTableProvider.CleanupDefunctSiloEntries(dateLimit);
}
catch (Exception exception) when (exception is NotImplementedException || exception is MissingMethodException)
catch (Exception exception) when (exception is NotImplementedException or MissingMethodException)
{
this.cleanupDefunctSilosTimer.Dispose();
this.log.LogWarning(
Expand Down
2 changes: 1 addition & 1 deletion src/Orleans.Runtime/MembershipService/SiloHealthMonitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ internal class SiloHealthMonitor : ITestAccessor, IHealthCheckable
/// <summary>
/// The time since the last ping response was received from either the node being monitored or an intermediary.
/// </summary>
public TimeSpan? ElapsedSinceLastResponse => _elapsedSinceLastSuccessfulResponse.IsRunning ? (Nullable<TimeSpan>)_elapsedSinceLastSuccessfulResponse.Elapsed : null;
public TimeSpan? ElapsedSinceLastResponse => _elapsedSinceLastSuccessfulResponse.IsRunning ? (TimeSpan?)_elapsedSinceLastSuccessfulResponse.Elapsed : null;

/// <summary>
/// The duration of time measured from just prior to sending the last probe which received a response until just after receiving and processing the response.
Expand Down
33 changes: 13 additions & 20 deletions src/Orleans.Runtime/Timers/AsyncTimer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ internal class AsyncTimer : IAsyncTimer
private readonly string name;
private readonly ILogger log;
private DateTime lastFired = DateTime.MinValue;
private DateTime? expected;
private DateTime expected;

public AsyncTimer(TimeSpan period, string name, ILogger log)
{
Expand All @@ -36,22 +36,12 @@ public async Task<bool> NextTick(TimeSpan? overrideDelay = default)
if (cancellation.IsCancellationRequested) return false;

var start = DateTime.UtcNow;
TimeSpan delay;
if (overrideDelay.HasValue)
var delay = overrideDelay switch
{
delay = overrideDelay.Value;
}
else
{
if (this.lastFired == DateTime.MinValue)
{
delay = this.period;
}
else
{
delay = this.lastFired.Add(this.period).Subtract(start);
}
}
{ } value => value,
_ when lastFired == DateTime.MinValue => period,
_ => lastFired.Add(period).Subtract(start)
};

if (delay < TimeSpan.Zero) delay = TimeSpan.Zero;

Expand All @@ -68,6 +58,7 @@ public async Task<bool> NextTick(TimeSpan? overrideDelay = default)
if (task2.IsCanceled)
{
await Task.Yield();
expected = default;
return false;
}
}
Expand All @@ -76,6 +67,7 @@ public async Task<bool> NextTick(TimeSpan? overrideDelay = default)
if (task.IsCanceled)
{
await Task.Yield();
expected = default;
return false;
}
}
Expand All @@ -91,12 +83,13 @@ public async Task<bool> NextTick(TimeSpan? overrideDelay = default)
overshoot);
}

expected = default;
return true;
}

private static TimeSpan GetOvershootDelay(DateTime now, DateTime dueTime)
{
if (dueTime == DateTime.MinValue) return TimeSpan.Zero;
if (dueTime == default) return TimeSpan.Zero;
if (dueTime > now) return TimeSpan.Zero;

var overshoot = now.Subtract(dueTime);
Expand All @@ -108,11 +101,11 @@ private static TimeSpan GetOvershootDelay(DateTime now, DateTime dueTime)
public bool CheckHealth(DateTime lastCheckTime, out string reason)
{
var now = DateTime.UtcNow;
var dueTime = this.expected.GetValueOrDefault();
var overshoot = GetOvershootDelay(now, dueTime);
var due = this.expected;
var overshoot = GetOvershootDelay(now, due);
if (overshoot > TimeSpan.Zero)
{
reason = $"{this.name} timer should have fired at {dueTime}, which is {overshoot} ago";
reason = $"{this.name} timer should have fired at {due}, which is {overshoot} ago";
return false;
}

Expand Down

0 comments on commit 997e49e

Please sign in to comment.