Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make HealthChecks.ResourceUtilization use observable instruments #5798

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
<PropertyGroup>
<EnableConfigurationBindingGenerator>true</EnableConfigurationBindingGenerator>
<InjectSharedDataValidation>true</InjectSharedDataValidation>
<InjectSharedDiagnosticIds>true</InjectSharedDiagnosticIds>
<InjectExperimentalAttributeOnLegacy>true</InjectExperimentalAttributeOnLegacy>
<InjectObsoleteAttributeOnLegacy>true</InjectObsoleteAttributeOnLegacy>
</PropertyGroup>

<PropertyGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.ResourceMonitoring;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;

namespace Microsoft.Extensions.Diagnostics.HealthChecks;

/// <summary>
/// Represents a health check for in-container resources <see cref="IHealthCheck"/>.
/// </summary>
/// <remarks>
/// This part of the class contains the members that rely on the <c>IResourceMonitor</c> API.
/// They are marked obsolete and are used only when
/// <c>UseObservableResourceMonitoringInstruments</c> is not enabled in the options.
/// </remarks>
internal sealed partial class ResourceUtilizationHealthCheck : IHealthCheck
{
    // CS0436 is suppressed for the whole class body because both [Obsolete] attributes below
    // reference DiagnosticIds; the suppression is restored at the end of the class.
#pragma warning disable CS0436 // Type conflicts with imported type
    /// <summary>
    /// Stores the resource monitor that <c>ObsoleteCheckHealthAsync</c> queries for utilization.
    /// </summary>
    /// <param name="dataTracker">The resource monitor; it is passed through <c>Throw.IfNull</c> before being stored.</param>
    [Obsolete(DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiMessage,
        DiagnosticId = DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiDiagId,
        UrlFormat = DiagnosticIds.UrlFormat)]
    public void ObsoleteConstructor(IResourceMonitor dataTracker) => _dataTracker = Throw.IfNull(dataTracker);

    /// <summary>
    /// Runs the health check.
    /// </summary>
    /// <param name="cancellationToken">A <see cref="CancellationToken"/> that can be used to cancel the health check. It is not used by this implementation.</param>
    /// <returns>A <see cref="Task{HealthCheckResult}"/> that completes when the health check has finished, yielding the status of the component being checked.</returns>
#pragma warning disable IDE0060 // Remove unused parameter
    [Obsolete(DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiMessage,
        DiagnosticId = DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiDiagId,
        UrlFormat = DiagnosticIds.UrlFormat)]
    public Task<HealthCheckResult> ObsoleteCheckHealthAsync(CancellationToken cancellationToken = default)
    {
        // Read the utilization over the configured sampling window and evaluate it against the thresholds.
        var utilization = _dataTracker!.GetUtilization(_options.SamplingWindow);
        return ResourceUtilizationHealthCheck.EvaluateHealthStatusAsync(utilization.CpuUsedPercentage, utilization.MemoryUsedPercentage, _options);
    }
#pragma warning restore IDE0060 // Remove unused parameter
#pragma warning restore CS0436 // Type conflicts with imported type
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.ResourceMonitoring;
Expand All @@ -13,44 +15,30 @@ namespace Microsoft.Extensions.Diagnostics.HealthChecks;
/// <summary>
/// Represents a health check for in-container resources <see cref="IHealthCheck"/>.
/// </summary>
internal sealed class ResourceUtilizationHealthCheck : IHealthCheck
internal sealed partial class ResourceUtilizationHealthCheck : IHealthCheck, IDisposable
{
private readonly double _multiplier;
private readonly MeterListener? _meterListener;
private readonly ResourceUtilizationHealthCheckOptions _options;
private readonly IResourceMonitor _dataTracker;
private IResourceMonitor? _dataTracker;
private double _cpuUsedPercentage;
private double _memoryUsedPercentage;

/// <summary>
/// Initializes a new instance of the <see cref="ResourceUtilizationHealthCheck"/> class.
/// </summary>
/// <param name="options">The options.</param>
/// <param name="dataTracker">The datatracker.</param>
public ResourceUtilizationHealthCheck(IOptions<ResourceUtilizationHealthCheckOptions> options,
IResourceMonitor dataTracker)
{
_options = Throw.IfMemberNull(options, options.Value);
_dataTracker = Throw.IfNull(dataTracker);
}

/// <summary>
/// Runs the health check.
/// </summary>
/// <param name="context">A context object associated with the current execution.</param>
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can be used to cancel the health check.</param>
/// <returns>A <see cref="Task{HealthCheckResult}"/> that completes when the health check has finished, yielding the status of the component being checked.</returns>
public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
#pragma warning disable EA0014 // The async method doesn't support cancellation
public static Task<HealthCheckResult> EvaluateHealthStatusAsync(double cpuUsedPercentage, double memoryUsedPercentage, ResourceUtilizationHealthCheckOptions options)
{
var utilization = _dataTracker.GetUtilization(_options.SamplingWindow);
IReadOnlyDictionary<string, object> data = new Dictionary<string, object>
{
{ nameof(utilization.CpuUsedPercentage), utilization.CpuUsedPercentage },
{ nameof(utilization.MemoryUsedPercentage), utilization.MemoryUsedPercentage },
{ "CpuUsedPercentage", cpuUsedPercentage },
{ "MemoryUsedPercentage", memoryUsedPercentage },
};

bool cpuUnhealthy = utilization.CpuUsedPercentage > _options.CpuThresholds.UnhealthyUtilizationPercentage;
bool memoryUnhealthy = utilization.MemoryUsedPercentage > _options.MemoryThresholds.UnhealthyUtilizationPercentage;
bool cpuUnhealthy = cpuUsedPercentage > options.CpuThresholds.UnhealthyUtilizationPercentage;
bool memoryUnhealthy = memoryUsedPercentage > options.MemoryThresholds.UnhealthyUtilizationPercentage;

if (cpuUnhealthy || memoryUnhealthy)
{
string message = string.Empty;
string message;
if (cpuUnhealthy && memoryUnhealthy)
{
message = "CPU and memory usage is above the limit";
Expand All @@ -67,12 +55,12 @@ public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, Canc
return Task.FromResult(HealthCheckResult.Unhealthy(message, default, data));
}

bool cpuDegraded = utilization.CpuUsedPercentage > _options.CpuThresholds.DegradedUtilizationPercentage;
bool memoryDegraded = utilization.MemoryUsedPercentage > _options.MemoryThresholds.DegradedUtilizationPercentage;
bool cpuDegraded = cpuUsedPercentage > options.CpuThresholds.DegradedUtilizationPercentage;
bool memoryDegraded = memoryUsedPercentage > options.MemoryThresholds.DegradedUtilizationPercentage;

if (cpuDegraded || memoryDegraded)
{
string message = string.Empty;
string message;
if (cpuDegraded && memoryDegraded)
{
message = "CPU and memory usage is close to the limit";
Expand All @@ -91,4 +79,104 @@ public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, Canc

return Task.FromResult(HealthCheckResult.Healthy(default, data));
}
#pragma warning restore EA0014 // The async method doesn't support cancellation

/// <summary>
/// Initializes a new instance of the <see cref="ResourceUtilizationHealthCheck"/> class.
/// </summary>
/// <param name="options">The health check options.</param>
/// <param name="dataTracker">The resource monitor; it is used only when observable instruments are not enabled in <paramref name="options"/>.</param>
public ResourceUtilizationHealthCheck(IOptions<ResourceUtilizationHealthCheckOptions> options, IResourceMonitor dataTracker)
{
    _options = Throw.IfMemberNull(options, options.Value);

    // Legacy path: hand the monitor to the obsolete initializer and skip the meter listener setup.
    if (!_options.UseObservableResourceMonitoringInstruments)
    {
        ObsoleteConstructor(dataTracker);
        return;
    }

#if NETFRAMEWORK
    _multiplier = 1;
#else
    // Due to a bug on Windows (https://github.com/dotnet/extensions/issues/5472) the CPU utilization
    // comes in the range [0, 100] and is used as-is. On other platforms (Linux) it comes in the
    // correct range [0, 1] and is converted to a percentage.
#pragma warning disable S109 // Magic numbers should not be used
    _multiplier = OperatingSystem.IsWindows() ? 1 : 100;
#pragma warning restore S109 // Magic numbers should not be used
#endif

    // Subscribe to published instruments and to double-valued measurements, then start listening.
    _meterListener = new MeterListener
    {
        InstrumentPublished = OnInstrumentPublished,
    };
    _meterListener.SetMeasurementEventCallback<double>(OnMeasurementRecorded);
    _meterListener.Start();
}

/// <summary>
/// Runs the health check.
/// </summary>
/// <param name="context">A context object associated with the current execution. It is not read by this method.</param>
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can be used to cancel the health check.</param>
/// <returns>A <see cref="Task{HealthCheckResult}"/> that completes when the health check has finished, yielding the status of the component being checked.</returns>
public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
{
    if (_options.UseObservableResourceMonitoringInstruments)
    {
        // Ask the listener to record the observable instruments, then evaluate the values
        // currently held in the CPU and memory fields.
        _meterListener!.RecordObservableInstruments();

        return EvaluateHealthStatusAsync(_cpuUsedPercentage, _memoryUsedPercentage, _options);
    }

    // Observable instruments are disabled: fall back to the obsolete resource monitor path.
    return ObsoleteCheckHealthAsync(cancellationToken);
}

/// <inheritdoc />
// Forwards to the private overload with disposing set to true.
public void Dispose() => Dispose(true);

/// <summary>
/// Disposes the meter listener, if one was created, when <paramref name="disposing"/> is <see langword="true"/>.
/// </summary>
/// <param name="disposing"><see langword="true"/> to dispose the meter listener; <see langword="false"/> to do nothing.</param>
private void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    // The listener is null when the obsolete (non-observable) path was selected in the constructor.
    _meterListener?.Dispose();
}

/// <summary>
/// Enables measurement events for instruments that belong to the Resource Monitoring meter.
/// </summary>
/// <param name="instrument">The instrument that was published.</param>
/// <param name="listener">The listener on which measurement events are enabled.</param>
private void OnInstrumentPublished(Instrument instrument, MeterListener listener)
{
    // Instruments from any other meter are ignored.
    if (instrument.Meter.Name is not "Microsoft.Extensions.Diagnostics.ResourceMonitoring")
    {
        return;
    }

    listener.EnableMeasurementEvents(instrument);
}

/// <summary>
/// Stores the latest CPU or memory utilization measurement, scaled by the multiplier chosen in the constructor.
/// </summary>
/// <param name="instrument">The instrument that produced the measurement; its name selects which value is updated.</param>
/// <param name="measurement">The recorded measurement.</param>
/// <param name="tags">The tags attached to the measurement. They are not used.</param>
/// <param name="state">The state object associated with the instrument. It is not used.</param>
private void OnMeasurementRecorded(
    Instrument instrument, double measurement,
    ReadOnlySpan<KeyValuePair<string, object?>> tags, object? state)
{
    // Measurements from instruments with any other name are ignored.
    switch (instrument.Name)
    {
        case "process.cpu.utilization":
        case "container.cpu.limit.utilization":
            _cpuUsedPercentage = measurement * _multiplier;
            break;
        case "dotnet.process.memory.virtual.utilization":
        case "container.memory.limit.utilization":
            _memoryUsedPercentage = measurement * _multiplier;
            break;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using Microsoft.Extensions.Diagnostics.ResourceMonitoring;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Extensions.Options;
using Microsoft.Shared.Data.Validation;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.Diagnostics.HealthChecks;

Expand All @@ -20,8 +21,7 @@ public class ResourceUtilizationHealthCheckOptions
/// Gets or sets thresholds for CPU utilization.
/// </summary>
/// <remarks>
/// The thresholds are periodically compared against the utilization samples provided by
/// the registered <see cref="IResourceMonitor"/>.
/// The thresholds are periodically compared against the utilization samples provided by the Resource Monitoring library.
/// </remarks>
[ValidateObjectMembers]
public ResourceUsageThresholds CpuThresholds { get; set; } = new ResourceUsageThresholds();
Expand All @@ -30,18 +30,33 @@ public class ResourceUtilizationHealthCheckOptions
/// Gets or sets thresholds for memory utilization.
/// </summary>
/// <remarks>
/// The thresholds are periodically compared against the utilization samples provided by
/// the registered <see cref="IResourceMonitor"/>.
/// The thresholds are periodically compared against the utilization samples provided by the Resource Monitoring library.
/// </remarks>
[ValidateObjectMembers]
public ResourceUsageThresholds MemoryThresholds { get; set; } = new ResourceUsageThresholds();

/// <summary>
/// Gets or sets the time window for used for calculating CPU and memory utilization averages.
/// Gets or sets the time window used for calculating CPU and memory utilization averages.
/// </summary>
/// <value>
/// The default value is 5 seconds.
/// </value>
#pragma warning disable CS0436 // Type conflicts with imported type
[Obsolete(DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiMessage,
DiagnosticId = DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiDiagId,
UrlFormat = DiagnosticIds.UrlFormat)]
#pragma warning restore CS0436 // Type conflicts with imported type
[TimeSpan(MinimumSamplingWindow, int.MaxValue)]
public TimeSpan SamplingWindow { get; set; } = DefaultSamplingWindow;

/// <summary>
/// Gets or sets a value indicating whether the observable instruments will be used for getting CPU and Memory usage
/// as opposed to the default <see cref="Microsoft.Extensions.Diagnostics.ResourceMonitoring.IResourceMonitor"/> API which is obsolete.
/// </summary>
/// <value>
/// <see langword="true" /> if the observable instruments are used. The default is <see langword="false" />.
/// In the future the default will be <see langword="true" />.
/// </value>
[Experimental(diagnosticId: DiagnosticIds.Experiments.HealthChecks, UrlFormat = DiagnosticIds.UrlFormat)]
public bool UseObservableResourceMonitoringInstruments { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,6 @@
<ItemGroup>
<InternalsVisibleToDynamicProxyGenAssembly2 Include="*" />
<InternalsVisibleToTest Include="$(AssemblyName).Tests" />
<InternalsVisibleToTest Include="Microsoft.Extensions.Diagnostics.HealthChecks.ResourceUtilization.Tests" />
</ItemGroup>
</Project>
Loading