Skip to content

Commit

Permalink
controller: jitter requeue interval
Browse files Browse the repository at this point in the history
This adds a `--interval-jitter-percentage` flag to the controller to
add a +/- percentage jitter to the interval defined in a HelmRelease
(defaults to 5%).

Effectively, this results in a reconciliation every 9.5 - 10.5 minutes
for a resource with an interval of 10 minutes.

Main reason to add this change is to mitigate spikes in memory and
CPU usage caused by many resources being configured with the same
interval.

Signed-off-by: Hidde Beydals <[email protected]>
  • Loading branch information
hiddeco committed Aug 9, 2023
1 parent 3a98126 commit d76f3a3
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 6 deletions.
2 changes: 2 additions & 0 deletions api/v2beta1/helmrelease_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ type HelmReleaseSpec struct {
Chart HelmChartTemplate `json:"chart"`

// Interval at which to reconcile the Helm release.
// This interval is approximate and may be subject to jitter to ensure
// efficient use of resources.
// +kubebuilder:validation:Type=string
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ms|s|m|h))+$"
// +required
Expand Down
4 changes: 3 additions & 1 deletion config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,9 @@ spec:
type: string
type: object
interval:
description: Interval at which to reconcile the Helm release.
description: Interval at which to reconcile the Helm release. This
interval is approximate and may be subject to jitter to ensure efficient
use of resources.
pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m|h))+$
type: string
kubeConfig:
Expand Down
8 changes: 6 additions & 2 deletions docs/api/v2beta1/helm.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ Kubernetes meta/v1.Duration
</em>
</td>
<td>
<p>Interval at which to reconcile the Helm release.</p>
<p>Interval at which to reconcile the Helm release.
This interval is approximate and may be subject to jitter to ensure
efficient use of resources.</p>
</td>
</tr>
<tr>
Expand Down Expand Up @@ -901,7 +903,9 @@ Kubernetes meta/v1.Duration
</em>
</td>
<td>
<p>Interval at which to reconcile the Helm release.</p>
<p>Interval at which to reconcile the Helm release.
This interval is approximate and may be subject to jitter to ensure
efficient use of resources.</p>
</td>
</tr>
<tr>
Expand Down
7 changes: 7 additions & 0 deletions docs/spec/v2beta1/helmreleases.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ type HelmReleaseSpec struct {
Chart HelmChartTemplate `json:"chart"`

// Interval at which to reconcile the Helm release.
// This interval is approximate and may be subject to jitter to ensure
// efficient use of resources.
// +required
Interval metav1.Duration `json:"interval"`

Expand Down Expand Up @@ -822,6 +824,11 @@ desired state, so an upgrade is made in this case as well.
The `spec.interval` tells the reconciler at which interval to reconcile the release. The
interval time units are `s`, `m` and `h` e.g. `interval: 5m`, the minimum value should be 60 seconds.

**Note:** The controller can be configured to apply a jitter to the interval in
order to distribute the load more evenly when multiple HelmRelease objects are
set up with the same interval. For more information, please refer to the
[helm-controller configuration options](https://fluxcd.io/flux/components/helm/options/).

The reconciler can be told to reconcile the `HelmRelease` outside of the specified interval
by annotating the object with a `reconcile.fluxcd.io/requestedAt` annotation. For example:

Expand Down
7 changes: 4 additions & 3 deletions internal/controller/helmrelease_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import (
"github.com/fluxcd/pkg/apis/meta"
"github.com/fluxcd/pkg/runtime/acl"
runtimeClient "github.com/fluxcd/pkg/runtime/client"
"github.com/fluxcd/pkg/runtime/jitter"
"github.com/fluxcd/pkg/runtime/metrics"
"github.com/fluxcd/pkg/runtime/predicates"
"github.com/fluxcd/pkg/runtime/transform"
Expand Down Expand Up @@ -233,7 +234,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease
log.Error(reconcileErr, "access denied to cross-namespace source")
r.event(ctx, hr, hr.Status.LastAttemptedRevision, eventv1.EventSeverityError, reconcileErr.Error())
return v2.HelmReleaseNotReady(hr, apiacl.AccessDeniedReason, reconcileErr.Error()),
ctrl.Result{RequeueAfter: hr.Spec.Interval.Duration}, nil
jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), nil
}

msg := fmt.Sprintf("chart reconciliation failed: %s", reconcileErr.Error())
Expand All @@ -248,7 +249,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease
log.Info(msg)
// Do not requeue immediately, when the artifact is created
// the watcher should trigger a reconciliation.
return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg), ctrl.Result{RequeueAfter: hc.Spec.Interval.Duration}, nil
return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg), jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), nil
}

// Check dependencies
Expand Down Expand Up @@ -287,7 +288,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease
r.event(ctx, hr, hc.GetArtifact().Revision, eventv1.EventSeverityError,
fmt.Sprintf("reconciliation failed: %s", reconcileErr.Error()))
}
return reconciledHr, ctrl.Result{RequeueAfter: hr.Spec.Interval.Duration}, reconcileErr
return reconciledHr, jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), reconcileErr
}

type HelmReleaseReconcilerOptions struct {
Expand Down
8 changes: 8 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
helper "github.com/fluxcd/pkg/runtime/controller"
"github.com/fluxcd/pkg/runtime/events"
feathelper "github.com/fluxcd/pkg/runtime/features"
"github.com/fluxcd/pkg/runtime/jitter"
"github.com/fluxcd/pkg/runtime/leaderelection"
"github.com/fluxcd/pkg/runtime/logger"
"github.com/fluxcd/pkg/runtime/metrics"
Expand Down Expand Up @@ -89,6 +90,7 @@ func main() {
leaderElectionOptions leaderelection.Options
rateLimiterOptions helper.RateLimiterOptions
watchOptions helper.WatchOptions
intervalJitterOptions jitter.IntervalOptions
oomWatchInterval time.Duration
oomWatchMemoryThreshold uint8
oomWatchMaxMemoryPath string
Expand Down Expand Up @@ -128,6 +130,7 @@ func main() {
kubeConfigOpts.BindFlags(flag.CommandLine)
featureGates.BindFlags(flag.CommandLine)
watchOptions.BindFlags(flag.CommandLine)
intervalJitterOptions.BindFlags(flag.CommandLine)

flag.Parse()

Expand All @@ -143,6 +146,11 @@ func main() {
metricsRecorder := metrics.NewRecorder()
crtlmetrics.Registry.MustRegister(metricsRecorder.Collectors()...)

if err := intervalJitterOptions.SetGlobalJitter(nil); err != nil {
setupLog.Error(err, "unable to set global jitter")
os.Exit(1)
}

watchNamespace := ""
if !watchOptions.AllNamespaces {
watchNamespace = os.Getenv("RUNTIME_NAMESPACE")
Expand Down

0 comments on commit d76f3a3

Please sign in to comment.