Skip to content

Commit

Permalink
controller: jitter requeue interval
Browse files Browse the repository at this point in the history
This adds a `--interval-jitter-percentage` flag to the controller to
add a +/- percentage jitter to the interval defined in a HelmRelease
(defaults to 5%).

Effectively, with the default of 5%, this results in a reconciliation every
9.5 - 10.5 minutes for a resource with an interval of 10 minutes.

The main reason for this change is to mitigate spikes in memory and
CPU usage caused by many resources being configured with the same
interval.

Signed-off-by: Hidde Beydals <[email protected]>
  • Loading branch information
hiddeco committed Aug 9, 2023
1 parent 3a98126 commit 8c39a21
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 3 deletions.
2 changes: 2 additions & 0 deletions api/v2beta1/helmrelease_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ type HelmReleaseSpec struct {
Chart HelmChartTemplate `json:"chart"`

// Interval at which to reconcile the Helm release.
// This interval is approximate and may be subject to jitter to ensure
// efficient use of resources.
// +kubebuilder:validation:Type=string
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ms|s|m|h))+$"
// +required
Expand Down
7 changes: 7 additions & 0 deletions docs/spec/v2beta1/helmreleases.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ type HelmReleaseSpec struct {
Chart HelmChartTemplate `json:"chart"`

// Interval at which to reconcile the Helm release.
// This interval is approximate and may be subject to jitter to ensure
// efficient use of resources.
// +required
Interval metav1.Duration `json:"interval"`

Expand Down Expand Up @@ -822,6 +824,11 @@ desired state, so an upgrade is made in this case as well.
The `spec.interval` field tells the reconciler at which interval to reconcile the release. The
interval time units are `s`, `m` and `h`, e.g. `interval: 5m`. The minimum value should be 60 seconds.

**Note:** The controller can be configured to apply a jitter to the interval in
order to distribute the load more evenly when multiple HelmRelease objects are
set up with the same interval. For more information, please refer to the
[helm-controller configuration options](https://fluxcd.io/flux/components/helm/options/).

The reconciler can be told to reconcile the `HelmRelease` outside of the specified interval
by annotating the object with a `reconcile.fluxcd.io/requestedAt` annotation. For example:

Expand Down
7 changes: 4 additions & 3 deletions internal/controller/helmrelease_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import (
"github.com/fluxcd/pkg/apis/meta"
"github.com/fluxcd/pkg/runtime/acl"
runtimeClient "github.com/fluxcd/pkg/runtime/client"
"github.com/fluxcd/pkg/runtime/jitter"
"github.com/fluxcd/pkg/runtime/metrics"
"github.com/fluxcd/pkg/runtime/predicates"
"github.com/fluxcd/pkg/runtime/transform"
Expand Down Expand Up @@ -233,7 +234,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease
log.Error(reconcileErr, "access denied to cross-namespace source")
r.event(ctx, hr, hr.Status.LastAttemptedRevision, eventv1.EventSeverityError, reconcileErr.Error())
return v2.HelmReleaseNotReady(hr, apiacl.AccessDeniedReason, reconcileErr.Error()),
ctrl.Result{RequeueAfter: hr.Spec.Interval.Duration}, nil
jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), nil
}

msg := fmt.Sprintf("chart reconciliation failed: %s", reconcileErr.Error())
Expand All @@ -248,7 +249,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease
log.Info(msg)
// Do not requeue immediately, when the artifact is created
// the watcher should trigger a reconciliation.
return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg), ctrl.Result{RequeueAfter: hc.Spec.Interval.Duration}, nil
return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg), jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), nil
}

// Check dependencies
Expand Down Expand Up @@ -287,7 +288,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease
r.event(ctx, hr, hc.GetArtifact().Revision, eventv1.EventSeverityError,
fmt.Sprintf("reconciliation failed: %s", reconcileErr.Error()))
}
return reconciledHr, ctrl.Result{RequeueAfter: hr.Spec.Interval.Duration}, reconcileErr
return reconciledHr, jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), reconcileErr
}

type HelmReleaseReconcilerOptions struct {
Expand Down
7 changes: 7 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
helper "github.com/fluxcd/pkg/runtime/controller"
"github.com/fluxcd/pkg/runtime/events"
feathelper "github.com/fluxcd/pkg/runtime/features"
"github.com/fluxcd/pkg/runtime/jitter"
"github.com/fluxcd/pkg/runtime/leaderelection"
"github.com/fluxcd/pkg/runtime/logger"
"github.com/fluxcd/pkg/runtime/metrics"
Expand Down Expand Up @@ -89,6 +90,7 @@ func main() {
leaderElectionOptions leaderelection.Options
rateLimiterOptions helper.RateLimiterOptions
watchOptions helper.WatchOptions
intervalJitterOptions jitter.IntervalOptions
oomWatchInterval time.Duration
oomWatchMemoryThreshold uint8
oomWatchMaxMemoryPath string
Expand Down Expand Up @@ -143,6 +145,11 @@ func main() {
metricsRecorder := metrics.NewRecorder()
crtlmetrics.Registry.MustRegister(metricsRecorder.Collectors()...)

if err := intervalJitterOptions.SetGlobalJitter(nil); err != nil {
setupLog.Error(err, "unable to set global jitter")
os.Exit(1)
}

watchNamespace := ""
if !watchOptions.AllNamespaces {
watchNamespace = os.Getenv("RUNTIME_NAMESPACE")
Expand Down

0 comments on commit 8c39a21

Please sign in to comment.