From 9c06fd45fea20dd0505b95836c64e43282c4993f Mon Sep 17 00:00:00 2001 From: Mahdi Dibaiee Date: Wed, 10 Nov 2021 14:04:02 +0000 Subject: [PATCH] [feat] primaryReadyThreshold: allow configuring threshold for primary see #639 Signed-off-by: Mahdi Dibaiee --- artifacts/flagger/crd.yaml | 3 + charts/flagger/crds/crd.yaml | 3 + docs/gitbook/usage/how-it-works.md | 4 ++ kustomize/base/flagger/crd.yaml | 3 + pkg/apis/flagger/v1beta1/canary.go | 11 ++++ .../flagger/v1beta1/zz_generated.deepcopy.go | 5 ++ pkg/canary/deployment_ready.go | 16 +++--- pkg/canary/deployment_ready_test.go | 55 ++++++++++++++++--- 8 files changed, 86 insertions(+), 14 deletions(-) diff --git a/artifacts/flagger/crd.yaml b/artifacts/flagger/crd.yaml index ebde3b211..57921ede3 100644 --- a/artifacts/flagger/crd.yaml +++ b/artifacts/flagger/crd.yaml @@ -826,6 +826,9 @@ spec: mirrorWeight: description: Weight of traffic to be mirrored type: number + primaryReadyThreshold: + description: Percentage of pods that need to be available to consider primary as ready + type: number match: description: A/B testing match conditions type: array diff --git a/charts/flagger/crds/crd.yaml b/charts/flagger/crds/crd.yaml index ebde3b211..57921ede3 100644 --- a/charts/flagger/crds/crd.yaml +++ b/charts/flagger/crds/crd.yaml @@ -826,6 +826,9 @@ spec: mirrorWeight: description: Weight of traffic to be mirrored type: number + primaryReadyThreshold: + description: Percentage of pods that need to be available to consider primary as ready + type: number match: description: A/B testing match conditions type: array diff --git a/docs/gitbook/usage/how-it-works.md b/docs/gitbook/usage/how-it-works.md index d2a77c525..88f7baec9 100644 --- a/docs/gitbook/usage/how-it-works.md +++ b/docs/gitbook/usage/how-it-works.md @@ -335,6 +335,10 @@ Spec: # total number of iterations # used for A/B Testing and Blue/Green iterations: + # threshold of primary pods that need to be available to consider it ready + # before starting rollout. this is optional and the default is 100 + # percentage (0-100) + primaryReadyThreshold: 100 # canary match conditions # used for A/B Testing match: diff --git a/kustomize/base/flagger/crd.yaml b/kustomize/base/flagger/crd.yaml index ebde3b211..57921ede3 100644 --- a/kustomize/base/flagger/crd.yaml +++ b/kustomize/base/flagger/crd.yaml @@ -826,6 +826,9 @@ spec: mirrorWeight: description: Weight of traffic to be mirrored type: number + primaryReadyThreshold: + description: Percentage of pods that need to be available to consider primary as ready + type: number match: description: A/B testing match conditions type: array diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go index 8c8c2be2e..702d4e53a 100644 --- a/pkg/apis/flagger/v1beta1/canary.go +++ b/pkg/apis/flagger/v1beta1/canary.go @@ -229,6 +229,9 @@ type CanaryAnalysis struct { // Max number of failed checks before the canary is terminated Threshold int `json:"threshold"` + // Percentage of pods that need to be available to consider primary as ready + PrimaryReadyThreshold *int `json:"primaryReadyThreshold,omitempty"` + // Alert list for this canary analysis Alerts []CanaryAlert `json:"alerts,omitempty"` @@ -440,6 +443,14 @@ func (c *Canary) GetAnalysisThreshold() int { return 1 } +// GetAnalysisPrimaryReadyThreshold returns the canary primaryReadyThreshold (default 100) +func (c *Canary) GetAnalysisPrimaryReadyThreshold() int { + if c.GetAnalysis().PrimaryReadyThreshold != nil { + return *c.GetAnalysis().PrimaryReadyThreshold + } + return 100 +} + // GetMetricInterval returns the metric interval default value (1m) func (c *Canary) GetMetricInterval() string { return MetricInterval diff --git a/pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go b/pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go index 28fa6465b..8bd1a4c30 100644 --- a/pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go @@ -202,6 +202,11 @@ func (in *CanaryAnalysis) DeepCopyInto(out *CanaryAnalysis) { *out = make([]int, len(*in)) copy(*out, *in) } + if in.PrimaryReadyThreshold != nil { + in, out := &in.PrimaryReadyThreshold, &out.PrimaryReadyThreshold + *out = new(int) + **out = **in + } if in.Alerts != nil { in, out := &in.Alerts, &out.Alerts *out = make([]CanaryAlert, len(*in)) diff --git a/pkg/canary/deployment_ready.go b/pkg/canary/deployment_ready.go index 45a87c4d7..38728d36a 100644 --- a/pkg/canary/deployment_ready.go +++ b/pkg/canary/deployment_ready.go @@ -37,7 +37,7 @@ func (c *DeploymentController) IsPrimaryReady(cd *flaggerv1.Canary) error { return fmt.Errorf("deployment %s.%s get query error: %w", primaryName, cd.Namespace, err) } - _, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds()) + _, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds(), cd.GetAnalysisPrimaryReadyThreshold()) if err != nil { return fmt.Errorf("%s.%s not ready: %w", primaryName, cd.Namespace, err) } @@ -59,7 +59,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error) return true, fmt.Errorf("deployment %s.%s get query error: %w", targetName, cd.Namespace, err) } - retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds()) + retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds(), 100) if err != nil { return retryable, fmt.Errorf( "canary deployment %s.%s not ready: %w", @@ -71,7 +71,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error) // isDeploymentReady determines if a deployment is ready by checking the status conditions // if a deployment has exceeded the progress deadline it returns a non retriable error -func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int) (bool, error) { +func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int, readyThreshold int) (bool, error) { retriable := true if deployment.Generation <= deployment.Status.ObservedGeneration { progress := c.getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing) @@ -86,17 +86,19 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, } } + readyThresholdRatio := float32(readyThreshold) / float32(100) + if progress != nil && progress.Reason == "ProgressDeadlineExceeded" { return false, fmt.Errorf("deployment %q exceeded its progress deadline", deployment.GetName()) } else if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas { - return retriable, fmt.Errorf("waiting for rollout to finish: %d out of %d new replicas have been updated", + return retriable, fmt.Errorf("waiting for rollout to finish: %d out of %d new replicas have been updated.", deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas) } else if deployment.Status.Replicas > deployment.Status.UpdatedReplicas { return retriable, fmt.Errorf("waiting for rollout to finish: %d old replicas are pending termination", deployment.Status.Replicas-deployment.Status.UpdatedReplicas) - } else if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas { - return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d updated replicas are available", - deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas) + } else if deployment.Status.AvailableReplicas < int32(float32(deployment.Status.UpdatedReplicas)*readyThresholdRatio) { + return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated replicas are available", + deployment.Status.AvailableReplicas, int32(float32(deployment.Status.UpdatedReplicas)*readyThresholdRatio), readyThreshold) } } else { return true, fmt.Errorf( diff --git a/pkg/canary/deployment_ready_test.go b/pkg/canary/deployment_ready_test.go index 9dfbc47fb..8719ee113 100644 --- a/pkg/canary/deployment_ready_test.go +++ b/pkg/canary/deployment_ready_test.go @@ -45,7 +45,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) { // observed generation is less than desired generation dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}} - retryable, err := mocks.controller.isDeploymentReady(dp, 0) + retryable, err := mocks.controller.isDeploymentReady(dp, 0, 100) assert.Error(t, err) assert.True(t, retryable) assert.True(t, strings.Contains(err.Error(), "generation")) @@ -57,7 +57,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) { ReadyReplicas: 1, AvailableReplicas: 1, }} - retryable, err = mocks.controller.isDeploymentReady(dp, 0) + retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100) assert.NoError(t, err) assert.True(t, retryable) @@ -67,7 +67,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) { }, Spec: appsv1.DeploymentSpec{ Replicas: int32p(2), }} - _, err = mocks.controller.isDeploymentReady(dp, 0) + _, err = mocks.controller.isDeploymentReady(dp, 0, 100) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), "new replicas")) @@ -76,7 +76,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) { UpdatedReplicas: 1, Replicas: 2, }} - _, err = mocks.controller.isDeploymentReady(dp, 0) + _, err = mocks.controller.isDeploymentReady(dp, 0, 100) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), "termination")) @@ -85,7 +85,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) { UpdatedReplicas: 2, AvailableReplicas: 1, }} - _, err = mocks.controller.isDeploymentReady(dp, 0) + _, err = mocks.controller.isDeploymentReady(dp, 0, 100) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), "available")) @@ -94,7 +94,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) { Conditions: []appsv1.DeploymentCondition{ {Type: appsv1.DeploymentProgressing, Reason: "ProgressDeadlineExceeded"}}, }} - retryable, err = mocks.controller.isDeploymentReady(dp, 0) + retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100) assert.Error(t, err) assert.False(t, retryable) assert.True(t, strings.Contains(err.Error(), "exceeded")) @@ -113,7 +113,48 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) { UpdatedReplicas: 2, AvailableReplicas: 1, }} - retryable, err = mocks.controller.isDeploymentReady(dp, 5) + retryable, err = mocks.controller.isDeploymentReady(dp, 5, 100) assert.Error(t, err) assert.False(t, retryable) } + +func TestDeploymentController_isDeploymentReady_readyThreshold(t *testing.T) { + dc := deploymentConfigs{name: "podinfo", label: "name", labelValue: "podinfo"} + mocks := newDeploymentFixture(dc) + + // observed generation is less than desired generation + dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}} + retryable, err := mocks.controller.isDeploymentReady(dp, 0, 50) + assert.Error(t, err) + assert.True(t, retryable) + assert.True(t, strings.Contains(err.Error(), "generation")) + + // ok + dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{ + Replicas: 1, + UpdatedReplicas: 1, + ReadyReplicas: 1, + AvailableReplicas: 1, + }} + retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50) + assert.NoError(t, err) + assert.True(t, retryable) + + // waiting for updated ones to be available, 50% is available, ok + dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{ + UpdatedReplicas: 4, + AvailableReplicas: 2, + }} + retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50) + assert.NoError(t, err) + assert.True(t, retryable) + + // waiting for updated ones to be available, less than 50% available + dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{ + UpdatedReplicas: 4, + AvailableReplicas: 1, + }} + retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50) + assert.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "available")) +}