Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] primaryReadyThreshold: allow configuring threshold for primary #1048

Merged
merged 1 commit into from
Nov 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions artifacts/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,9 @@ spec:
mirrorWeight:
description: Weight of traffic to be mirrored
type: number
primaryReadyThreshold:
description: Percentage of pods that need to be available to consider primary as ready
type: number
match:
description: A/B testing match conditions
type: array
Expand Down
3 changes: 3 additions & 0 deletions charts/flagger/crds/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,9 @@ spec:
mirrorWeight:
description: Weight of traffic to be mirrored
type: number
primaryReadyThreshold:
description: Percentage of pods that need to be available to consider primary as ready
type: number
match:
description: A/B testing match conditions
type: array
Expand Down
4 changes: 4 additions & 0 deletions docs/gitbook/usage/how-it-works.md
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,10 @@ Spec:
# total number of iterations
# used for A/B Testing and Blue/Green
iterations:
# threshold of primary pods that need to be available to consider it ready
# before starting rollout. this is optional and the default is 100
# percentage (0-100)
primaryReadyThreshold: 100
# canary match conditions
# used for A/B Testing
match:
Expand Down
3 changes: 3 additions & 0 deletions kustomize/base/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,9 @@ spec:
mirrorWeight:
description: Weight of traffic to be mirrored
type: number
primaryReadyThreshold:
description: Percentage of pods that need to be available to consider primary as ready
type: number
match:
description: A/B testing match conditions
type: array
Expand Down
12 changes: 12 additions & 0 deletions pkg/apis/flagger/v1beta1/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const (
CanaryKind = "Canary"
ProgressDeadlineSeconds = 600
AnalysisInterval = 60 * time.Second
PrimaryReadyThreshold = 100
MetricInterval = "1m"
)

Expand Down Expand Up @@ -229,6 +230,9 @@ type CanaryAnalysis struct {
// Max number of failed checks before the canary is terminated
Threshold int `json:"threshold"`

// Percentage of pods that need to be available to consider primary as ready
PrimaryReadyThreshold *int `json:"primaryReadyThreshold,omitempty"`

// Alert list for this canary analysis
Alerts []CanaryAlert `json:"alerts,omitempty"`

Expand Down Expand Up @@ -440,6 +444,14 @@ func (c *Canary) GetAnalysisThreshold() int {
return 1
}

// GetAnalysisPrimaryReadyThreshold returns the canary primaryReadyThreshold (default 100)
func (c *Canary) GetAnalysisPrimaryReadyThreshold() int {
if c.GetAnalysis().PrimaryReadyThreshold != nil {
return *c.GetAnalysis().PrimaryReadyThreshold
}
return PrimaryReadyThreshold
}

// GetMetricInterval returns the metric interval default value (1m)
func (c *Canary) GetMetricInterval() string {
return MetricInterval
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/canary/daemonset_fixture_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ func newDaemonSetControllerTestCanary(dc daemonsetConfigs) *flaggerv1.Canary {
APIVersion: "apps/v1",
Kind: "DaemonSet",
},
Analysis: &flaggerv1.CanaryAnalysis{},
},
}
return cd
Expand Down
15 changes: 9 additions & 6 deletions pkg/canary/daemonset_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func (c *DaemonSetController) IsPrimaryReady(cd *flaggerv1.Canary) error {
return fmt.Errorf("daemonset %s.%s get query error: %w", primaryName, cd.Namespace, err)
}

_, err = c.isDaemonSetReady(cd, primary)
_, err = c.isDaemonSetReady(cd, primary, cd.GetAnalysisPrimaryReadyThreshold())
if err != nil {
return fmt.Errorf("primary daemonset %s.%s not ready: %w", primaryName, cd.Namespace, err)
}
Expand All @@ -52,7 +52,7 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
return true, fmt.Errorf("daemonset %s.%s get query error: %w", targetName, cd.Namespace, err)
}

retryable, err := c.isDaemonSetReady(cd, canary)
retryable, err := c.isDaemonSetReady(cd, canary, 100)
if err != nil {
return retryable, fmt.Errorf("canary damonset %s.%s not ready with retryable %v: %w",
targetName, cd.Namespace, retryable, err)
Expand All @@ -62,11 +62,14 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)

// isDaemonSetReady determines if a daemonset is ready by checking the number of old version daemons
// reference: https://github.com/kubernetes/kubernetes/blob/5232ad4a00ec93942d0b2c6359ee6cd1201b46bc/pkg/kubectl/rollout_status.go#L110
func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet) (bool, error) {
func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet, readyThreshold int) (bool, error) {
if daemonSet.Generation <= daemonSet.Status.ObservedGeneration {
readyThresholdRatio := float32(readyThreshold) / float32(100)

// calculate conditions
newCond := daemonSet.Status.UpdatedNumberScheduled < daemonSet.Status.DesiredNumberScheduled
availableCond := daemonSet.Status.NumberAvailable < daemonSet.Status.DesiredNumberScheduled
readyThresholdDesiredReplicas := int32(float32(daemonSet.Status.DesiredNumberScheduled) * readyThresholdRatio)
availableCond := daemonSet.Status.NumberAvailable < readyThresholdDesiredReplicas
if !newCond && !availableCond {
return true, nil
}
Expand All @@ -83,8 +86,8 @@ func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *
return true, fmt.Errorf("waiting for rollout to finish: %d out of %d new pods have been updated",
daemonSet.Status.UpdatedNumberScheduled, daemonSet.Status.DesiredNumberScheduled)
} else if availableCond {
return true, fmt.Errorf("waiting for rollout to finish: %d of %d updated pods are available",
daemonSet.Status.NumberAvailable, daemonSet.Status.DesiredNumberScheduled)
return true, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated pods are available",
daemonSet.Status.NumberAvailable, readyThresholdDesiredReplicas, readyThreshold)
}
}
return true, fmt.Errorf("waiting for rollout to finish: observed daemonset generation less than desired generation")
Expand Down
49 changes: 44 additions & 5 deletions pkg/canary/daemonset_ready_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
// observed generation is less than desired generation
ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{}}
ds.Status.ObservedGeneration--
retryable, err := mocks.controller.isDaemonSetReady(cd, ds)
retryable, err := mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.True(t, retryable)

Expand All @@ -58,7 +58,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
DesiredNumberScheduled: 1,
NumberAvailable: 1,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.NoError(t, err)
require.True(t, retryable)

Expand All @@ -69,7 +69,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
}}
cd.Status.LastTransitionTime = metav1.Now()
cd.Spec.ProgressDeadlineSeconds = int32p(-1e6)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.False(t, retryable)

Expand All @@ -80,7 +80,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
NumberAvailable: 1,
}}
cd.Spec.ProgressDeadlineSeconds = int32p(1e6)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.True(t, retryable)
require.True(t, strings.Contains(err.Error(), "new pods"))
Expand All @@ -91,7 +91,46 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
DesiredNumberScheduled: 1,
NumberAvailable: 0,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.True(t, retryable)
require.True(t, strings.Contains(err.Error(), "available"))
}

func TestDaemonSetController_isDaemonSetReady_readyThreshold(t *testing.T) {
dc := daemonsetConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
mocks := newDaemonSetFixture(dc)
cd := &flaggerv1.Canary{}

// succeeded
ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
UpdatedNumberScheduled: 1,
DesiredNumberScheduled: 1,
NumberAvailable: 1,
}}
retryable, err := mocks.controller.isDaemonSetReady(cd, ds, 50)
require.NoError(t, err)
require.True(t, retryable)

// waiting for updated ones to be available, 50% available, ok
ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
UpdatedNumberScheduled: 4,
DesiredNumberScheduled: 4,
NumberAvailable: 2,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
require.NoError(t, err)
require.True(t, retryable)

// waiting for updated ones to be available, less than 50% available
ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
UpdatedNumberScheduled: 4,
DesiredNumberScheduled: 4,
NumberAvailable: 1,
}}
cd.Status.LastTransitionTime = metav1.Now()
cd.Spec.ProgressDeadlineSeconds = int32p(1e6)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
require.Error(t, err)
require.True(t, retryable)
require.True(t, strings.Contains(err.Error(), "available"))
Expand Down
15 changes: 9 additions & 6 deletions pkg/canary/deployment_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func (c *DeploymentController) IsPrimaryReady(cd *flaggerv1.Canary) error {
return fmt.Errorf("deployment %s.%s get query error: %w", primaryName, cd.Namespace, err)
}

_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds())
_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds(), cd.GetAnalysisPrimaryReadyThreshold())
if err != nil {
return fmt.Errorf("%s.%s not ready: %w", primaryName, cd.Namespace, err)
}
Expand All @@ -59,7 +59,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
return true, fmt.Errorf("deployment %s.%s get query error: %w", targetName, cd.Namespace, err)
}

retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds())
retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds(), 100)
if err != nil {
return retryable, fmt.Errorf(
"canary deployment %s.%s not ready: %w",
Expand All @@ -71,7 +71,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)

// isDeploymentReady determines if a deployment is ready by checking the status conditions
// if a deployment has exceeded the progress deadline it returns a non retriable error
func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int) (bool, error) {
func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int, readyThreshold int) (bool, error) {
retriable := true
if deployment.Generation <= deployment.Status.ObservedGeneration {
progress := c.getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
Expand All @@ -86,6 +86,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
}
}

readyThresholdRatio := float32(readyThreshold) / float32(100)
readyThresholdUpdatedReplicas := int32(float32(deployment.Status.UpdatedReplicas) * readyThresholdRatio)

if progress != nil && progress.Reason == "ProgressDeadlineExceeded" {
return false, fmt.Errorf("deployment %q exceeded its progress deadline", deployment.GetName())
} else if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
Expand All @@ -94,9 +97,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
} else if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d old replicas are pending termination",
deployment.Status.Replicas-deployment.Status.UpdatedReplicas)
} else if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d updated replicas are available",
deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas)
} else if deployment.Status.AvailableReplicas < readyThresholdUpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated replicas are available",
deployment.Status.AvailableReplicas, readyThresholdUpdatedReplicas, readyThreshold)
}
} else {
return true, fmt.Errorf(
Expand Down
55 changes: 48 additions & 7 deletions pkg/canary/deployment_ready_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {

// observed generation is less than desired generation
dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
retryable, err := mocks.controller.isDeploymentReady(dp, 0)
retryable, err := mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, retryable)
assert.True(t, strings.Contains(err.Error(), "generation"))
Expand All @@ -57,7 +57,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
ReadyReplicas: 1,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0)
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.NoError(t, err)
assert.True(t, retryable)

Expand All @@ -67,7 +67,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
}, Spec: appsv1.DeploymentSpec{
Replicas: int32p(2),
}}
_, err = mocks.controller.isDeploymentReady(dp, 0)
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "new replicas"))

Expand All @@ -76,7 +76,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
UpdatedReplicas: 1,
Replicas: 2,
}}
_, err = mocks.controller.isDeploymentReady(dp, 0)
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "termination"))

Expand All @@ -85,7 +85,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
UpdatedReplicas: 2,
AvailableReplicas: 1,
}}
_, err = mocks.controller.isDeploymentReady(dp, 0)
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "available"))

Expand All @@ -94,7 +94,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
Conditions: []appsv1.DeploymentCondition{
{Type: appsv1.DeploymentProgressing, Reason: "ProgressDeadlineExceeded"}},
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0)
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.False(t, retryable)
assert.True(t, strings.Contains(err.Error(), "exceeded"))
Expand All @@ -113,7 +113,48 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
UpdatedReplicas: 2,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 5)
retryable, err = mocks.controller.isDeploymentReady(dp, 5, 100)
assert.Error(t, err)
assert.False(t, retryable)
}

func TestDeploymentController_isDeploymentReady_readyThreshold(t *testing.T) {
dc := deploymentConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
mocks := newDeploymentFixture(dc)

// observed generation is less than desired generation
dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
retryable, err := mocks.controller.isDeploymentReady(dp, 0, 50)
assert.Error(t, err)
assert.True(t, retryable)
assert.True(t, strings.Contains(err.Error(), "generation"))

// ok
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
Replicas: 1,
UpdatedReplicas: 1,
ReadyReplicas: 1,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
assert.NoError(t, err)
assert.True(t, retryable)

// waiting for updated ones to be available, 50% is available, ok
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
UpdatedReplicas: 4,
AvailableReplicas: 2,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
assert.NoError(t, err)
assert.True(t, retryable)

// waiting for updated ones to be available, less than 50% available
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
UpdatedReplicas: 4,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "available"))
}