diff --git a/docs/operations/rolling-update.md b/docs/operations/rolling-update.md index 22ba50dcc383d..0789d044d4ceb 100644 --- a/docs/operations/rolling-update.md +++ b/docs/operations/rolling-update.md @@ -144,12 +144,14 @@ then creates any remaining surge instances. #### Disabling rolling updates -Rolling updates may be disabled for an instance group by setting both `maxSurge` and `maxUnavailable` -to `0`. +Rolling updates may be disabled for an instance group by setting the `enabled` field +to `false`. ```yaml spec: rollingUpdate: - maxSurge: 0 - maxUnavailable: 0 + enabled: false ``` + +Nodes needing update will still be tainted. If `maxSurge` is nonzero, up to that many extra +nodes will still be created. diff --git a/k8s/crds/kops.k8s.io_clusters.yaml b/k8s/crds/kops.k8s.io_clusters.yaml index 00f99c50264da..cb153b5d95ab5 100644 --- a/k8s/crds/kops.k8s.io_clusters.yaml +++ b/k8s/crds/kops.k8s.io_clusters.yaml @@ -3360,6 +3360,9 @@ spec: description: RollingUpdate defines the default rolling-update settings for instance groups properties: + enabled: + description: Enabled enables rolling updates. Defaults to true. + type: boolean maxSurge: anyOf: - type: integer @@ -3368,12 +3371,11 @@ spec: can be created during the update. The value can be an absolute number (for example 5) or a percentage of desired machines (for example 10%). The absolute number is calculated from a percentage - by rounding up. A value of 0 for both this and MaxUnavailable - disables rolling updates. Has no effect on instance groups with - role "Master". Defaults to 1 on AWS, 0 otherwise. Example: when - this is set to 30%, the InstanceGroup can be scaled up immediately - when the rolling update starts, such that the total number of - old and new nodes do not exceed 130% of desired nodes.' + by rounding up. Has no effect on instance groups with role "Master". + Defaults to 1 on AWS, 0 otherwise. Example: when this is set + to 30%, the InstanceGroup can be scaled up immediately when + the rolling update starts, such that the total number of old + and new nodes do not exceed 130% of desired nodes.' x-kubernetes-int-or-string: true maxUnavailable: anyOf: @@ -3383,13 +3385,13 @@ spec: can be unavailable during the update. The value can be an absolute number (for example 5) or a percentage of desired nodes (for example 10%). The absolute number is calculated from a percentage - by rounding down. A value of 0 for both this and MaxSurge disables - rolling updates. Defaults to 1 if MaxSurge is 0, otherwise defaults - to 0. Example: when this is set to 30%, the InstanceGroup can - be scaled down to 70% of desired nodes immediately when the - rolling update starts. Once new nodes are ready, more old nodes - can be drained, ensuring that the total number of nodes available - at all times during the update is at least 70% of desired nodes.' + by rounding down. Defaults to 1 if MaxSurge is 0, otherwise + defaults to 0. Example: when this is set to 30%, the InstanceGroup + can be scaled down to 70% of desired nodes immediately when + the rolling update starts. Once new nodes are ready, more old + nodes can be drained, ensuring that the total number of nodes + available at all times during the update is at least 70% of + desired nodes.' x-kubernetes-int-or-string: true type: object secretStore: diff --git a/k8s/crds/kops.k8s.io_instancegroups.yaml b/k8s/crds/kops.k8s.io_instancegroups.yaml index 90c135011559c..3e3baa0a4e479 100644 --- a/k8s/crds/kops.k8s.io_instancegroups.yaml +++ b/k8s/crds/kops.k8s.io_instancegroups.yaml @@ -653,6 +653,9 @@ spec: rollingUpdate: description: RollingUpdate defines the rolling-update behavior properties: + enabled: + description: Enabled enables rolling updates. Defaults to true. + type: boolean maxSurge: anyOf: - type: integer @@ -661,12 +664,11 @@ spec: can be created during the update. The value can be an absolute number (for example 5) or a percentage of desired machines (for example 10%). The absolute number is calculated from a percentage - by rounding up. A value of 0 for both this and MaxUnavailable - disables rolling updates. Has no effect on instance groups with - role "Master". Defaults to 1 on AWS, 0 otherwise. Example: when - this is set to 30%, the InstanceGroup can be scaled up immediately - when the rolling update starts, such that the total number of - old and new nodes do not exceed 130% of desired nodes.' + by rounding up. Has no effect on instance groups with role "Master". + Defaults to 1 on AWS, 0 otherwise. Example: when this is set + to 30%, the InstanceGroup can be scaled up immediately when + the rolling update starts, such that the total number of old + and new nodes do not exceed 130% of desired nodes.' x-kubernetes-int-or-string: true maxUnavailable: anyOf: @@ -676,13 +678,13 @@ spec: can be unavailable during the update. The value can be an absolute number (for example 5) or a percentage of desired nodes (for example 10%). The absolute number is calculated from a percentage - by rounding down. A value of 0 for both this and MaxSurge disables - rolling updates. Defaults to 1 if MaxSurge is 0, otherwise defaults - to 0. Example: when this is set to 30%, the InstanceGroup can - be scaled down to 70% of desired nodes immediately when the - rolling update starts. Once new nodes are ready, more old nodes - can be drained, ensuring that the total number of nodes available - at all times during the update is at least 70% of desired nodes.' + by rounding down. Defaults to 1 if MaxSurge is 0, otherwise + defaults to 0. Example: when this is set to 30%, the InstanceGroup + can be scaled down to 70% of desired nodes immediately when + the rolling update starts. Once new nodes are ready, more old + nodes can be drained, ensuring that the total number of nodes + available at all times during the update is at least 70% of + desired nodes.' x-kubernetes-int-or-string: true type: object rootVolumeDeleteOnTermination: diff --git a/pkg/apis/kops/cluster.go b/pkg/apis/kops/cluster.go index ddf8b50ae0b8d..9b0d2e6310c81 100644 --- a/pkg/apis/kops/cluster.go +++ b/pkg/apis/kops/cluster.go @@ -695,11 +695,13 @@ type DNSControllerGossipConfig struct { } type RollingUpdate struct { + // Enabled enables rolling updates. + // Defaults to true. + Enabled *bool `json:"enabled,omitempty"` // MaxUnavailable is the maximum number of nodes that can be unavailable during the update. // The value can be an absolute number (for example 5) or a percentage of desired // nodes (for example 10%). // The absolute number is calculated from a percentage by rounding down. - // A value of 0 for both this and MaxSurge disables rolling updates. // Defaults to 1 if MaxSurge is 0, otherwise defaults to 0. // Example: when this is set to 30%, the InstanceGroup can be scaled // down to 70% of desired nodes immediately when the rolling update @@ -713,7 +715,6 @@ type RollingUpdate struct { // The value can be an absolute number (for example 5) or a percentage of // desired machines (for example 10%). // The absolute number is calculated from a percentage by rounding up. - // A value of 0 for both this and MaxUnavailable disables rolling updates. // Has no effect on instance groups with role "Master". // Defaults to 1 on AWS, 0 otherwise. // Example: when this is set to 30%, the InstanceGroup can be scaled diff --git a/pkg/apis/kops/v1alpha2/cluster.go b/pkg/apis/kops/v1alpha2/cluster.go index ed4bb70d31bed..3b64e8c923a3a 100644 --- a/pkg/apis/kops/v1alpha2/cluster.go +++ b/pkg/apis/kops/v1alpha2/cluster.go @@ -590,11 +590,13 @@ type DNSControllerGossipConfig struct { } type RollingUpdate struct { + // Enabled enables rolling updates. + // Defaults to true. + Enabled *bool `json:"enabled,omitempty"` // MaxUnavailable is the maximum number of nodes that can be unavailable during the update. // The value can be an absolute number (for example 5) or a percentage of desired // nodes (for example 10%). // The absolute number is calculated from a percentage by rounding down. - // A value of 0 for both this and MaxSurge disables rolling updates. // Defaults to 1 if MaxSurge is 0, otherwise defaults to 0. // Example: when this is set to 30%, the InstanceGroup can be scaled // down to 70% of desired nodes immediately when the rolling update @@ -608,7 +610,6 @@ type RollingUpdate struct { // The value can be an absolute number (for example 5) or a percentage of // desired machines (for example 10%). // The absolute number is calculated from a percentage by rounding up. - // A value of 0 for both this and MaxUnavailable disables rolling updates. // Has no effect on instance groups with role "Master". // Defaults to 1 on AWS, 0 otherwise. // Example: when this is set to 30%, the InstanceGroup can be scaled diff --git a/pkg/apis/kops/v1alpha2/zz_generated.conversion.go b/pkg/apis/kops/v1alpha2/zz_generated.conversion.go index 3aa6ef8aa4e52..35e722e563aed 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.conversion.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.conversion.go @@ -5147,6 +5147,7 @@ func Convert_kops_RBACAuthorizationSpec_To_v1alpha2_RBACAuthorizationSpec(in *ko } func autoConvert_v1alpha2_RollingUpdate_To_kops_RollingUpdate(in *RollingUpdate, out *kops.RollingUpdate, s conversion.Scope) error { + out.Enabled = in.Enabled out.MaxUnavailable = in.MaxUnavailable out.MaxSurge = in.MaxSurge return nil @@ -5158,6 +5159,7 @@ func Convert_v1alpha2_RollingUpdate_To_kops_RollingUpdate(in *RollingUpdate, out } func autoConvert_kops_RollingUpdate_To_v1alpha2_RollingUpdate(in *kops.RollingUpdate, out *RollingUpdate, s conversion.Scope) error { + out.Enabled = in.Enabled out.MaxUnavailable = in.MaxUnavailable out.MaxSurge = in.MaxSurge return nil diff --git a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go index 1374634f17ad8..5f43110f9b298 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go @@ -3481,6 +3481,11 @@ func (in *RBACAuthorizationSpec) DeepCopy() *RBACAuthorizationSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RollingUpdate) DeepCopyInto(out *RollingUpdate) { *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } if in.MaxUnavailable != nil { in, out := &in.MaxUnavailable, &out.MaxUnavailable *out = new(intstr.IntOrString) diff --git a/pkg/apis/kops/validation/validation.go b/pkg/apis/kops/validation/validation.go index 494180f355b01..14789f19e55be 100644 --- a/pkg/apis/kops/validation/validation.go +++ b/pkg/apis/kops/validation/validation.go @@ -1000,8 +1000,10 @@ func validateDockerConfig(config *kops.DockerConfig, fldPath *field.Path) field. func validateRollingUpdate(rollingUpdate *kops.RollingUpdate, fldpath *field.Path, onMasterInstanceGroup bool) field.ErrorList { allErrs := field.ErrorList{} + var err error + unavailable := 1 if rollingUpdate.MaxUnavailable != nil { - unavailable, err := intstr.GetValueFromIntOrPercent(rollingUpdate.MaxUnavailable, 1, false) + unavailable, err = intstr.GetValueFromIntOrPercent(rollingUpdate.MaxUnavailable, 1, false) if err != nil { allErrs = append(allErrs, field.Invalid(fldpath.Child("maxUnavailable"), rollingUpdate.MaxUnavailable, fmt.Sprintf("Unable to parse: %v", err))) @@ -1021,6 +1023,9 @@ func validateRollingUpdate(rollingUpdate *kops.RollingUpdate, fldpath *field.Pat } else if surge < 0 { allErrs = append(allErrs, field.Invalid(fldpath.Child("maxSurge"), rollingUpdate.MaxSurge, "Cannot be negative")) } + if unavailable == 0 && surge == 0 { + allErrs = append(allErrs, field.Forbidden(fldpath.Child("maxSurge"), "Cannot be zero if maxUnavailable is zero")) + } } return allErrs diff --git a/pkg/apis/kops/validation/validation_test.go b/pkg/apis/kops/validation/validation_test.go index 2d94659f0399d..a62871431beb7 100644 --- a/pkg/apis/kops/validation/validation_test.go +++ b/pkg/apis/kops/validation/validation_test.go @@ -711,6 +711,13 @@ func Test_Validate_RollingUpdate(t *testing.T) { OnMasterIG: true, ExpectedErrors: []string{"Forbidden::testField.maxSurge"}, }, + { + Input: kops.RollingUpdate{ + MaxUnavailable: intStr(intstr.FromInt(0)), + MaxSurge: intStr(intstr.FromInt(0)), + }, + ExpectedErrors: []string{"Forbidden::testField.maxSurge"}, + }, } for _, g := range grid { errs := validateRollingUpdate(&g.Input, field.NewPath("testField"), g.OnMasterIG) diff --git a/pkg/apis/kops/zz_generated.deepcopy.go b/pkg/apis/kops/zz_generated.deepcopy.go index 3502b2bf6efde..5bd1dc7665791 100644 --- a/pkg/apis/kops/zz_generated.deepcopy.go +++ b/pkg/apis/kops/zz_generated.deepcopy.go @@ -3695,6 +3695,11 @@ func (in *RBACAuthorizationSpec) DeepCopy() *RBACAuthorizationSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RollingUpdate) DeepCopyInto(out *RollingUpdate) { *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } if in.MaxUnavailable != nil { in, out := &in.MaxUnavailable, &out.MaxUnavailable *out = new(intstr.IntOrString) diff --git a/pkg/instancegroups/instancegroups.go b/pkg/instancegroups/instancegroups.go index a4c94ead80f5d..d9a9828963c82 100644 --- a/pkg/instancegroups/instancegroups.go +++ b/pkg/instancegroups/instancegroups.go @@ -108,11 +108,6 @@ func (c *RollingUpdateCluster) rollingUpdateInstanceGroup(ctx context.Context, c } maxConcurrency := maxSurge + settings.MaxUnavailable.IntValue() - if maxConcurrency == 0 { - klog.Infof("Rolling updates for InstanceGroup %s are disabled", group.InstanceGroup.Name) - return nil - } - if group.InstanceGroup.Spec.Role == api.InstanceGroupRoleMaster && maxSurge != 0 { // Masters are incapable of surging because they rely on registering themselves through // the local apiserver. That apiserver depends on the local etcd, which relies on being @@ -157,6 +152,11 @@ func (c *RollingUpdateCluster) rollingUpdateInstanceGroup(ctx context.Context, c } } + if !*settings.Enabled { + klog.Infof("Rolling updates for InstanceGroup %s are disabled", group.InstanceGroup.Name) + return nil + } + terminateChan := make(chan error, maxConcurrency) for uIdx, u := range update { diff --git a/pkg/instancegroups/rollingupdate_test.go b/pkg/instancegroups/rollingupdate_test.go index 53ab1637981cc..661edf82fb698 100644 --- a/pkg/instancegroups/rollingupdate_test.go +++ b/pkg/instancegroups/rollingupdate_test.go @@ -40,6 +40,7 @@ import ( kopsapi "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/cloudinstances" "k8s.io/kops/pkg/validation" + "k8s.io/kops/upup/pkg/fi" "k8s.io/kops/upup/pkg/fi/cloudup/awsup" ) @@ -800,9 +801,8 @@ func TestRollingUpdateDisabled(t *testing.T) { c, cloud, cluster := getTestSetup() - zero := intstr.FromInt(0) cluster.Spec.RollingUpdate = &kopsapi.RollingUpdate{ - MaxUnavailable: &zero, + Enabled: fi.Bool(false), } groups := getGroupsAllNeedUpdate(c.K8sClient, cloud) @@ -821,9 +821,8 @@ func TestRollingUpdateDisabledCloudonly(t *testing.T) { c, cloud, cluster := getTestSetup() c.CloudOnly = true - zero := intstr.FromInt(0) cluster.Spec.RollingUpdate = &kopsapi.RollingUpdate{ - MaxUnavailable: &zero, + Enabled: fi.Bool(false), } groups := getGroupsAllNeedUpdate(c.K8sClient, cloud) @@ -836,6 +835,54 @@ func TestRollingUpdateDisabledCloudonly(t *testing.T) { assertGroupInstanceCount(t, cloud, "bastion-1", 1) } +type disabledSurgeTest struct { + autoscalingiface.AutoScalingAPI + t *testing.T + mutex sync.Mutex + numDetached int +} + +func (m *disabledSurgeTest) DetachInstances(input *autoscaling.DetachInstancesInput) (*autoscaling.DetachInstancesOutput, error) { + m.mutex.Lock() + defer m.mutex.Unlock() + + for _, id := range input.InstanceIds { + assert.NotContains(m.t, *id, "master") + m.numDetached++ + } + return &autoscaling.DetachInstancesOutput{}, nil +} + +func TestRollingUpdateDisabledSurge(t *testing.T) { + ctx := context.Background() + + c, cloud, cluster := getTestSetup() + + disabledSurgeTest := &disabledSurgeTest{ + AutoScalingAPI: cloud.MockAutoscaling, + t: t, + } + cloud.MockAutoscaling = disabledSurgeTest + cloud.MockEC2 = &ec2IgnoreTags{EC2API: cloud.MockEC2} + + one := intstr.FromInt(1) + cluster.Spec.RollingUpdate = &kopsapi.RollingUpdate{ + Enabled: fi.Bool(false), + MaxSurge: &one, + } + + groups := getGroupsAllNeedUpdate(c.K8sClient, cloud) + err := c.RollingUpdate(ctx, groups, cluster, &kopsapi.InstanceGroupList{}) + assert.NoError(t, err, "rolling update") + + assertGroupInstanceCount(t, cloud, "node-1", 3) + assertGroupInstanceCount(t, cloud, "node-2", 3) + assertGroupInstanceCount(t, cloud, "master-1", 2) + assertGroupInstanceCount(t, cloud, "bastion-1", 1) + + assert.Equal(t, 3, disabledSurgeTest.numDetached) +} + // The concurrent update tests attempt to induce the following expected update sequence: // // (Only for surging "all need update" test, to verify the toe-dipping behavior) diff --git a/pkg/instancegroups/settings.go b/pkg/instancegroups/settings.go index 74f44e996507b..6ab455bceb634 100644 --- a/pkg/instancegroups/settings.go +++ b/pkg/instancegroups/settings.go @@ -20,6 +20,7 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/featureflag" + "k8s.io/kops/upup/pkg/fi" ) func resolveSettings(cluster *kops.Cluster, group *kops.InstanceGroup, numInstances int) kops.RollingUpdate { @@ -29,6 +30,9 @@ func resolveSettings(cluster *kops.Cluster, group *kops.InstanceGroup, numInstan } if def := cluster.Spec.RollingUpdate; def != nil { + if rollingUpdate.Enabled == nil { + rollingUpdate.Enabled = def.Enabled + } if rollingUpdate.MaxUnavailable == nil { rollingUpdate.MaxUnavailable = def.MaxUnavailable } @@ -37,6 +41,10 @@ func resolveSettings(cluster *kops.Cluster, group *kops.InstanceGroup, numInstan } } + if rollingUpdate.Enabled == nil { + rollingUpdate.Enabled = fi.Bool(true) + } + if rollingUpdate.MaxSurge == nil { val := intstr.FromInt(0) if kops.CloudProviderID(cluster.Spec.CloudProvider) == kops.CloudProviderAWS && !featureflag.Spotinst.Enabled() { diff --git a/pkg/instancegroups/settings_test.go b/pkg/instancegroups/settings_test.go index 5fb2ef6049552..4bca2290cc916 100644 --- a/pkg/instancegroups/settings_test.go +++ b/pkg/instancegroups/settings_test.go @@ -32,6 +32,11 @@ func TestSettings(t *testing.T) { defaultValue interface{} nonDefaultValue interface{} }{ + { + name: "Enabled", + defaultValue: true, + nonDefaultValue: false, + }, { name: "MaxUnavailable", defaultValue: intstr.FromInt(1),