From 086ef8dc04062570121cd7d8bc1b179f8c92974c Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Fri, 12 Nov 2021 16:37:38 +0800 Subject: [PATCH 01/21] introduce cluster config malformed condition to kubefedcluster --- pkg/apis/core/common/constants.go | 2 + .../core/v1beta1/validation/validation.go | 2 +- .../kubefedcluster/clusterclient.go | 40 +++++++++++++------ pkg/controller/kubefedcluster/controller.go | 7 ++-- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/pkg/apis/core/common/constants.go b/pkg/apis/core/common/constants.go index 3f3c0a6746..dd8879288a 100644 --- a/pkg/apis/core/common/constants.go +++ b/pkg/apis/core/common/constants.go @@ -24,6 +24,8 @@ const ( ClusterReady ClusterConditionType = "Ready" // ClusterOffline means the cluster is temporarily down or not reachable ClusterOffline ClusterConditionType = "Offline" + // ClusterConfigMalformed means the cluster's configuration may be malformed. + ClusterConfigMalformed ClusterConditionType = "ConfigMalformed" ) const ( diff --git a/pkg/apis/core/v1beta1/validation/validation.go b/pkg/apis/core/v1beta1/validation/validation.go index 283f0189a9..e9ab57be6b 100644 --- a/pkg/apis/core/v1beta1/validation/validation.go +++ b/pkg/apis/core/v1beta1/validation/validation.go @@ -255,7 +255,7 @@ func validateDisabledTLSValidations(disabledTLSValidations []v1beta1.TLSValidati func validateClusterCondition(cc *v1beta1.ClusterCondition, path *field.Path) field.ErrorList { var allErrs field.ErrorList - allErrs = append(allErrs, validateEnumStrings(path.Child("type"), string(cc.Type), []string{string(common.ClusterReady), string(common.ClusterOffline)})...) + allErrs = append(allErrs, validateEnumStrings(path.Child("type"), string(cc.Type), []string{string(common.ClusterReady), string(common.ClusterOffline), string(common.ClusterConfigMalformed)})...) allErrs = append(allErrs, validateEnumStrings(path.Child("status"), string(cc.Status), []string{string(corev1.ConditionTrue), string(corev1.ConditionFalse), string(corev1.ConditionUnknown)})...) 
if cc.LastProbeTime.IsZero() { diff --git a/pkg/controller/kubefedcluster/clusterclient.go b/pkg/controller/kubefedcluster/clusterclient.go index 03066f716a..60b6342aed 100644 --- a/pkg/controller/kubefedcluster/clusterclient.go +++ b/pkg/controller/kubefedcluster/clusterclient.go @@ -46,14 +46,16 @@ const ( LabelZoneRegion = "failure-domain.beta.kubernetes.io/region" // Common ClusterConditions for KubeFedClusterStatus - ClusterReady = "ClusterReady" - HealthzOk = "/healthz responded with ok" - ClusterNotReady = "ClusterNotReady" - HealthzNotOk = "/healthz responded without ok" - ClusterNotReachableReason = "ClusterNotReachable" - ClusterNotReachableMsg = "cluster is not reachable" - ClusterReachableReason = "ClusterReachable" - ClusterReachableMsg = "cluster is reachable" + ClusterReady = "ClusterReady" + HealthzOk = "/healthz responded with ok" + ClusterNotReady = "ClusterNotReady" + HealthzNotOk = "/healthz responded without ok" + ClusterNotReachableReason = "ClusterNotReachable" + ClusterNotReachableMsg = "cluster is not reachable" + ClusterReachableReason = "ClusterReachable" + ClusterReachableMsg = "cluster is reachable" + ClusterConfigMalformedReason = "ClusterConfigMalformed" + ClusterConfigMalformedMsg = "cluster's configuration may be malformed" ) // ClusterClient provides methods for determining the status and zones of a @@ -67,16 +69,13 @@ type ClusterClient struct { // The kubeClient is used to configure the ClusterClient's internal client // with information from a kubeconfig stored in a kubernetes secret. func NewClusterClientSet(c *fedv1b1.KubeFedCluster, client generic.Client, fedNamespace string, timeout time.Duration) (*ClusterClient, error) { + var clusterClientSet = ClusterClient{clusterName: c.Name} clusterConfig, err := util.BuildClusterConfig(c, client, fedNamespace) if err != nil { - return nil, err + return &clusterClientSet, err } clusterConfig.Timeout = timeout - var clusterClientSet = ClusterClient{clusterName: c.Name} clusterClientSet.kubeClient = kubeclientset.NewForConfigOrDie((restclient.AddUserAgent(clusterConfig, UserAgentName))) - if clusterClientSet.kubeClient == nil { - return nil, nil - } return &clusterClientSet, nil } @@ -124,6 +123,21 @@ func (c *ClusterClient) GetClusterHealthStatus() (*fedv1b1.KubeFedClusterStatus, LastProbeTime: currentTime, LastTransitionTime: ¤tTime, } + clusterConfigMalformedReason := ClusterConfigMalformedReason + clusterConfigMalformedMsg := ClusterConfigMalformedMsg + newClusterConfigMalformedCondition := fedv1b1.ClusterCondition{ + Type: fedcommon.ClusterConfigMalformed, + Status: corev1.ConditionTrue, + Reason: &clusterConfigMalformedReason, + Message: &clusterConfigMalformedMsg, + LastProbeTime: currentTime, + LastTransitionTime: ¤tTime, + } + if c.kubeClient == nil { + clusterStatus.Conditions = append(clusterStatus.Conditions, newClusterConfigMalformedCondition) + metrics.RegisterKubefedClusterTotal(metrics.ClusterNotReady, c.clusterName) + return &clusterStatus, nil + } body, err := c.kubeClient.DiscoveryClient.RESTClient().Get().AbsPath("/healthz").Do(context.Background()).Raw() if err != nil { runtime.HandleError(errors.Wrapf(err, "Failed to do cluster health check for cluster %q", c.clusterName)) diff --git a/pkg/controller/kubefedcluster/controller.go b/pkg/controller/kubefedcluster/controller.go index 797c70e0e8..950bf14c78 100644 --- a/pkg/controller/kubefedcluster/controller.go +++ b/pkg/controller/kubefedcluster/controller.go @@ -175,7 +175,7 @@ func (cc *ClusterController) addToClusterSet(obj 
*fedv1b1.KubeFedCluster) { cc.mu.Lock() defer cc.mu.Unlock() clusterData := cc.clusterDataMap[obj.Name] - if clusterData != nil && clusterData.clusterKubeClient != nil { + if clusterData != nil && clusterData.clusterKubeClient.kubeClient != nil { return } @@ -183,10 +183,9 @@ func (cc *ClusterController) addToClusterSet(obj *fedv1b1.KubeFedCluster) { // create the restclient of cluster restClient, err := NewClusterClientSet(obj, cc.client, cc.fedNamespace, cc.clusterHealthCheckConfig.Timeout) - if err != nil || restClient == nil { + if err != nil || restClient.kubeClient == nil { cc.RecordError(obj, "MalformedClusterConfig", errors.Wrap(err, "The configuration for this cluster may be malformed")) klog.Errorf("The configuration for cluster %s may be malformed", obj.Name) - return } cc.clusterDataMap[obj.Name] = &ClusterData{clusterKubeClient: restClient, cachedObj: obj.DeepCopy()} } @@ -217,7 +216,7 @@ func (cc *ClusterController) updateClusterStatus() error { cluster := obj.DeepCopy() clusterData := cc.clusterDataMap[cluster.Name] cc.mu.RUnlock() - if clusterData == nil { + if clusterData == nil || clusterData.clusterKubeClient.kubeClient == nil { // Retry adding cluster client cc.addToClusterSet(cluster) cc.mu.RLock() From 7dbc2f6798bf55c04b9b6c0b7096a04a0315e03d Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Mon, 15 Nov 2021 17:34:10 +0800 Subject: [PATCH 02/21] mark ClientRetrievalFailed as recoverable error recoverable when a KubeFedCluster's malformed Secret get fixed --- pkg/controller/sync/status/status.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/controller/sync/status/status.go b/pkg/controller/sync/status/status.go index 7e34da6c11..c22daf48c1 100644 --- a/pkg/controller/sync/status/status.go +++ b/pkg/controller/sync/status/status.go @@ -173,6 +173,7 @@ func IsRecoverableError(status PropagationStatus) bool { DeletionFailed, LabelRemovalFailed, RetrievalFailed, + ClientRetrievalFailed, CreationTimedOut, UpdateTimedOut, DeletionTimedOut, From c05ce64e8871d69aef947063575112269aecff25 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Thu, 18 Nov 2021 15:30:37 +0800 Subject: [PATCH 03/21] fix panic when encoutering malformed cluster config --- pkg/controller/kubefedcluster/clusterclient.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/kubefedcluster/clusterclient.go b/pkg/controller/kubefedcluster/clusterclient.go index 60b6342aed..b936109569 100644 --- a/pkg/controller/kubefedcluster/clusterclient.go +++ b/pkg/controller/kubefedcluster/clusterclient.go @@ -75,8 +75,8 @@ func NewClusterClientSet(c *fedv1b1.KubeFedCluster, client generic.Client, fedNa return &clusterClientSet, err } clusterConfig.Timeout = timeout - clusterClientSet.kubeClient = kubeclientset.NewForConfigOrDie((restclient.AddUserAgent(clusterConfig, UserAgentName))) - return &clusterClientSet, nil + clusterClientSet.kubeClient, err = kubeclientset.NewForConfig(restclient.AddUserAgent(clusterConfig, UserAgentName)) + return &clusterClientSet, err } // GetClusterHealthStatus gets the kubernetes cluster health status by requesting "/healthz" From 809c271a1b9f5e52d277a80e5ee8d76557ac2eb7 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Thu, 23 Dec 2021 20:24:25 +0800 Subject: [PATCH 04/21] log error of malformed cluster configuration --- pkg/controller/kubefedcluster/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/kubefedcluster/controller.go b/pkg/controller/kubefedcluster/controller.go index 950bf14c78..c0a94f38f0 100644 --- 
a/pkg/controller/kubefedcluster/controller.go +++ b/pkg/controller/kubefedcluster/controller.go @@ -185,7 +185,7 @@ func (cc *ClusterController) addToClusterSet(obj *fedv1b1.KubeFedCluster) { restClient, err := NewClusterClientSet(obj, cc.client, cc.fedNamespace, cc.clusterHealthCheckConfig.Timeout) if err != nil || restClient.kubeClient == nil { cc.RecordError(obj, "MalformedClusterConfig", errors.Wrap(err, "The configuration for this cluster may be malformed")) - klog.Errorf("The configuration for cluster %s may be malformed", obj.Name) + klog.Errorf("The configuration for cluster %q may be malformed: %v", obj.Name, err) } cc.clusterDataMap[obj.Name] = &ClusterData{clusterKubeClient: restClient, cachedObj: obj.DeepCopy()} } From a1214b209dae9dca231cfe2bfd44fafabb50f91b Mon Sep 17 00:00:00 2001 From: Martin Hrabovcin Date: Wed, 9 Feb 2022 15:31:25 +0100 Subject: [PATCH 05/21] Fix mutatingwebhookconfiguration CA cert when cert-manager is enabled --- .../kubefed/charts/controllermanager/templates/webhook.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/charts/kubefed/charts/controllermanager/templates/webhook.yaml b/charts/kubefed/charts/controllermanager/templates/webhook.yaml index 23730536e7..ce28e97341 100644 --- a/charts/kubefed/charts/controllermanager/templates/webhook.yaml +++ b/charts/kubefed/charts/controllermanager/templates/webhook.yaml @@ -123,6 +123,10 @@ metadata: {{- else }} name: mutation.core.kubefed.io {{- end }} + annotations: + {{- if .Values.certManager.enabled }} + cert-manager.io/inject-ca-from: {{ printf "%s/%s%s" .Release.Namespace .Release.Name "-root-certificate" | quote }} + {{- end }} webhooks: - name: kubefedconfigs.core.kubefed.io admissionReviewVersions: From 252d1369facfde3e8b1da2adefb21d3b145c4ef3 Mon Sep 17 00:00:00 2001 From: Martin Hrabovcin Date: Wed, 9 Feb 2022 15:44:11 +0100 Subject: [PATCH 06/21] Bump controllermanager chart version --- charts/kubefed/Chart.yaml | 4 ++-- charts/kubefed/charts/controllermanager/Chart.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/kubefed/Chart.yaml b/charts/kubefed/Chart.yaml index 8154f2aa50..568468c4a8 100644 --- a/charts/kubefed/Chart.yaml +++ b/charts/kubefed/Chart.yaml @@ -1,11 +1,11 @@ apiVersion: v2 description: KubeFed helm chart name: kubefed -version: 0.0.4 +version: 0.0.5 kubeVersion: ">= 1.16.0-0" dependencies: - name: controllermanager - version: 0.0.4 + version: 0.0.5 repository: "https://localhost/" # Required but unused. 
condition: controllermanager.enabled diff --git a/charts/kubefed/charts/controllermanager/Chart.yaml b/charts/kubefed/charts/controllermanager/Chart.yaml index e74b79d213..8f9ef554ff 100644 --- a/charts/kubefed/charts/controllermanager/Chart.yaml +++ b/charts/kubefed/charts/controllermanager/Chart.yaml @@ -2,4 +2,4 @@ apiVersion: v2 appVersion: "0.8.1" description: A Helm chart for KubeFed Controller Manager name: controllermanager -version: 0.0.4 +version: 0.0.5 From faee22fba9df122d97e39fecaa768a41d0385bc4 Mon Sep 17 00:00:00 2001 From: Jimmi Dyson Date: Tue, 15 Feb 2022 13:18:13 +0000 Subject: [PATCH 07/21] Update repo for release v0.9.1 --- CHANGELOG.md | 4 ++++ charts/index.yaml | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c63f8ba3cb..edd8e283e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Unreleased +# v0.9.1 +- [#1490](https://github.com/kubernetes-sigs/kubefed/pull/1490) + fix: Fix mutatingwebhookconfiguration CA cert when cert-manager is enabled + # v0.9.0 - [#1460](https://github.com/kubernetes-sigs/kubefed/pull/1460) feat: introduce informer cache sync timeout diff --git a/charts/index.yaml b/charts/index.yaml index fd5355e942..1fe0ed7971 100644 --- a/charts/index.yaml +++ b/charts/index.yaml @@ -1,6 +1,20 @@ apiVersion: v1 entries: kubefed: + - apiVersion: v2 + created: "2022-02-15T10:39:01.477937005Z" + dependencies: + - condition: controllermanager.enabled + name: controllermanager + repository: https://localhost/ + version: 0.9.1 + description: KubeFed helm chart + digest: a0100b39f82affb29601343214661cfebcc67c3f00fac7e38eb9983bb1359f88 + kubeVersion: '>= 1.16.0-0' + name: kubefed + urls: + - https://github.com/kubernetes-sigs/kubefed/releases/download/v0.9.1/kubefed-0.9.1.tgz + version: 0.9.1 - apiVersion: v2 created: "2021-11-18T10:24:18.266620592Z" dependencies: @@ -211,4 +225,4 @@ entries: urls: - https://github.com/kubernetes-sigs/kubefed/releases/download/v0.1.0-rc1/kubefed-0.1.0-rc1.tgz version: 0.1.0-rc1 -generated: "2021-11-18T10:24:18.264159378Z" +generated: "2022-02-15T10:39:01.476784537Z" From f3fe78cbddce24c3063928ecaaf83a87e25722b2 Mon Sep 17 00:00:00 2001 From: Irfan Ur Rehman Date: Sun, 20 Mar 2022 23:39:43 +1100 Subject: [PATCH 08/21] Handle possible conflict in updating test labels on cluster --- test/e2e/scheduling.go | 53 ++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/test/e2e/scheduling.go b/test/e2e/scheduling.go index 26b8162d5f..bab30cd32d 100644 --- a/test/e2e/scheduling.go +++ b/test/e2e/scheduling.go @@ -19,6 +19,7 @@ package e2e import ( "context" "fmt" + "time" "github.com/pkg/errors" @@ -326,23 +327,7 @@ func waitForMatchingFederatedObject(tl common.TestLogger, typeConfig typeconfig. 
} func createIntersectionEnvironment(tl common.TestLogger, client genericclient.Client, kubefedNamespace string, clusterName string) { - fedCluster := &unstructured.Unstructured{} - fedCluster.SetGroupVersionKind(schema.GroupVersionKind{ - Kind: "KubeFedCluster", - Group: fedv1b1.SchemeGroupVersion.Group, - Version: fedv1b1.SchemeGroupVersion.Version, - }) - - err := client.Get(context.Background(), fedCluster, kubefedNamespace, clusterName) - if err != nil { - tl.Fatalf("Cannot get KubeFedCluster %q from namespace %q: %v", clusterName, kubefedNamespace, err) - } - - addLabel(fedCluster, "foo", "bar") - err = client.Update(context.TODO(), fedCluster) - if err != nil { - tl.Fatalf("Error updating label %q to KubeFedCluster %q: %v", "foo:bar", clusterName, err) - } + updateClusterLabel(tl, client, kubefedNamespace, clusterName, true) } func destroyIntersectionEnvironment(tl common.TestLogger, client genericclient.Client, testNamespace *unstructured.Unstructured, kubefedNamespace string, clusterName string) { @@ -352,22 +337,40 @@ func destroyIntersectionEnvironment(tl common.TestLogger, client genericclient.C tl.Fatalf("Error deleting FederatedNamespace %q: %v", testNamespaceKey, err) } + updateClusterLabel(tl, client, kubefedNamespace, clusterName, false) +} + +func updateClusterLabel(tl common.TestLogger, client genericclient.Client, kubefedNamespace string, clusterName string, addTestLabel bool) { fedCluster := &unstructured.Unstructured{} fedCluster.SetGroupVersionKind(schema.GroupVersionKind{ Kind: "KubeFedCluster", Group: fedv1b1.SchemeGroupVersion.Group, Version: fedv1b1.SchemeGroupVersion.Version, }) + // We retry couple of times on conflict + err := wait.PollImmediate(1*time.Second, 10*time.Second, func() (bool, error) { + err := client.Get(context.Background(), fedCluster, kubefedNamespace, clusterName) + if err != nil { + tl.Fatalf("Cannot get KubeFedCluster %q from namespace %q: %v", clusterName, kubefedNamespace, err) + } - err = client.Get(context.Background(), fedCluster, kubefedNamespace, clusterName) - if err != nil { - tl.Fatalf("Cannot get KubeFedCluster %q from namespace %q: %v", clusterName, kubefedNamespace, err) - } - - removeLabel(fedCluster, "foo", "bar") - err = client.Update(context.TODO(), fedCluster) + if addTestLabel { + addLabel(fedCluster, "foo", "bar") + } else { + removeLabel(fedCluster, "foo", "bar") + } + err = client.Update(context.TODO(), fedCluster) + if err == nil { + return true, nil + } + if apierrors.IsConflict(err) { + tl.Logf("Got conflit updating label %q (add=%t) to KubeFedCluster %q", "foo:bar. 
Will Retry.", addTestLabel, clusterName) + return false, nil + } + return false, errors.Wrapf(err, "failed to update resource") + }) if err != nil { - tl.Fatalf("Error deleting label %q of KubeFedCluster %q: %v", "foo:bar", clusterName, err) + tl.Fatalf("Error updating label %q (add=%t) to KubeFedCluster %q: %v", "foo:bar", addTestLabel, clusterName, err) } } From 729ba1a944c74615efb4df3e6b7615cd31f5533c Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Fri, 8 Apr 2022 14:11:15 +0800 Subject: [PATCH 09/21] record waiting for removal event --- pkg/controller/sync/controller.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/controller/sync/controller.go b/pkg/controller/sync/controller.go index 8611f7696f..3a8d9fb21b 100644 --- a/pkg/controller/sync/controller.go +++ b/pkg/controller/sync/controller.go @@ -589,7 +589,9 @@ func (s *KubeFedSyncController) deleteFromClusters(fedResource FederatedResource if len(remainingClusters) > 0 { fedKind := fedResource.FederatedKind() fedName := fedResource.FederatedName() - klog.V(2).Infof("Waiting for resources managed by %s %q to be removed from the following clusters: %s", fedKind, fedName, strings.Join(remainingClusters, ", ")) + remainingClustersStr := strings.Join(remainingClusters, ", ") + klog.V(2).Infof("Waiting for resources managed by %s %q to be removed from the following clusters: %s", fedKind, fedName, remainingClustersStr) + fedResource.RecordEvent("WaitForRemovalInCluster", "Waiting for managed resources to be removed from the following clusters: %s", remainingClustersStr) return true, nil } err = s.ensureRemovedOrUnmanaged(fedResource) From 06973f08cc7858798ff9e2335f489fb6e51b2592 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Fri, 8 Apr 2022 14:18:21 +0800 Subject: [PATCH 10/21] tweak: use field constants --- pkg/controller/sync/dispatch/retain.go | 18 +++++++++--------- pkg/controller/util/constants.go | 6 ++++++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pkg/controller/sync/dispatch/retain.go b/pkg/controller/sync/dispatch/retain.go index c22aabbbcc..0225dfcd87 100644 --- a/pkg/controller/sync/dispatch/retain.go +++ b/pkg/controller/sync/dispatch/retain.go @@ -47,12 +47,12 @@ func RetainClusterFields(targetKind string, desiredObj, clusterObj, fedObj *unst func retainServiceFields(desiredObj, clusterObj *unstructured.Unstructured) error { // healthCheckNodePort is allocated by APIServer and unchangeable, so it should be retained while updating - healthCheckNodePort, ok, err := unstructured.NestedInt64(clusterObj.Object, "spec", "healthCheckNodePort") + healthCheckNodePort, ok, err := unstructured.NestedInt64(clusterObj.Object, util.SpecField, util.HealthCheckNodePortField) if err != nil { return errors.Wrap(err, "Error retrieving healthCheckNodePort from service") } if ok && healthCheckNodePort > 0 { - if err = unstructured.SetNestedField(desiredObj.Object, healthCheckNodePort, "spec", "healthCheckNodePort"); err != nil { + if err = unstructured.SetNestedField(desiredObj.Object, healthCheckNodePort, util.SpecField, util.HealthCheckNodePortField); err != nil { return errors.Wrap(err, "Error setting healthCheckNodePort for service") } } @@ -60,31 +60,31 @@ func retainServiceFields(desiredObj, clusterObj *unstructured.Unstructured) erro // ClusterIP and NodePort are allocated to Service by cluster, so retain the same if any while updating // Retain clusterip and clusterips - clusterIP, ok, err := unstructured.NestedString(clusterObj.Object, "spec", "clusterIP") + clusterIP, ok, err := 
unstructured.NestedString(clusterObj.Object, util.SpecField, util.ClusterIPField) if err != nil { return errors.Wrap(err, "Error retrieving clusterIP from cluster service") } // !ok could indicate that a cluster ip was not assigned if ok && clusterIP != "" { - err := unstructured.SetNestedField(desiredObj.Object, clusterIP, "spec", "clusterIP") + err := unstructured.SetNestedField(desiredObj.Object, clusterIP, util.SpecField, util.ClusterIPField) if err != nil { return errors.Wrap(err, "Error setting clusterIP for service") } } - clusterIPs, ok, err := unstructured.NestedStringSlice(clusterObj.Object, "spec", "clusterIPs") + clusterIPs, ok, err := unstructured.NestedStringSlice(clusterObj.Object, util.SpecField, util.ClusterIPsField) if err != nil { return errors.Wrap(err, "Error retrieving clusterIPs from cluster service") } // !ok could indicate that cluster ips was not assigned if ok && len(clusterIPs) > 0 { - err := unstructured.SetNestedStringSlice(desiredObj.Object, clusterIPs, "spec", "clusterIPs") + err := unstructured.SetNestedStringSlice(desiredObj.Object, clusterIPs, util.SpecField, util.ClusterIPsField) if err != nil { return errors.Wrap(err, "Error setting clusterIPs for service") } } // Retain nodeports - clusterPorts, ok, err := unstructured.NestedSlice(clusterObj.Object, "spec", "ports") + clusterPorts, ok, err := unstructured.NestedSlice(clusterObj.Object, util.SpecField, util.PortsField) if err != nil { return errors.Wrap(err, "Error retrieving ports from cluster service") } @@ -92,7 +92,7 @@ func retainServiceFields(desiredObj, clusterObj *unstructured.Unstructured) erro return nil } var desiredPorts []interface{} - desiredPorts, ok, err = unstructured.NestedSlice(desiredObj.Object, "spec", "ports") + desiredPorts, ok, err = unstructured.NestedSlice(desiredObj.Object, util.SpecField, util.PortsField) if err != nil { return errors.Wrap(err, "Error retrieving ports from service") } @@ -112,7 +112,7 @@ func retainServiceFields(desiredObj, clusterObj *unstructured.Unstructured) erro } } } - err = unstructured.SetNestedSlice(desiredObj.Object, desiredPorts, "spec", "ports") + err = unstructured.SetNestedSlice(desiredObj.Object, desiredPorts, util.SpecField, util.PortsField) if err != nil { return errors.Wrap(err, "Error setting ports for service") } diff --git a/pkg/controller/util/constants.go b/pkg/controller/util/constants.go index afffd13582..284c7ff6a6 100644 --- a/pkg/controller/util/constants.go +++ b/pkg/controller/util/constants.go @@ -41,6 +41,12 @@ const ( StatusField = "status" MetadataField = "metadata" + // Service fields + HealthCheckNodePortField = "healthCheckNodePort" + ClusterIPField = "clusterIP" + ClusterIPsField = "clusterIPs" + PortsField = "ports" + // ServiceAccount fields SecretsField = "secrets" From d197c6083bde4b7d10b9e5441d411958461e20ef Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Fri, 8 Apr 2022 14:26:18 +0800 Subject: [PATCH 11/21] introduce Kubernetes version to status of KubeFedCluster --- pkg/apis/core/v1beta1/kubefedcluster_types.go | 4 ++++ pkg/controller/kubefedcluster/clusterclient.go | 11 +++++++++-- pkg/controller/kubefedcluster/controller.go | 4 ++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/pkg/apis/core/v1beta1/kubefedcluster_types.go b/pkg/apis/core/v1beta1/kubefedcluster_types.go index c3c59bc957..3f5e41940f 100644 --- a/pkg/apis/core/v1beta1/kubefedcluster_types.go +++ b/pkg/apis/core/v1beta1/kubefedcluster_types.go @@ -70,6 +70,9 @@ type LocalSecretReference struct { type KubeFedClusterStatus struct { // 
Conditions is an array of current cluster conditions. Conditions []ClusterCondition `json:"conditions"` + // KubernetesVersion is the Kubernetes git version of the cluster. + // +optional + KubernetesVersion string `json:"kubernetesVersion,omitempty"` // Zones are the names of availability zones in which the nodes of the cluster exist, e.g. 'us-east1-a'. // +optional Zones []string `json:"zones,omitempty"` @@ -81,6 +84,7 @@ type KubeFedClusterStatus struct { // +kubebuilder:object:root=true // +kubebuilder:printcolumn:name=age,type=date,JSONPath=.metadata.creationTimestamp // +kubebuilder:printcolumn:name=ready,type=string,JSONPath=.status.conditions[?(@.type=='Ready')].status +// +kubebuilder:printcolumn:name=kubernetes-version,type=string,JSONPath=.status.kubernetesVersion // +kubebuilder:resource:path=kubefedclusters // +kubebuilder:subresource:status diff --git a/pkg/controller/kubefedcluster/clusterclient.go b/pkg/controller/kubefedcluster/clusterclient.go index b936109569..33b4658d4e 100644 --- a/pkg/controller/kubefedcluster/clusterclient.go +++ b/pkg/controller/kubefedcluster/clusterclient.go @@ -79,8 +79,8 @@ func NewClusterClientSet(c *fedv1b1.KubeFedCluster, client generic.Client, fedNa return &clusterClientSet, err } -// GetClusterHealthStatus gets the kubernetes cluster health status by requesting "/healthz" -func (c *ClusterClient) GetClusterHealthStatus() (*fedv1b1.KubeFedClusterStatus, error) { +// GetClusterStatus gets the kubernetes cluster's health and version status +func (c *ClusterClient) GetClusterStatus() (*fedv1b1.KubeFedClusterStatus, error) { clusterStatus := fedv1b1.KubeFedClusterStatus{} currentTime := metav1.Now() clusterReady := ClusterReady @@ -150,6 +150,13 @@ func (c *ClusterClient) GetClusterHealthStatus() (*fedv1b1.KubeFedClusterStatus, } else { metrics.RegisterKubefedClusterTotal(metrics.ClusterReady, c.clusterName) clusterStatus.Conditions = append(clusterStatus.Conditions, newClusterReadyCondition) + + version, err := c.kubeClient.DiscoveryClient.ServerVersion() + if err != nil { + runtime.HandleError(errors.Wrapf(err, "Failed to get Kubernetes version of cluster %q", c.clusterName)) + } else { + clusterStatus.KubernetesVersion = version.GitVersion + } } } diff --git a/pkg/controller/kubefedcluster/controller.go b/pkg/controller/kubefedcluster/controller.go index c0a94f38f0..f39c5601ca 100644 --- a/pkg/controller/kubefedcluster/controller.go +++ b/pkg/controller/kubefedcluster/controller.go @@ -242,7 +242,7 @@ func (cc *ClusterController) updateIndividualClusterStatus(cluster *fedv1b1.Kube clusterClient := storedData.clusterKubeClient - currentClusterStatus, err := clusterClient.GetClusterHealthStatus() + currentClusterStatus, err := clusterClient.GetClusterStatus() if err != nil { cc.RecordError(cluster, "RetrievingClusterHealthFailed", errors.Wrap(err, "Failed to retrieve health of the cluster")) klog.Errorf("Failed to retrieve health of the cluster %s: %v", cluster.Name, err) @@ -278,7 +278,7 @@ func thresholdAdjustedClusterStatus(clusterStatus *fedv1b1.KubeFedClusterStatus, if storedData.resultRun < threshold { // Success/Failure is below threshold - leave the probe state unchanged. 
probeTime := clusterStatus.Conditions[0].LastProbeTime - clusterStatus = storedData.clusterStatus + clusterStatus.Conditions = storedData.clusterStatus.Conditions setProbeTime(clusterStatus, probeTime) } else if clusterStatusEqual(clusterStatus, storedData.clusterStatus) { // preserve the last transition time From 17b2063099de74a4f4fcc84bf72f57d05b018516 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Fri, 8 Apr 2022 14:35:02 +0800 Subject: [PATCH 12/21] add short name kfc for kubefedcluster --- pkg/apis/core/v1beta1/kubefedcluster_types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/apis/core/v1beta1/kubefedcluster_types.go b/pkg/apis/core/v1beta1/kubefedcluster_types.go index 3f5e41940f..1ff55a7fc6 100644 --- a/pkg/apis/core/v1beta1/kubefedcluster_types.go +++ b/pkg/apis/core/v1beta1/kubefedcluster_types.go @@ -85,7 +85,7 @@ type KubeFedClusterStatus struct { // +kubebuilder:printcolumn:name=age,type=date,JSONPath=.metadata.creationTimestamp // +kubebuilder:printcolumn:name=ready,type=string,JSONPath=.status.conditions[?(@.type=='Ready')].status // +kubebuilder:printcolumn:name=kubernetes-version,type=string,JSONPath=.status.kubernetesVersion -// +kubebuilder:resource:path=kubefedclusters +// +kubebuilder:resource:path=kubefedclusters,shortName=kfc // +kubebuilder:subresource:status // KubeFedCluster configures KubeFed to be aware of a Kubernetes From 2a878283eecbcfeda4abd25a86a2e209f4ed9d4d Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Fri, 8 Apr 2022 16:10:13 +0800 Subject: [PATCH 13/21] update kubefedcluster crd --- charts/kubefed/charts/controllermanager/crds/crds.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/charts/kubefed/charts/controllermanager/crds/crds.yaml b/charts/kubefed/charts/controllermanager/crds/crds.yaml index 67d3327f2c..f6cd7ce8cf 100644 --- a/charts/kubefed/charts/controllermanager/crds/crds.yaml +++ b/charts/kubefed/charts/controllermanager/crds/crds.yaml @@ -457,6 +457,8 @@ spec: kind: KubeFedCluster listKind: KubeFedClusterList plural: kubefedclusters + shortNames: + - kfc singular: kubefedcluster scope: Namespaced versions: @@ -467,6 +469,9 @@ spec: - jsonPath: .status.conditions[?(@.type=='Ready')].status name: ready type: string + - jsonPath: .status.kubernetesVersion + name: kubernetes-version + type: string name: v1beta1 schema: openAPIV3Schema: @@ -559,6 +564,10 @@ spec: - type type: object type: array + kubernetesVersion: + description: KubernetesVersion is the Kubernetes git version of the + cluster. + type: string region: description: Region is the name of the region in which all of the nodes in the cluster exist. e.g. 'us-east1'. 
From 7d6a55d12390f910d61cb1fe399226feb18e089d Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Fri, 8 Apr 2022 15:55:24 +0800 Subject: [PATCH 14/21] pin test go version to 1.16 to avoid test breaks --- .github/workflows/build-and-test.yml | 2 +- .github/workflows/test-and-push.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 587f2b0a46..d94d9b80f1 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -11,7 +11,7 @@ jobs: - uses: actions/setup-go@v2 with: - go-version: '^1.16.6' + go-version: '~1.16' - name: Run tests run: | diff --git a/.github/workflows/test-and-push.yml b/.github/workflows/test-and-push.yml index 04f1dc9368..0ab19319fd 100644 --- a/.github/workflows/test-and-push.yml +++ b/.github/workflows/test-and-push.yml @@ -16,7 +16,7 @@ jobs: - uses: actions/setup-go@v2 with: - go-version: '^1.16.6' + go-version: '~1.16' - name: Run tests run: DOWNLOAD_BINARIES=y bash -x ./scripts/pre-commit.sh From 9c7c111219e20122897679af1748a9d4fd3c819f Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Fri, 8 Apr 2022 16:12:06 +0800 Subject: [PATCH 15/21] update generated code --- pkg/apis/core/v1alpha1/zz_generated.deepcopy.go | 1 - pkg/apis/core/v1beta1/zz_generated.deepcopy.go | 1 - pkg/apis/scheduling/v1alpha1/zz_generated.deepcopy.go | 1 - 3 files changed, 3 deletions(-) diff --git a/pkg/apis/core/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/core/v1alpha1/zz_generated.deepcopy.go index 42636c7f8d..36cb36cd28 100644 --- a/pkg/apis/core/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/core/v1alpha1/zz_generated.deepcopy.go @@ -1,4 +1,3 @@ -//go:build !ignore_autogenerated // +build !ignore_autogenerated /* diff --git a/pkg/apis/core/v1beta1/zz_generated.deepcopy.go b/pkg/apis/core/v1beta1/zz_generated.deepcopy.go index c1e586fec1..9e77639701 100644 --- a/pkg/apis/core/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/core/v1beta1/zz_generated.deepcopy.go @@ -1,4 +1,3 @@ -//go:build !ignore_autogenerated // +build !ignore_autogenerated /* diff --git a/pkg/apis/scheduling/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/scheduling/v1alpha1/zz_generated.deepcopy.go index f7d39351a5..bbb678e28e 100644 --- a/pkg/apis/scheduling/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/scheduling/v1alpha1/zz_generated.deepcopy.go @@ -1,4 +1,3 @@ -//go:build !ignore_autogenerated // +build !ignore_autogenerated /* From 2880adde2149a3f568de18125d39252ceb50a76f Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Sun, 17 Apr 2022 15:07:44 +0800 Subject: [PATCH 16/21] expose cluster health check error to condition message --- pkg/controller/kubefedcluster/clusterclient.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/controller/kubefedcluster/clusterclient.go b/pkg/controller/kubefedcluster/clusterclient.go index 33b4658d4e..fc3c57965d 100644 --- a/pkg/controller/kubefedcluster/clusterclient.go +++ b/pkg/controller/kubefedcluster/clusterclient.go @@ -18,6 +18,7 @@ package kubefedcluster import ( "context" + "fmt" "strings" "time" @@ -141,6 +142,8 @@ func (c *ClusterClient) GetClusterStatus() (*fedv1b1.KubeFedClusterStatus, error body, err := c.kubeClient.DiscoveryClient.RESTClient().Get().AbsPath("/healthz").Do(context.Background()).Raw() if err != nil { runtime.HandleError(errors.Wrapf(err, "Failed to do cluster health check for cluster %q", c.clusterName)) + msg := fmt.Sprintf("%s: %v", ClusterNotReachableMsg, err) + newClusterOfflineCondition.Message = &msg 
clusterStatus.Conditions = append(clusterStatus.Conditions, newClusterOfflineCondition) metrics.RegisterKubefedClusterTotal(metrics.ClusterOffline, c.clusterName) } else { From 73231b233a58a5bcae2b35a5e9d3eb51b5191f67 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Sat, 16 Apr 2022 15:28:54 +0800 Subject: [PATCH 17/21] fix some clients missing user agent --- .../app/leaderelection/leaderelection.go | 5 +++-- pkg/controller/kubefedcluster/controller.go | 3 ++- pkg/controller/schedulingpreference/controller.go | 2 +- pkg/controller/status/controller.go | 9 ++++++--- pkg/controller/sync/controller.go | 8 +++++--- pkg/schedulingtypes/plugin.go | 9 ++++++--- 6 files changed, 23 insertions(+), 13 deletions(-) diff --git a/cmd/controller-manager/app/leaderelection/leaderelection.go b/cmd/controller-manager/app/leaderelection/leaderelection.go index 2e3db4994d..f19f104e04 100644 --- a/cmd/controller-manager/app/leaderelection/leaderelection.go +++ b/cmd/controller-manager/app/leaderelection/leaderelection.go @@ -36,8 +36,9 @@ import ( func NewKubeFedLeaderElector(opts *options.Options, fnStartControllers func(*options.Options, <-chan struct{})) (*leaderelection.LeaderElector, error) { const component = "kubefed-controller-manager" - restclient.AddUserAgent(opts.Config.KubeConfig, "kubefed-leader-election") - leaderElectionClient := kubeclient.NewForConfigOrDie(opts.Config.KubeConfig) + kubeConfig := restclient.CopyConfig(opts.Config.KubeConfig) + restclient.AddUserAgent(kubeConfig, "kubefed-leader-election") + leaderElectionClient := kubeclient.NewForConfigOrDie(kubeConfig) hostname, err := os.Hostname() if err != nil { diff --git a/pkg/controller/kubefedcluster/controller.go b/pkg/controller/kubefedcluster/controller.go index f39c5601ca..69d5e9de46 100644 --- a/pkg/controller/kubefedcluster/controller.go +++ b/pkg/controller/kubefedcluster/controller.go @@ -96,7 +96,8 @@ func StartClusterController(config *util.ControllerConfig, clusterHealthCheckCon func newClusterController(config *util.ControllerConfig, clusterHealthCheckConfig *util.ClusterHealthCheckConfig) (*ClusterController, error) { kubeConfig := restclient.CopyConfig(config.KubeConfig) kubeConfig.Timeout = clusterHealthCheckConfig.Timeout - client := genericclient.NewForConfigOrDieWithUserAgent(kubeConfig, "cluster-controller") + restclient.AddUserAgent(kubeConfig, "cluster-controller") + client := genericclient.NewForConfigOrDie(kubeConfig) cc := &ClusterController{ client: client, diff --git a/pkg/controller/schedulingpreference/controller.go b/pkg/controller/schedulingpreference/controller.go index 6f562c6ff0..31fadd140e 100644 --- a/pkg/controller/schedulingpreference/controller.go +++ b/pkg/controller/schedulingpreference/controller.go @@ -137,7 +137,7 @@ func newSchedulingPreferenceController(config *util.ControllerConfig, scheduling s.clusterDeliverer = util.NewDelayingDeliverer() s.store, s.controller, err = util.NewGenericInformer( - config.KubeConfig, + kubeConfig, config.TargetNamespace, s.scheduler.ObjectType(), util.NoResyncPeriod, diff --git a/pkg/controller/status/controller.go b/pkg/controller/status/controller.go index bf898849e5..d932b3bcef 100644 --- a/pkg/controller/status/controller.go +++ b/pkg/controller/status/controller.go @@ -25,6 +25,7 @@ import ( "time" "k8s.io/apimachinery/pkg/util/wait" + restclient "k8s.io/client-go/rest" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -104,14 +105,16 @@ func newKubeFedStatusController(controllerConfig *util.ControllerConfig, typeCon return nil, 
errors.Errorf("Status collection is not supported for %q", federatedAPIResource.Kind) } userAgent := fmt.Sprintf("%s-controller", strings.ToLower(statusAPIResource.Kind)) - client := genericclient.NewForConfigOrDieWithUserAgent(controllerConfig.KubeConfig, userAgent) + kubeConfig := restclient.CopyConfig(controllerConfig.KubeConfig) + restclient.AddUserAgent(kubeConfig, userAgent) + client := genericclient.NewForConfigOrDie(kubeConfig) - federatedTypeClient, err := util.NewResourceClient(controllerConfig.KubeConfig, &federatedAPIResource) + federatedTypeClient, err := util.NewResourceClient(kubeConfig, &federatedAPIResource) if err != nil { return nil, err } - statusClient, err := util.NewResourceClient(controllerConfig.KubeConfig, statusAPIResource) + statusClient, err := util.NewResourceClient(kubeConfig, statusAPIResource) if err != nil { return nil, err } diff --git a/pkg/controller/sync/controller.go b/pkg/controller/sync/controller.go index 3a8d9fb21b..8627875174 100644 --- a/pkg/controller/sync/controller.go +++ b/pkg/controller/sync/controller.go @@ -23,6 +23,7 @@ import ( "time" "github.com/pkg/errors" + restclient "k8s.io/client-go/rest" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -113,9 +114,10 @@ func newKubeFedSyncController(controllerConfig *util.ControllerConfig, typeConfi federatedTypeAPIResource := typeConfig.GetFederatedType() userAgent := fmt.Sprintf("%s-controller", strings.ToLower(federatedTypeAPIResource.Kind)) - // Initialize non-dynamic clients first to avoid polluting config - client := genericclient.NewForConfigOrDieWithUserAgent(controllerConfig.KubeConfig, userAgent) - kubeClient := kubeclient.NewForConfigOrDie(controllerConfig.KubeConfig) + kubeConfig := restclient.CopyConfig(controllerConfig.KubeConfig) + restclient.AddUserAgent(kubeConfig, userAgent) + client := genericclient.NewForConfigOrDie(kubeConfig) + kubeClient := kubeclient.NewForConfigOrDie(kubeConfig) broadcaster := record.NewBroadcaster() broadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) diff --git a/pkg/schedulingtypes/plugin.go b/pkg/schedulingtypes/plugin.go index 8708aa34fa..f7209534bf 100644 --- a/pkg/schedulingtypes/plugin.go +++ b/pkg/schedulingtypes/plugin.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" + restclient "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" @@ -60,7 +61,9 @@ type Plugin struct { func NewPlugin(controllerConfig *util.ControllerConfig, eventHandlers SchedulerEventHandlers, typeConfig typeconfig.Interface, nsAPIResource *metav1.APIResource) (*Plugin, error) { targetAPIResource := typeConfig.GetTargetType() userAgent := fmt.Sprintf("%s-replica-scheduler", strings.ToLower(targetAPIResource.Kind)) - client := genericclient.NewForConfigOrDieWithUserAgent(controllerConfig.KubeConfig, userAgent) + kubeConfig := restclient.CopyConfig(controllerConfig.KubeConfig) + restclient.AddUserAgent(kubeConfig, userAgent) + client := genericclient.NewForConfigOrDie(kubeConfig) targetInformer, err := util.NewFederatedInformer( controllerConfig, @@ -84,13 +87,13 @@ func NewPlugin(controllerConfig *util.ControllerConfig, eventHandlers SchedulerE kubeFedEventHandler := eventHandlers.KubeFedEventHandler federatedTypeAPIResource := typeConfig.GetFederatedType() - p.federatedTypeClient, err = util.NewResourceClient(controllerConfig.KubeConfig, &federatedTypeAPIResource) + 
p.federatedTypeClient, err = util.NewResourceClient(kubeConfig, &federatedTypeAPIResource) if err != nil { return nil, err } p.federatedStore, p.federatedController = util.NewResourceInformer(p.federatedTypeClient, targetNamespace, &federatedTypeAPIResource, kubeFedEventHandler) - p.fedNsClient, err = util.NewResourceClient(controllerConfig.KubeConfig, nsAPIResource) + p.fedNsClient, err = util.NewResourceClient(kubeConfig, nsAPIResource) if err != nil { return nil, err } From 1d537fc17cb22b816d68d3cab9f0763453707eb2 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Sun, 17 Apr 2022 15:06:29 +0800 Subject: [PATCH 18/21] add test for k8s version status of KubeFedCluster --- test/e2e/kubefedcluster.go | 43 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 test/e2e/kubefedcluster.go diff --git a/test/e2e/kubefedcluster.go b/test/e2e/kubefedcluster.go new file mode 100644 index 0000000000..412f869c3a --- /dev/null +++ b/test/e2e/kubefedcluster.go @@ -0,0 +1,43 @@ +package e2e + +import ( + "fmt" + + "k8s.io/client-go/discovery" + restclient "k8s.io/client-go/rest" + + "sigs.k8s.io/kubefed/pkg/controller/util" + "sigs.k8s.io/kubefed/test/e2e/framework" + + . "github.com/onsi/ginkgo" //nolint:stylecheck + . "github.com/onsi/gomega" //nolint:stylecheck +) + +var _ = Describe("KubeFedCluster", func() { + f := framework.NewKubeFedFramework("kubefedcluster") + + tl := framework.NewE2ELogger() + + It("should correctly report the Kubernetes git version of the cluster", func() { + userAgent := "test-kubefedcluster-kubernetes-version" + client := f.Client(userAgent) + clusterList := framework.ListKubeFedClusters(tl, client, framework.TestContext.KubeFedSystemNamespace) + + for _, cluster := range clusterList.Items { + config, err := util.BuildClusterConfig(&cluster, client, framework.TestContext.KubeFedSystemNamespace) + Expect(err).NotTo(HaveOccurred()) + restclient.AddUserAgent(config, userAgent) + + client, err := discovery.NewDiscoveryClientForConfig(config) + if err != nil { + tl.Fatalf("Error creating discovery client for cluster %q", cluster.Name) + } + version, err := client.ServerVersion() + if err != nil { + tl.Fatalf("Error retrieving Kubernetes version of cluster %q", cluster.Name) + } + Expect(cluster.Status.KubernetesVersion).To(Equal(version.GitVersion), fmt.Sprintf( + "the KubernetesVersion field of KubeFedCluster %q should be equal to the Kubernetes git version of the cluster", cluster.Name)) + } + }) +}) From c56c6248e5103b5ad49e0dff2462d9aa5737b9a4 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Sun, 17 Apr 2022 15:26:41 +0800 Subject: [PATCH 19/21] doc the Kubernetes version feature of KubeFedCluster --- docs/cluster-registration.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/cluster-registration.md b/docs/cluster-registration.md index 52e41859d7..6f376f0187 100644 --- a/docs/cluster-registration.md +++ b/docs/cluster-registration.md @@ -34,12 +34,14 @@ Check the status of the joined clusters by using the following command. ```bash kubectl -n kube-federation-system get kubefedclusters -NAME READY AGE -cluster1 True 1m -cluster2 True 1m +NAME AGE READY KUBERNETES-VERSION +cluster1 1m True v1.21.2 +cluster2 1m True v1.22.0 ``` +The Kubernetes version is checked periodically along with the cluster health check so that it would be automatically updated within the cluster health check period after a Kubernetes upgrade/downgrade of the cluster. 
+ # Joining kind clusters on MacOS A Kubernetes cluster deployed with [kind](https://sigs.k8s.io/kind) on Docker From 3157e456e92820708a0e828261ee4dbc35351bd9 Mon Sep 17 00:00:00 2001 From: Jonathan Juares Beber Date: Thu, 17 Mar 2022 22:23:42 +0100 Subject: [PATCH 20/21] fix: ignore non-targeted clusters during deletion During the deletion of resources, the controller errors if any cluster is marked as non-ready. That leads to objects being stuck during deletion while these clusters are present, even if the resource is not deployed in such clusters. This commit makes the reconciliation for deletion to compute the placement of the federated resources and ignore other clusters. This way, the deletion will fail just if the non-ready clusters are the clusters where the object is federated. --- pkg/controller/sync/controller.go | 60 ++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/pkg/controller/sync/controller.go b/pkg/controller/sync/controller.go index 8627875174..a6284cca1f 100644 --- a/pkg/controller/sync/controller.go +++ b/pkg/controller/sync/controller.go @@ -268,7 +268,19 @@ func (s *KubeFedSyncController) reconcile(qualifiedName util.QualifiedName) util apiResource := s.typeConfig.GetTargetType() gvk := apiResourceToGVK(&apiResource) klog.V(2).Infof("Ensuring the removal of the label %q from %s %q in member clusters.", util.ManagedByKubeFedLabelKey, gvk.Kind, qualifiedName) - err = s.removeManagedLabel(gvk, qualifiedName) + // We can't compute resource placement, therefore we try to + // remove it from all member clusters. + clusters, err := s.informer.GetClusters() + if err != nil { + wrappedErr := errors.Wrap(err, "failed to get member clusters") + runtime.HandleError(wrappedErr) + return util.StatusError + } + clusterNames := sets.NewString() + for _, cluster := range clusters { + clusterNames = clusterNames.Insert(cluster.Name) + } + err = s.removeManagedLabel(gvk, qualifiedName, clusterNames) if err != nil { wrappedErr := errors.Wrapf(err, "failed to remove the label %q from %s %q in member clusters", util.ManagedByKubeFedLabelKey, gvk.Kind, qualifiedName) runtime.HandleError(wrappedErr) @@ -501,7 +513,19 @@ func (s *KubeFedSyncController) ensureDeletion(fedResource FederatedResource) ut return util.StatusError } klog.V(2).Infof("Initiating the removal of the label %q from resources previously managed by %s %q.", util.ManagedByKubeFedLabelKey, kind, key) - err = s.removeManagedLabel(fedResource.TargetGVK(), fedResource.TargetName()) + clusters, err := s.informer.GetClusters() + if err != nil { + wrappedErr := errors.Wrap(err, "failed to get member clusters") + runtime.HandleError(wrappedErr) + return util.StatusError + } + targetClusters, err := fedResource.ComputePlacement(clusters) + if err != nil { + wrappedErr := errors.Wrapf(err, "failed to compute placement for %s %q", kind, key) + runtime.HandleError(wrappedErr) + return util.StatusError + } + err = s.removeManagedLabel(fedResource.TargetGVK(), fedResource.TargetName(), targetClusters) if err != nil { wrappedErr := errors.Wrapf(err, "failed to remove the label %q from all resources previously managed by %s %q", util.ManagedByKubeFedLabelKey, kind, key) runtime.HandleError(wrappedErr) @@ -533,8 +557,8 @@ func (s *KubeFedSyncController) ensureDeletion(fedResource FederatedResource) ut // removeManagedLabel attempts to remove the managed label from // resources with the given name in member clusters. 
-func (s *KubeFedSyncController) removeManagedLabel(gvk schema.GroupVersionKind, qualifiedName util.QualifiedName) error { - ok, err := s.handleDeletionInClusters(gvk, qualifiedName, func(dispatcher dispatch.UnmanagedDispatcher, clusterName string, clusterObj *unstructured.Unstructured) { +func (s *KubeFedSyncController) removeManagedLabel(gvk schema.GroupVersionKind, qualifiedName util.QualifiedName, clusters sets.String) error { + ok, err := s.handleDeletionInClusters(gvk, qualifiedName, clusters, func(dispatcher dispatch.UnmanagedDispatcher, clusterName string, clusterObj *unstructured.Unstructured) { if clusterObj.GetDeletionTimestamp() != nil { return } @@ -554,8 +578,17 @@ func (s *KubeFedSyncController) deleteFromClusters(fedResource FederatedResource gvk := fedResource.TargetGVK() qualifiedName := fedResource.TargetName() + clusters, err := s.informer.GetClusters() + if err != nil { + return false, err + } + targetClusters, err := fedResource.ComputePlacement(clusters) + if err != nil { + return false, err + } + remainingClusters := []string{} - ok, err := s.handleDeletionInClusters(gvk, qualifiedName, func(dispatcher dispatch.UnmanagedDispatcher, clusterName string, clusterObj *unstructured.Unstructured) { + ok, err := s.handleDeletionInClusters(gvk, qualifiedName, targetClusters, func(dispatcher dispatch.UnmanagedDispatcher, clusterName string, clusterObj *unstructured.Unstructured) { // If the containing namespace of a FederatedNamespace is // marked for deletion, it is impossible to require the // removal of the namespace in advance of removal of the sync @@ -615,9 +648,17 @@ func (s *KubeFedSyncController) ensureRemovedOrUnmanaged(fedResource FederatedRe return errors.Wrap(err, "failed to get a list of clusters") } + targetClusters, err := fedResource.ComputePlacement(clusters) + if err != nil { + return errors.Wrapf(err, "failed to compute placement for %s %q", fedResource.FederatedKind(), fedResource.FederatedName().Name) + } + dispatcher := dispatch.NewCheckUnmanagedDispatcher(s.informer.GetClientForCluster, fedResource.TargetGVK(), fedResource.TargetName()) unreadyClusters := []string{} for _, cluster := range clusters { + if !targetClusters.Has(cluster.Name) { + continue + } if !util.IsClusterReady(&cluster.Status) { unreadyClusters = append(unreadyClusters, cluster.Name) continue @@ -639,9 +680,9 @@ func (s *KubeFedSyncController) ensureRemovedOrUnmanaged(fedResource FederatedRe // handleDeletionInClusters invokes the provided deletion handler for // each managed resource in member clusters. 
-func (s *KubeFedSyncController) handleDeletionInClusters(gvk schema.GroupVersionKind, qualifiedName util.QualifiedName, +func (s *KubeFedSyncController) handleDeletionInClusters(gvk schema.GroupVersionKind, qualifiedName util.QualifiedName, clusters sets.String, deletionFunc func(dispatcher dispatch.UnmanagedDispatcher, clusterName string, clusterObj *unstructured.Unstructured)) (bool, error) { - clusters, err := s.informer.GetClusters() + memberClusters, err := s.informer.GetClusters() if err != nil { return false, errors.Wrap(err, "failed to get a list of clusters") } @@ -649,8 +690,11 @@ func (s *KubeFedSyncController) handleDeletionInClusters(gvk schema.GroupVersion dispatcher := dispatch.NewUnmanagedDispatcher(s.informer.GetClientForCluster, gvk, qualifiedName) retrievalFailureClusters := []string{} unreadyClusters := []string{} - for _, cluster := range clusters { + for _, cluster := range memberClusters { clusterName := cluster.Name + if !clusters.Has(clusterName) { + continue + } if !util.IsClusterReady(&cluster.Status) { unreadyClusters = append(unreadyClusters, clusterName) From d19b80cd3b7bfb9e938b429f6a1764cc81af54d2 Mon Sep 17 00:00:00 2001 From: Jonathan Juares Beber Date: Fri, 25 Mar 2022 19:03:23 +0100 Subject: [PATCH 21/21] Add not-ready clusters E2E test The e2e tests do not consider any unhealthy cluster. This commit adds a test case where the cluster federates itself twice and makes one the virtual federation not-ready by changing the API endpoint to an invalid address. The ready cluster is labelled and a complete CRUD test guarantees that the not-ready cluster does not impact operations if the the cluster is not targeted. --- scripts/pre-commit.sh | 16 +++ test/common/crudtester.go | 72 ++++++++--- test/e2e/crd.go | 6 +- test/e2e/crud.go | 24 ++-- test/e2e/deleteoptions.go | 4 +- test/e2e/e2e.go | 6 +- test/e2e/framework/test_context.go | 5 + test/e2e/not_ready.go | 201 +++++++++++++++++++++++++++++ test/e2e/placement.go | 4 +- test/e2e/scale.go | 4 +- 10 files changed, 297 insertions(+), 45 deletions(-) create mode 100644 test/e2e/not_ready.go diff --git a/scripts/pre-commit.sh b/scripts/pre-commit.sh index dfc6fd390e..5378382bf1 100755 --- a/scripts/pre-commit.sh +++ b/scripts/pre-commit.sh @@ -105,6 +105,19 @@ function run-e2e-tests-with-in-memory-controllers() { ${IN_MEMORY_E2E_TEST_CMD} } +function run-e2e-tests-with-not-ready-clusters() { + # Run the tests without any verbosity. The unhealthy nodes generate + # too much logs. + go test -timeout 900s ./test/e2e \ + -args -kubeconfig=${HOME}/.kube/config \ + -single-call-timeout=2m \ + -ginkgo.randomizeAllSpecs \ + -limited-scope=true \ + -in-memory-controllers=true \ + -simulate-federation=true \ + -ginkgo.focus='\[NOT_READY\]' +} + function run-namespaced-e2e-tests() { local namespaced_e2e_test_cmd="${E2E_TEST_CMD} -kubefed-namespace=foo -limited-scope=true" # Run the placement test separately to avoid crud failures if @@ -200,6 +213,9 @@ kubectl scale deployments kubefed-controller-manager -n kube-federation-system - echo "Running e2e tests with race detector against cluster-scoped kubefed with in-memory controllers" run-e2e-tests-with-in-memory-controllers +echo "Running e2e tests with not-ready clusters" +run-e2e-tests-with-not-ready-clusters + # FederatedTypeConfig controller is needed to remove finalizers from # FederatedTypeConfigs in order to successfully delete the KubeFed # control plane in the next step. 
diff --git a/test/common/crudtester.go b/test/common/crudtester.go index d4a2693859..4903e026c8 100644 --- a/test/common/crudtester.go +++ b/test/common/crudtester.go @@ -41,6 +41,7 @@ import ( "sigs.k8s.io/kubefed/pkg/apis/core/common" "sigs.k8s.io/kubefed/pkg/apis/core/typeconfig" fedv1a1 "sigs.k8s.io/kubefed/pkg/apis/core/v1alpha1" + "sigs.k8s.io/kubefed/pkg/apis/core/v1beta1" genericclient "sigs.k8s.io/kubefed/pkg/client/generic" "sigs.k8s.io/kubefed/pkg/controller/sync" "sigs.k8s.io/kubefed/pkg/controller/sync/status" @@ -65,6 +66,7 @@ type FederatedTypeCrudTester struct { // operation that involves member clusters may take longer due to // propagation latency. clusterWaitTimeout time.Duration + clustersNamespace string } type TestClusterConfig struct { @@ -77,7 +79,7 @@ type TestCluster struct { Client util.ResourceClient } -func NewFederatedTypeCrudTester(testLogger TestLogger, typeConfig typeconfig.Interface, kubeConfig *rest.Config, testClusters map[string]TestCluster, waitInterval, clusterWaitTimeout time.Duration) (*FederatedTypeCrudTester, error) { +func NewFederatedTypeCrudTester(testLogger TestLogger, typeConfig typeconfig.Interface, kubeConfig *rest.Config, testClusters map[string]TestCluster, clustersNamespace string, waitInterval, clusterWaitTimeout time.Duration) (*FederatedTypeCrudTester, error) { return &FederatedTypeCrudTester{ tl: testLogger, typeConfig: typeConfig, @@ -87,11 +89,12 @@ func NewFederatedTypeCrudTester(testLogger TestLogger, typeConfig typeconfig.Int testClusters: testClusters, waitInterval: waitInterval, clusterWaitTimeout: clusterWaitTimeout, + clustersNamespace: clustersNamespace, }, nil } -func (c *FederatedTypeCrudTester) CheckLifecycle(targetObject *unstructured.Unstructured, overrides []interface{}) { - fedObject := c.CheckCreate(targetObject, overrides) +func (c *FederatedTypeCrudTester) CheckLifecycle(targetObject *unstructured.Unstructured, overrides []interface{}, selectors map[string]string) { + fedObject := c.CheckCreate(targetObject, overrides, selectors) c.CheckStatusCreated(util.NewQualifiedName(fedObject)) @@ -104,7 +107,7 @@ func (c *FederatedTypeCrudTester) CheckLifecycle(targetObject *unstructured.Unst c.CheckDelete(fedObject, false) } -func (c *FederatedTypeCrudTester) Create(targetObject *unstructured.Unstructured, overrides []interface{}) *unstructured.Unstructured { +func (c *FederatedTypeCrudTester) Create(targetObject *unstructured.Unstructured, overrides []interface{}, selectors map[string]string) *unstructured.Unstructured { qualifiedName := util.NewQualifiedName(targetObject) kind := c.typeConfig.GetTargetType().Kind fedKind := c.typeConfig.GetFederatedType().Kind @@ -113,10 +116,7 @@ func (c *FederatedTypeCrudTester) Create(targetObject *unstructured.Unstructured c.tl.Fatalf("Error obtaining %s from %s %q: %v", fedKind, kind, qualifiedName, err) } - fedObject, err = c.setAdditionalTestData(fedObject, overrides, targetObject.GetGenerateName()) - if err != nil { - c.tl.Fatalf("Error setting overrides and placement on %s %q: %v", fedKind, qualifiedName, err) - } + fedObject = c.setAdditionalTestData(fedObject, overrides, selectors, targetObject.GetGenerateName()) return c.createResource(c.typeConfig.GetFederatedType(), fedObject) } @@ -141,15 +141,15 @@ func (c *FederatedTypeCrudTester) resourceClient(apiResource metav1.APIResource) return client } -func (c *FederatedTypeCrudTester) CheckCreate(targetObject *unstructured.Unstructured, overrides []interface{}) *unstructured.Unstructured { - fedObject := c.Create(targetObject, 
overrides) +func (c *FederatedTypeCrudTester) CheckCreate(targetObject *unstructured.Unstructured, overrides []interface{}, selectors map[string]string) *unstructured.Unstructured { + fedObject := c.Create(targetObject, overrides, selectors) c.CheckPropagation(fedObject) return fedObject } // AdditionalTestData additionally sets fixture overrides and placement clusternames into federated object -func (c *FederatedTypeCrudTester) setAdditionalTestData(fedObject *unstructured.Unstructured, overrides []interface{}, generateName string) (*unstructured.Unstructured, error) { +func (c *FederatedTypeCrudTester) setAdditionalTestData(fedObject *unstructured.Unstructured, overrides []interface{}, selectors map[string]string, generateName string) *unstructured.Unstructured { fedKind := c.typeConfig.GetFederatedType().Kind qualifiedName := util.NewQualifiedName(fedObject) @@ -159,17 +159,23 @@ func (c *FederatedTypeCrudTester) setAdditionalTestData(fedObject *unstructured. c.tl.Fatalf("Error updating overrides in %s %q: %v", fedKind, qualifiedName, err) } } - clusterNames := []string{} - for name := range c.testClusters { - clusterNames = append(clusterNames, name) - } - err := util.SetClusterNames(fedObject, clusterNames) - if err != nil { - c.tl.Fatalf("Error setting cluster names in %s %q: %v", fedKind, qualifiedName, err) + if selectors != nil { + if err := util.SetClusterSelector(fedObject, selectors); err != nil { + c.tl.Fatalf("Error setting cluster selectors for %s/%s: %v", fedObject.GetKind(), fedObject.GetName(), err) + } + } else { + clusterNames := []string{} + for name := range c.testClusters { + clusterNames = append(clusterNames, name) + } + err := util.SetClusterNames(fedObject, clusterNames) + if err != nil { + c.tl.Fatalf("Error setting cluster names in %s %q: %v", fedKind, qualifiedName, err) + } } fedObject.SetGenerateName(generateName) - return fedObject, err + return fedObject } func (c *FederatedTypeCrudTester) CheckUpdate(fedObject *unstructured.Unstructured) { @@ -334,7 +340,14 @@ func (c *FederatedTypeCrudTester) CheckDelete(fedObject *unstructured.Unstructur if deletingInCluster { stateMsg = "not present" } + clusters, err := util.ComputePlacement(fedObject, c.getClusters(), false) + if err != nil { + c.tl.Fatalf("Couldn't retrieve clusters for %s/%s: %v", federatedKind, name, err) + } for clusterName, testCluster := range c.testClusters { + if !clusters.Has(clusterName) { + continue + } namespace = util.QualifiedNameForCluster(clusterName, qualifiedName).Namespace err = wait.PollImmediate(c.waitInterval, waitTimeout, func() (bool, error) { obj, err := testCluster.Client.Resources(namespace).Get(context.Background(), name, metav1.GetOptions{}) @@ -425,16 +438,33 @@ func (c *FederatedTypeCrudTester) CheckReplicaSet(fedObject *unstructured.Unstru } } +func (c *FederatedTypeCrudTester) getClusters() []*v1beta1.KubeFedCluster { + client, err := genericclient.New(c.kubeConfig) + if err != nil { + c.tl.Fatalf("Failed to get kubefed clientset: %v", err) + } + + fedClusters := []*v1beta1.KubeFedCluster{} + for cluster := range c.testClusters { + clusterResource := &v1beta1.KubeFedCluster{} + err = client.Get(context.Background(), clusterResource, c.clustersNamespace, cluster) + if err != nil { + c.tl.Fatalf("Cannot get cluster %s: %v", cluster, err) + } + fedClusters = append(fedClusters, clusterResource) + } + return fedClusters +} + // CheckPropagation checks propagation for the crud tester's clients func (c *FederatedTypeCrudTester) CheckPropagation(fedObject 
*unstructured.Unstructured) { federatedKind := c.typeConfig.GetFederatedType().Kind qualifiedName := util.NewQualifiedName(fedObject) - clusterNames, err := util.GetClusterNames(fedObject) + selectedClusters, err := util.ComputePlacement(fedObject, c.getClusters(), false) if err != nil { c.tl.Fatalf("Error retrieving cluster names for %s %q: %v", federatedKind, qualifiedName, err) } - selectedClusters := sets.NewString(clusterNames...) templateVersion, err := sync.GetTemplateHash(fedObject.Object) if err != nil { diff --git a/test/e2e/crd.go b/test/e2e/crd.go index 38d7c8c8d0..c114a9dd66 100644 --- a/test/e2e/crd.go +++ b/test/e2e/crd.go @@ -215,10 +215,10 @@ overrides: return targetObj, overrides, nil } - crudTester, targetObject, overrides := initCrudTest(f, tl, typeConfig, testObjectsFunc) + crudTester, targetObject, overrides := initCrudTest(f, tl, f.KubeFedSystemNamespace(), typeConfig, testObjectsFunc) // Make a copy for use in the orphan check. deletionTargetObject := targetObject.DeepCopy() - crudTester.CheckLifecycle(targetObject, overrides) + crudTester.CheckLifecycle(targetObject, overrides, nil) if namespaced { // This check should not fail so long as the main test loop @@ -228,7 +228,7 @@ overrides: tl.Fatalf("Test of orphaned deletion assumes deletion of the containing namespace") } // Perform a check of orphan deletion. - fedObject := crudTester.CheckCreate(deletionTargetObject, nil) + fedObject := crudTester.CheckCreate(deletionTargetObject, nil, nil) orphanDeletion := true crudTester.CheckDelete(fedObject, orphanDeletion) } diff --git a/test/e2e/crud.go b/test/e2e/crud.go index b76129888e..b8746977f0 100644 --- a/test/e2e/crud.go +++ b/test/e2e/crud.go @@ -61,8 +61,8 @@ var _ = Describe("Federated", func() { Describe(fmt.Sprintf("%q", typeConfigName), func() { It("should be created, read, updated and deleted successfully", func() { typeConfig, testObjectsFunc := getCrudTestInput(f, tl, typeConfigName, fixture) - crudTester, targetObject, overrides := initCrudTest(f, tl, typeConfig, testObjectsFunc) - crudTester.CheckLifecycle(targetObject, overrides) + crudTester, targetObject, overrides := initCrudTest(f, tl, f.KubeFedSystemNamespace(), typeConfig, testObjectsFunc) + crudTester.CheckLifecycle(targetObject, overrides, nil) }) for _, remoteStatusTypeName := range containedTypeNames { @@ -78,8 +78,8 @@ var _ = Describe("Federated", func() { tl.Logf("Show the content of the kubefedconfig file: '%v'", kubeFedConfig) typeConfig, testObjectsFunc := getCrudTestInput(f, tl, typeConfigName, fixture) - crudTester, targetObject, overrides := initCrudTest(f, tl, typeConfig, testObjectsFunc) - fedObject := crudTester.CheckCreate(targetObject, overrides) + crudTester, targetObject, overrides := initCrudTest(f, tl, f.KubeFedSystemNamespace(), typeConfig, testObjectsFunc) + fedObject := crudTester.CheckCreate(targetObject, overrides, nil) By("Checking the remote status filled for each federated resource for every cluster") tl.Logf("Checking the existence of a remote status for each fedObj in every cluster: %v", fedObject) @@ -105,12 +105,12 @@ var _ = Describe("Federated", func() { typeConfig, testObjectsFunc := getCrudTestInput(f, tl, typeConfigName, fixture) // Initialize the test without creating a federated namespace. 
- crudTester, targetObject, overrides := initCrudTestWithPropagation(f, tl, typeConfig, testObjectsFunc, false) + crudTester, targetObject, overrides := initCrudTestWithPropagation(f, tl, f.KubeFedSystemNamespace(), typeConfig, testObjectsFunc, false) kind := typeConfig.GetFederatedType().Kind By(fmt.Sprintf("Creating a %s whose containing namespace is not federated", kind)) - fedObject := crudTester.Create(targetObject, overrides) + fedObject := crudTester.Create(targetObject, overrides, nil) qualifiedName := util.NewQualifiedName(fedObject) @@ -143,7 +143,7 @@ var _ = Describe("Federated", func() { It("should have the managed label removed if not managed", func() { typeConfig, testObjectsFunc := getCrudTestInput(f, tl, typeConfigName, fixture) - crudTester, targetObject, _ := initCrudTest(f, tl, typeConfig, testObjectsFunc) + crudTester, targetObject, _ := initCrudTest(f, tl, f.KubeFedSystemNamespace(), typeConfig, testObjectsFunc) testClusters := crudTester.TestClusters() @@ -192,7 +192,7 @@ var _ = Describe("Federated", func() { It("should not be deleted if unlabeled", func() { typeConfig, testObjectsFunc := getCrudTestInput(f, tl, typeConfigName, fixture) - crudTester, targetObject, _ := initCrudTest(f, tl, typeConfig, testObjectsFunc) + crudTester, targetObject, _ := initCrudTest(f, tl, f.KubeFedSystemNamespace(), typeConfig, testObjectsFunc) testClusters := crudTester.TestClusters() @@ -315,13 +315,13 @@ func getCrudTestInput(f framework.KubeFedFramework, tl common.TestLogger, return typeConfig, testObjectsFunc } -func initCrudTest(f framework.KubeFedFramework, tl common.TestLogger, +func initCrudTest(f framework.KubeFedFramework, tl common.TestLogger, clustersNamespace string, typeConfig typeconfig.Interface, testObjectsFunc testObjectsAccessor) ( *common.FederatedTypeCrudTester, *unstructured.Unstructured, []interface{}) { - return initCrudTestWithPropagation(f, tl, typeConfig, testObjectsFunc, true) + return initCrudTestWithPropagation(f, tl, clustersNamespace, typeConfig, testObjectsFunc, true) } -func initCrudTestWithPropagation(f framework.KubeFedFramework, tl common.TestLogger, +func initCrudTestWithPropagation(f framework.KubeFedFramework, tl common.TestLogger, clustersNamespace string, typeConfig typeconfig.Interface, testObjectsFunc testObjectsAccessor, ensureNamespacePropagation bool) ( *common.FederatedTypeCrudTester, *unstructured.Unstructured, []interface{}) { @@ -343,7 +343,7 @@ func initCrudTestWithPropagation(f framework.KubeFedFramework, tl common.TestLog targetAPIResource := typeConfig.GetTargetType() testClusters := f.ClusterDynamicClients(&targetAPIResource, userAgent) - crudTester, err := common.NewFederatedTypeCrudTester(tl, typeConfig, kubeConfig, testClusters, framework.PollInterval, framework.TestContext.SingleCallTimeout) + crudTester, err := common.NewFederatedTypeCrudTester(tl, typeConfig, kubeConfig, testClusters, clustersNamespace, framework.PollInterval, framework.TestContext.SingleCallTimeout) if err != nil { tl.Fatalf("Error creating crudtester for %q: %v", federatedKind, err) } diff --git a/test/e2e/deleteoptions.go b/test/e2e/deleteoptions.go index 9abd7cb6bc..98c0c9ca70 100644 --- a/test/e2e/deleteoptions.go +++ b/test/e2e/deleteoptions.go @@ -39,8 +39,8 @@ var _ = Describe("DeleteOptions", func() { It("Deployment should be created and deleted successfully, but ReplicaSet that created by Deployment won't be deleted", func() { typeConfig, testObjectsFunc := getCrudTestInput(f, tl, typeConfigName, fixture) - crudTester, targetObject, overrides := 
initCrudTest(f, tl, typeConfig, testObjectsFunc) - fedObject := crudTester.CheckCreate(targetObject, overrides) + crudTester, targetObject, overrides := initCrudTest(f, tl, f.KubeFedSystemNamespace(), typeConfig, testObjectsFunc) + fedObject := crudTester.CheckCreate(targetObject, overrides, nil) By("Set PropagationPolicy property as 'Orphan' on the DeleteOptions for Federated Deployment") orphan := metav1.DeletePropagationOrphan diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index ac762ace64..9b7f103fdf 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -49,9 +49,9 @@ func RunE2ETests(t *testing.T) { var _ = ginkgo.SynchronizedBeforeSuite(func() []byte { // Run only on Ginkgo node 1 - if framework.TestContext.ScaleTest { - // Scale testing will initialize an in-memory control plane - // after the creation of a simulated federation. + // Some tests require simulated federation and will initialize an + // in-memory control plane. + if framework.TestContext.SimulateFederation { return nil } diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index 8ab5676d67..74bbd2cb02 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -38,6 +38,7 @@ type TestContextType struct { WaitForFinalization bool ScaleTest bool ScaleClusterCount int + SimulateFederation bool } func (t *TestContextType) RunControllers() bool { @@ -72,6 +73,7 @@ func registerFlags(t *TestContextType) { flag.BoolVar(&t.WaitForFinalization, "wait-for-finalization", true, "Whether the test suite should wait for finalization before stopping fixtures or exiting. Setting this to false will speed up test execution but likely result in wedged namespaces and is only recommended for disposeable clusters.") flag.BoolVar(&t.ScaleTest, "scale-test", false, "Whether the test suite should be configured for scale testing. Not compatible with most tests.") + flag.BoolVar(&t.SimulateFederation, "simulate-federation", false, "Whether the tests require a simulated federation.") flag.IntVar(&t.ScaleClusterCount, "scale-cluster-count", 1, "How many member clusters to simulate when scale testing.") } @@ -83,6 +85,9 @@ func validateFlags(t *TestContextType) { if t.ScaleTest { t.InMemoryControllers = true t.LimitedScope = true + // Scale testing will initialize an in-memory control plane + // after the creation of a simulated federation. + t.SimulateFederation = true // Scale testing will create a namespace per simulated cluster // and for large numbers of such namespaces the finalization // wait could be considerable. diff --git a/test/e2e/not_ready.go b/test/e2e/not_ready.go new file mode 100644 index 0000000000..1be30c890f --- /dev/null +++ b/test/e2e/not_ready.go @@ -0,0 +1,201 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package e2e + +import ( + "context" + "time" + + apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + runtimeclient "sigs.k8s.io/controller-runtime/pkg/client" + + fedv1b1 "sigs.k8s.io/kubefed/pkg/apis/core/v1beta1" + genericclient "sigs.k8s.io/kubefed/pkg/client/generic" + "sigs.k8s.io/kubefed/pkg/controller/util" + "sigs.k8s.io/kubefed/pkg/kubefedctl" + "sigs.k8s.io/kubefed/test/common" + "sigs.k8s.io/kubefed/test/e2e/framework" + + . "github.com/onsi/ginkgo" //nolint:stylecheck +) + +// WARNING This test modifies the runtime behavior of the sync +// controller. Running it concurrently with other tests that use the +// sync controller is likely to result in unexpected behavior. + +// This test is intended to validate CRUD operations even in the +// presence of not ready federated clusters if they are not the target +// of the operations. The test creates multiple self joins to simulate +// healthy and unhealthy clusters. +// +// Usually joining a cluster creates a namespace with the same name as +// the kubefed system namespace in the host cluster. To support +// multiple self-joins, the kubefed namespace in member clusters needs +// to vary by join. +// +// This test needs to run namespaced controllers since each cluster +// will be simulated by a single namespace in the host cluster. The +// name of each member cluster's namespace should match the name of +// the member cluster. +// +var _ = Describe("[NOT_READY] Simulated not-ready nodes", func() { + baseName := "unhealthy-test" + f := framework.NewKubeFedFramework(baseName) + + tl := framework.NewE2ELogger() + + typeConfigFixtures := common.TypeConfigFixturesOrDie(tl) + + It("should simulate unhealthy clusters", func() { + if !framework.TestContext.LimitedScope { + framework.Skipf("Simulated scale testing is not compatible with cluster-scoped federation.") + } + if !framework.TestContext.InMemoryControllers { + framework.Skipf("Simulated scale testing requires in-process controllers.") + } + + client := f.KubeClient(baseName) + + // Create the host cluster namespace + generateName := "unhealthy-host-" + hostNamespace, err := framework.CreateNamespace(client, generateName) + if err != nil { + tl.Fatalf("Error creating namespace: %v", err) + } + defer framework.DeleteNamespace(client, hostNamespace) + + // Reconfigure the test context to ensure the fixture setup + // will work correctly with the simulated federation. 
+ framework.TestContext.KubeFedSystemNamespace = hostNamespace + hostConfig := f.KubeConfig() + + unhealthyCluster := "unhealthy" + _, err = kubefedctl.TestOnlyJoinClusterForNamespace(hostConfig, hostConfig, hostNamespace, unhealthyCluster, hostNamespace, unhealthyCluster, "", apiextv1.NamespaceScoped, false, false) + if err != nil { + tl.Fatalf("Error joining unhealthy cluster: %v", err) + } + + healthyCluster := "healthy" + _, err = kubefedctl.TestOnlyJoinClusterForNamespace(hostConfig, hostConfig, hostNamespace, healthyCluster, hostNamespace, healthyCluster, "", apiextv1.NamespaceScoped, false, false) + if err != nil { + tl.Fatalf("Error joining healthy cluster: %v", err) + } + hostClient, err := genericclient.New(hostConfig) + if err != nil { + tl.Fatalf("Failed to get kubefed clientset: %v", err) + } + healthyFedCluster := &unstructured.Unstructured{} + healthyFedCluster.SetGroupVersionKind(schema.GroupVersionKind{ + Kind: "KubeFedCluster", + Group: fedv1b1.SchemeGroupVersion.Group, + Version: fedv1b1.SchemeGroupVersion.Version, + }) + + err = hostClient.Get(context.Background(), healthyFedCluster, hostNamespace, healthyCluster) + if err != nil { + tl.Fatalf("Cannot get healthyCluster: %v", err) + } + addLabel(healthyFedCluster, "healthy", "true") + err = hostClient.Update(context.TODO(), healthyFedCluster) + if err != nil { + tl.Fatalf("Error updating label for healthy cluster: %v", err) + } + + // Override naming methods to allow the sync controller to + // work with a simulated federation environment. + oldNamespaceForCluster := util.NamespaceForCluster + util.NamespaceForCluster = func(clusterName, namespace string) string { + return clusterName + } + defer func() { + util.NamespaceForCluster = oldNamespaceForCluster + }() + + oldNamespaceForResource := util.NamespaceForResource + util.NamespaceForResource = func(resourceNamespace, fedNamespace string) string { + return fedNamespace + } + defer func() { + util.NamespaceForResource = oldNamespaceForResource + }() + oldQualifiedNameForCluster := util.QualifiedNameForCluster + util.QualifiedNameForCluster = func(clusterName string, qualifiedName util.QualifiedName) util.QualifiedName { + return util.QualifiedName{ + Name: qualifiedName.Name, + Namespace: clusterName, + } + } + defer func() { + util.QualifiedNameForCluster = oldQualifiedNameForCluster + }() + + // Ensure that the cluster controller is able to successfully + // health check the simulated clusters. + framework.SetUpControlPlane() + framework.WaitForUnmanagedClusterReadiness() + + fedCluster := &fedv1b1.KubeFedCluster{ + ObjectMeta: v1.ObjectMeta{ + Name: unhealthyCluster, + Namespace: hostNamespace, + }, + } + err = hostClient.Patch(context.Background(), fedCluster, runtimeclient.RawPatch(types.MergePatchType, []byte(`{"spec": {"apiEndpoint": "http://invalid_adress"}}`))) + if err != nil { + tl.Fatalf("Failed to patch kubefed cluster: %v", err) + } + + err = wait.Poll(time.Second*5, time.Second*30, func() (bool, error) { + cluster := &fedv1b1.KubeFedCluster{} + err := hostClient.Get(context.TODO(), cluster, hostNamespace, unhealthyCluster) + if err != nil { + tl.Fatalf("Failed to retrieve unhealthy cluster: %v", err) + } + return !util.IsClusterReady(&cluster.Status), nil + }) + if err != nil { + tl.Fatalf("Error waiting for unhealthy cluster: %v", err) + } + + // Enable federation of namespaces to ensure that the sync + // controller for a namespaced type will be able to start. 
+ enableTypeFederation(tl, hostConfig, hostNamespace, "namespaces") + + // Enable federation of a namespaced type. + targetType := "secrets" + typeConfig := enableTypeFederation(tl, hostConfig, hostNamespace, targetType) + // Perform crud testing for the type + testObjectsFunc := func(namespace string, clusterNames []string) (*unstructured.Unstructured, []interface{}, error) { + fixture := typeConfigFixtures[targetType] + targetObject, err := common.NewTestTargetObject(typeConfig, namespace, fixture) + if err != nil { + return nil, nil, err + } + return targetObject, nil, err + } + crudTester, targetObject, overrides := initCrudTestWithPropagation(f, tl, hostNamespace, typeConfig, testObjectsFunc, false) + fedObject := crudTester.CheckCreate(targetObject, overrides, map[string]string{"healthy": "true"}) + crudTester.CheckStatusCreated(util.NewQualifiedName(fedObject)) + crudTester.CheckUpdate(fedObject) + crudTester.CheckDelete(fedObject, false) + }) +}) diff --git a/test/e2e/placement.go b/test/e2e/placement.go index 0ad650a86c..af0965ff65 100644 --- a/test/e2e/placement.go +++ b/test/e2e/placement.go @@ -88,8 +88,8 @@ var _ = Describe("Placement", func() { } return targetObject, nil, err } - crudTester, desiredTargetObject, _ := initCrudTest(f, tl, selectedTypeConfig, testObjectsFunc) - fedObject := crudTester.CheckCreate(desiredTargetObject, nil) + crudTester, desiredTargetObject, _ := initCrudTest(f, tl, f.KubeFedSystemNamespace(), selectedTypeConfig, testObjectsFunc) + fedObject := crudTester.CheckCreate(desiredTargetObject, nil, nil) defer func() { crudTester.CheckDelete(fedObject, false) }() diff --git a/test/e2e/scale.go b/test/e2e/scale.go index 6432cd573e..dbab8bde85 100644 --- a/test/e2e/scale.go +++ b/test/e2e/scale.go @@ -280,8 +280,8 @@ var _ = Describe("Simulated Scale", func() { } return targetObject, nil, err } - crudTester, targetObject, overrides := initCrudTestWithPropagation(f, tl, typeConfig, testObjectsFunc, false) - crudTester.CheckLifecycle(targetObject, overrides) + crudTester, targetObject, overrides := initCrudTestWithPropagation(f, tl, f.KubeFedSystemNamespace(), typeConfig, testObjectsFunc, false) + crudTester.CheckLifecycle(targetObject, overrides, nil) // Delete clusters to minimize errors logged by the cluster // controller.
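Taken together, the crud tester now derives its target clusters from the federated object's placement (util.ComputePlacement) rather than from the full test-cluster map, and the not-ready test drives that path by labelling only the healthy self-joined cluster and passing a {"healthy": "true"} selector to CheckCreate. The sketch below shows how such a label selector would be expected to pick clusters; fedCluster and selectClusters are illustrative stand-ins, not kubefed's ComputePlacement implementation.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

// fedCluster is an illustrative stand-in for a KubeFedCluster: a name plus
// the labels set on the cluster resource.
type fedCluster struct {
	Name   string
	Labels map[string]string
}

// selectClusters shows the selector-driven half of placement: every cluster
// whose labels match the placement's clusterSelector is targeted, so an
// unlabelled (here, unhealthy) cluster is simply never selected.
func selectClusters(clusters []fedCluster, selector map[string]string) []string {
	sel := labels.SelectorFromSet(labels.Set(selector))
	var selected []string
	for _, c := range clusters {
		if sel.Matches(labels.Set(c.Labels)) {
			selected = append(selected, c.Name)
		}
	}
	return selected
}

func main() {
	clusters := []fedCluster{
		{Name: "healthy", Labels: map[string]string{"healthy": "true"}},
		{Name: "unhealthy", Labels: nil},
	}
	// The not-ready test labels only the healthy self-joined cluster and
	// passes this selector to CheckCreate, so the unhealthy cluster is
	// never targeted by the CRUD checks.
	fmt.Println(selectClusters(clusters, map[string]string{"healthy": "true"})) // [healthy]
}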