(bug) better handle deleted cluster #734

Merged: 1 commit, merged on Oct 18, 2024
4 changes: 4 additions & 0 deletions controllers/clustercache/clustercache_suite_test.go
@@ -101,6 +101,10 @@ var _ = BeforeSuite(func() {
Expect(testEnv.Create(context.TODO(), sveltosCRD)).To(Succeed())
Expect(waitForObject(context.TODO(), testEnv, sveltosCRD)).To(Succeed())

// Wait for synchronization
// Sometimes we otherwise get "no matches for kind "..." in version "lib.projectsveltos.io/v1beta1"
time.Sleep(2 * time.Second)

if synced := testEnv.GetCache().WaitForCacheSync(ctx); !synced {
time.Sleep(time.Second)
}
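Review note: the fixed two-second sleep above (the same pattern reappears in controllers_suite_test.go below) papers over a discovery race: the freshly installed CRD is not yet known to the client's RESTMapper, hence the "no matches for kind" flakes. A hedged alternative, not part of this PR, is to poll until the new kind resolves; this sketch assumes the suite's testEnv exposes the controller-runtime client, plus Ginkgo/Gomega's Eventually and the apimachinery imports noted in the comments.

```go
// Sketch only (not in this PR): poll until discovery knows the new kind,
// instead of sleeping for a fixed interval.
//
// Assumed imports:
//   "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
//   "k8s.io/apimachinery/pkg/runtime/schema"
Eventually(func() error {
	list := &unstructured.UnstructuredList{}
	// ResourceSummaryList is used as an illustrative kind from the
	// lib.projectsveltos.io/v1beta1 group the comment above refers to.
	list.SetGroupVersionKind(schema.GroupVersionKind{
		Group:   "lib.projectsveltos.io",
		Version: "v1beta1",
		Kind:    "ResourceSummaryList",
	})
	// List fails with "no matches for kind" until the RESTMapper catches up.
	return testEnv.List(ctx, list)
}, time.Minute, time.Second).Should(Succeed())
```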
16 changes: 11 additions & 5 deletions controllers/clustersummary_controller.go
@@ -245,12 +245,18 @@ func (r *ClusterSummaryReconciler) reconcileDelete(
return reconcile.Result{}, nil
}

err = r.removeResourceSummary(ctx, clusterSummaryScope, logger)
if err != nil {
logger.V(logs.LogInfo).Error(err, "failed to remove ResourceSummary.")
return reconcile.Result{Requeue: true, RequeueAfter: deleteRequeueAfter}, nil
if !isDeleted {
// If the cluster is marked for deletion, do not try to remove ResourceSummaries.
// Those are only deployed in the managed cluster, so there is no need to clean up on a deleted cluster.
err = r.removeResourceSummary(ctx, clusterSummaryScope, logger)
if err != nil {
logger.V(logs.LogInfo).Error(err, "failed to remove ResourceSummary.")
return reconcile.Result{Requeue: true, RequeueAfter: deleteRequeueAfter}, nil
}
}

// Still call undeploy even if the cluster is deleted. Sveltos might have deployed resources
// in the management cluster, and those need to be removed.
err = r.undeploy(ctx, clusterSummaryScope, logger)
if err != nil {
// In DryRun mode it is expected to always get an error back
@@ -1062,7 +1068,7 @@ func (r *ClusterSummaryReconciler) canRemoveFinalizer(ctx context.Context,
}

if clusterSummaryScope.IsDryRunSync() {
logger.V(logs.LogInfo).Info("DryRun mode. Can only be deleted if ClusterProfile is marked for deletion.")
logger.V(logs.LogInfo).Info("DryRun mode. Can only be deleted if Profile/ClusterProfile is marked for deletion.")
// A ClusterSummary in DryRun mode can only be removed if also ClusterProfile is marked
// for deletion. Otherwise ClusterSummary has to stay and list what would happen if owning
// ClusterProfile is moved away from DryRun mode.
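The hunk above branches on an isDeleted flag whose computation falls outside the visible diff context. A plausible shape, shown only as a hedged sketch (the PR's actual logic may differ), derives it from the cluster object's existence and deletion timestamp:

```go
// Hedged sketch of how isDeleted could be derived earlier in reconcileDelete;
// the real computation is outside this diff's context lines, and the field
// access via clusterSummaryScope.ClusterSummary is an assumption.
cs := clusterSummaryScope.ClusterSummary
cluster, err := clusterproxy.GetCluster(ctx, r.Client,
	cs.Spec.ClusterNamespace, cs.Spec.ClusterName, cs.Spec.ClusterType)
isDeleted := false
if err != nil {
	if apierrors.IsNotFound(err) {
		isDeleted = true // cluster object is gone entirely
	} else {
		return reconcile.Result{}, err
	}
} else if !cluster.GetDeletionTimestamp().IsZero() {
	isDeleted = true // cluster is marked for deletion
}
```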
2 changes: 0 additions & 2 deletions controllers/clustersummary_deployer_test.go
@@ -51,8 +51,6 @@ var _ = Describe("ClustersummaryDeployer", func() {
var clusterName string

BeforeEach(func() {
logger = textlogger.NewLogger(textlogger.NewConfig())

namespace = randomString()

logger = textlogger.NewLogger(textlogger.NewConfig())
2 changes: 1 addition & 1 deletion controllers/controllers_suite_test.go
@@ -136,7 +136,7 @@ var _ = BeforeSuite(func() {
Expect(waitForObject(context.TODO(), testEnv.Client, projectsveltosNs)).To(Succeed())

// Wait for synchronization
// Sometimes we otherwise get "no matches for kind "AddonCompliance" in version "lib.projectsveltos.io/v1beta1"
// Sometimes we otherwise get "no matches for kind ... in version "lib.projectsveltos.io/v1beta1"
time.Sleep(2 * time.Second)

controllers.InitializeManager(textlogger.NewLogger(textlogger.NewConfig()),
58 changes: 30 additions & 28 deletions controllers/handlers_helm.go
@@ -210,23 +210,9 @@ func undeployHelmCharts(ctx context.Context, c client.Client,
clusterType libsveltosv1beta1.ClusterType,
o deployer.Options, logger logr.Logger) error {

var cluster client.Object
cluster, err := clusterproxy.GetCluster(ctx, c, clusterNamespace,
clusterName, clusterType)
if err != nil {
return err
}

if !cluster.GetDeletionTimestamp().IsZero() {
// If Helm uninstall client encountered an unreachable cluster api-server, the Helm SDK would become stuck.
// Since cluster is being deleted, no need to clean up deployed helm charts. Simply returns and error
// and wait for cluster to be gone.
return errors.New("cluster is marked for deletion.")
}

// Get ClusterSummary that requested this
clusterSummary := &configv1beta1.ClusterSummary{}
err = c.Get(ctx, types.NamespacedName{Namespace: clusterNamespace, Name: applicant}, clusterSummary)
err := c.Get(ctx, types.NamespacedName{Namespace: clusterNamespace, Name: applicant}, clusterSummary)
if err != nil {
if apierrors.IsNotFound(err) {
return nil
@@ -260,7 +246,6 @@ func undeployHelmCharts(ctx context.Context, c client.Client,
func undeployHelmChartResources(ctx context.Context, c client.Client, clusterSummary *configv1beta1.ClusterSummary,
kubeconfig string, logger logr.Logger) error {

var releaseReports []configv1beta1.ReleaseReport
releaseReports, err := uninstallHelmCharts(ctx, c, clusterSummary, kubeconfig, logger)
if err != nil {
return err
@@ -366,7 +351,7 @@ func uninstallHelmCharts(ctx context.Context, c client.Client, clusterSummary *c
return nil, err
}
if currentRelease != nil && currentRelease.Status != string(release.StatusUninstalled) {
err = doUninstallRelease(clusterSummary, currentChart, kubeconfig, registryOptions, logger)
err = doUninstallRelease(ctx, clusterSummary, currentChart, kubeconfig, registryOptions, logger)
if err != nil {
if !errors.Is(err, driver.ErrReleaseNotFound) {
return nil, err
@@ -750,13 +735,13 @@ func handleUpgrade(ctx context.Context, clusterSummary *configv1beta1.ClusterSum
return report, nil
}

func handleUninstall(clusterSummary *configv1beta1.ClusterSummary, currentChart *configv1beta1.HelmChart,
kubeconfig string, registryOptions *registryClientOptions, logger logr.Logger,
) (*configv1beta1.ReleaseReport, error) {
func handleUninstall(ctx context.Context, clusterSummary *configv1beta1.ClusterSummary,
currentChart *configv1beta1.HelmChart, kubeconfig string, registryOptions *registryClientOptions,
logger logr.Logger) (*configv1beta1.ReleaseReport, error) {

var report *configv1beta1.ReleaseReport
logger.V(logs.LogDebug).Info("uninstall helm release")
err := doUninstallRelease(clusterSummary, currentChart, kubeconfig, registryOptions, logger)
err := doUninstallRelease(ctx, clusterSummary, currentChart, kubeconfig, registryOptions, logger)
if err != nil {
return nil, err
}
@@ -829,7 +814,7 @@ func handleChart(ctx context.Context, clusterSummary *configv1beta1.ClusterSumma
return nil, nil, err
}
} else if shouldUninstall(currentRelease, currentChart) {
report, err = handleUninstall(clusterSummary, currentChart, kubeconfig, registryOptions, logger)
report, err = handleUninstall(ctx, clusterSummary, currentChart, kubeconfig, registryOptions, logger)
if err != nil {
return nil, nil, err
}
@@ -1008,7 +993,7 @@ func checkDependencies(chartRequested *chart.Chart, installClient *action.Instal

// uninstallRelease removes helm release from a CAPI Cluster.
// No action in DryRun mode.
func uninstallRelease(clusterSummary *configv1beta1.ClusterSummary,
func uninstallRelease(ctx context.Context, clusterSummary *configv1beta1.ClusterSummary,
releaseName, releaseNamespace, kubeconfig string, registryOptions *registryClientOptions,
helmChart *configv1beta1.HelmChart, logger logr.Logger) error {

@@ -1017,6 +1002,21 @@ func uninstallRelease(clusterSummary *configv1beta1.ClusterSummary,
return nil
}

cluster, err := clusterproxy.GetCluster(ctx, getManagementClusterClient(), clusterSummary.Spec.ClusterNamespace,
clusterSummary.Spec.ClusterName, clusterSummary.Spec.ClusterType)
if err != nil {
if apierrors.IsNotFound(err) {
return nil
}
return err
}

if !cluster.GetDeletionTimestamp().IsZero() {
// If the cluster is marked for deletion, there is no need to worry about removing
// the helm charts deployed there.
return nil
}

logger = logger.WithValues("release", releaseName, "releaseNamespace", releaseNamespace)
logger.V(logs.LogDebug).Info("uninstalling release")

@@ -1437,8 +1437,9 @@ func doInstallRelease(ctx context.Context, clusterSummary *configv1beta1.Cluster

// doUninstallRelease uninstalls helm release from the CAPI Cluster.
// No action in DryRun mode.
func doUninstallRelease(clusterSummary *configv1beta1.ClusterSummary, requestedChart *configv1beta1.HelmChart,
kubeconfig string, registryOptions *registryClientOptions, logger logr.Logger) error {
func doUninstallRelease(ctx context.Context, clusterSummary *configv1beta1.ClusterSummary,
requestedChart *configv1beta1.HelmChart, kubeconfig string, registryOptions *registryClientOptions,
logger logr.Logger) error {

// No-op in DryRun mode
if clusterSummary.Spec.ClusterProfileSpec.SyncMode == configv1beta1.SyncModeDryRun {
@@ -1450,7 +1451,7 @@ func doUninstallRelease(clusterSummary *configv1beta1.ClusterSummary, requestedC
requestedChart.RepositoryURL,
requestedChart.RepositoryName))

return uninstallRelease(clusterSummary, requestedChart.ReleaseName, requestedChart.ReleaseNamespace,
return uninstallRelease(ctx, clusterSummary, requestedChart.ReleaseName, requestedChart.ReleaseNamespace,
kubeconfig, registryOptions, requestedChart, logger)
}

@@ -1548,8 +1549,9 @@ func undeployStaleReleases(ctx context.Context, c client.Client, clusterSummary
return nil, err
}

if err := uninstallRelease(clusterSummary, managedHelmReleases[i].Name, managedHelmReleases[i].Namespace,
kubeconfig, &registryClientOptions{}, nil, logger); err != nil {
if err := uninstallRelease(ctx, clusterSummary, managedHelmReleases[i].Name,
managedHelmReleases[i].Namespace, kubeconfig, &registryClientOptions{}, nil,
logger); err != nil {
return nil, err
}

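The net effect of this file's changes is to move the deleted-cluster check out of undeployHelmCharts and down into uninstallRelease, which now receives a context. That way every uninstall path, including undeployStaleReleases, skips the Helm SDK when the managed cluster is going away and its api-server may be unreachable; handlers_utils.go below applies the same guard to raw resources. A minimal standalone sketch of the guard, with a hypothetical helper name:

```go
// skipUninstall is a hypothetical helper illustrating the guard pattern the
// diff above inlines into uninstallRelease. It is a sketch, not PR code.
func skipUninstall(ctx context.Context, c client.Client, namespace, name string,
	clusterType libsveltosv1beta1.ClusterType) (bool, error) {

	cluster, err := clusterproxy.GetCluster(ctx, c, namespace, name, clusterType)
	if err != nil {
		if apierrors.IsNotFound(err) {
			return true, nil // cluster object is gone: nothing to uninstall from
		}
		return false, err
	}
	// A cluster being deleted may have an unreachable api-server, which can
	// wedge the Helm uninstall client; skip rather than hang.
	return !cluster.GetDeletionTimestamp().IsZero(), nil
}
```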
18 changes: 18 additions & 0 deletions controllers/handlers_utils.go
@@ -1044,6 +1044,24 @@ func undeployStaleResources(ctx context.Context, isMgmtCluster bool,

logger.V(logs.LogDebug).Info("removing stale resources")

if !isMgmtCluster {
cluster, err := clusterproxy.GetCluster(ctx, getManagementClusterClient(), clusterSummary.Spec.ClusterNamespace,
clusterSummary.Spec.ClusterName, clusterSummary.Spec.ClusterType)
if err != nil {
if apierrors.IsNotFound(err) {
return nil, nil
}
return nil, err
}

if !cluster.GetDeletionTimestamp().IsZero() {
// If the cluster is marked for deletion, there is no need to worry about removing
// resources deployed there. This check applies only to the managed cluster; resources
// deployed in the management cluster are still removed.
return nil, nil
}
}

profile, _, err := configv1beta1.GetProfileOwnerAndTier(ctx, getManagementClusterClient(), clusterSummary)
if err != nil {
return nil, err
61 changes: 49 additions & 12 deletions controllers/profile_utils.go
@@ -741,7 +741,7 @@ func updateClusterReports(ctx context.Context, c client.Client, profileScope *sc
}
} else {
// delete all ClusterReports created by this ClusterProfile/Profile instance
err := cleanClusterReports(ctx, c, profileScope.Profile)
err := cleanClusterReports(ctx, c, profileScope)
if err != nil {
profileScope.Logger.Error(err, "failed to delete ClusterReports")
return err
@@ -756,7 +756,7 @@ func createClusterReports(ctx context.Context, c client.Client, profileScope *sc
for i := range profileScope.GetStatus().MatchingClusterRefs {
cluster := profileScope.GetStatus().MatchingClusterRefs[i]

// Create ClusterConfiguration if not already existing.
// Create ClusterReport if not already existing.
err := createClusterReport(ctx, c, profileScope.Profile, &cluster)
if err != nil {
return err
@@ -772,15 +772,29 @@ func createClusterReport(ctx context.Context, c client.Client, profile client.Ob

clusterType := clusterproxy.GetClusterType(cluster)

lbls := map[string]string{
configv1beta1.ClusterNameLabel: cluster.Name,
configv1beta1.ClusterTypeLabel: string(clusterproxy.GetClusterType(cluster)),
}
if profile.GetObjectKind().GroupVersionKind().Kind == configv1beta1.ClusterProfileKind {
lbls[ClusterProfileLabelName] = profile.GetName()
} else {
lbls[ProfileLabelName] = profile.GetName()
}

clusterReport := &configv1beta1.ClusterReport{
ObjectMeta: metav1.ObjectMeta{
Namespace: cluster.Namespace,
Name: getClusterReportName(profile.GetObjectKind().GroupVersionKind().Kind, profile.GetName(),
cluster.Name, clusterType),
Labels: map[string]string{
ClusterProfileLabelName: profile.GetName(),
configv1beta1.ClusterNameLabel: cluster.Name,
configv1beta1.ClusterTypeLabel: string(clusterproxy.GetClusterType(cluster)),
Labels: lbls,
OwnerReferences: []metav1.OwnerReference{
{
Kind: profile.GetObjectKind().GroupVersionKind().Kind,
UID: profile.GetUID(),
APIVersion: configv1beta1.GroupVersion.String(),
Name: profile.GetName(),
},
},
},
Spec: configv1beta1.ClusterReportSpec{
@@ -800,15 +814,27 @@ }
}

// cleanClusterReports deletes ClusterReports created by this ClusterProfile/Profile instance.
func cleanClusterReports(ctx context.Context, c client.Client, profile client.Object) error {
func cleanClusterReports(ctx context.Context, c client.Client, profileScope *scope.ProfileScope) error {
listOptions := []client.ListOption{}

if profile.GetObjectKind().GroupVersionKind().Kind == configv1beta1.ClusterProfileKind {
listOptions = append(listOptions, client.MatchingLabels{ClusterProfileLabelName: profile.GetName()})
if profileScope.GetKind() == configv1beta1.ClusterProfileKind {
listOptions = append(listOptions, client.MatchingLabels{ClusterProfileLabelName: profileScope.Name()})
} else {
listOptions = append(listOptions,
client.MatchingLabels{ProfileLabelName: profile.GetName()},
client.InNamespace(profile.GetNamespace()))
client.MatchingLabels{ProfileLabelName: profileScope.Name()},
client.InNamespace(profileScope.Namespace()))
}

matchingClusterMap := make(map[string]bool)

info := func(namespace, clusterReportName string) string {
return fmt.Sprintf("%s--%s", namespace, clusterReportName)
}

for i := range profileScope.GetStatus().MatchingClusterRefs {
ref := &profileScope.GetStatus().MatchingClusterRefs[i]
matchingClusterMap[info(ref.Namespace, getClusterReportName(profileScope.GetKind(),
profileScope.Name(), ref.Name, clusterproxy.GetClusterType(ref)))] = true
}

clusterReportList := &configv1beta1.ClusterReportList{}
@@ -819,6 +845,11 @@ func cleanClusterReports(ctx context.Context, c client.Client, profile client.Ob

for i := range clusterReportList.Items {
cr := &clusterReportList.Items[i]

if _, ok := matchingClusterMap[info(cr.Namespace, cr.Name)]; ok {
continue
}

err = c.Delete(ctx, cr)
if err != nil {
if !apierrors.IsNotFound(err) {
@@ -995,7 +1026,7 @@ func reconcileDeleteCommon(ctx context.Context, c client.Client, profileScope *s
}

profile := profileScope.Profile
if err := cleanClusterReports(ctx, c, profile); err != nil {
if err := cleanClusterReports(ctx, c, profileScope); err != nil {
logger.V(logs.LogInfo).Error(err, "failed to clean ClusterReports")
return err
}
@@ -1045,6 +1076,12 @@ func reconcileNormalCommon(ctx context.Context, c client.Client, profileScope *s
return err
}

// For Sveltos/CAPI clusters that no longer match, remove stale ClusterReports
if err := cleanClusterReports(ctx, c, profileScope); err != nil {
logger.V(logs.LogInfo).Error(err, "failed to clean ClusterReports")
return err
}

return nil
}

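cleanClusterReports now takes the ProfileScope so it can build a keep-set of report keys (namespace--name) for still-matching clusters and delete only the rest; that is what lets reconcileNormalCommon call it on every normal reconcile rather than only on delete. A small self-contained illustration of the pattern (names below are hypothetical, not from the PR):

```go
// Self-contained illustration of the "build a keep-set, delete the rest"
// pattern cleanClusterReports uses. All names below are hypothetical.
package main

import "fmt"

func main() {
	key := func(namespace, name string) string {
		return fmt.Sprintf("%s--%s", namespace, name)
	}

	// Reports whose cluster still matches the profile survive.
	keep := map[string]bool{
		key("ns1", "report-a"): true,
	}

	existing := []struct{ Namespace, Name string }{
		{"ns1", "report-a"}, // still matching: kept
		{"ns2", "report-b"}, // no longer matching: deleted
	}

	for _, cr := range existing {
		if keep[key(cr.Namespace, cr.Name)] {
			continue
		}
		fmt.Printf("deleting stale ClusterReport %s/%s\n", cr.Namespace, cr.Name)
	}
}
```

Relatedly, createClusterReport now sets an OwnerReference on each report, so Kubernetes garbage collection can also reap reports whose owning Profile/ClusterProfile disappears.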
24 changes: 20 additions & 4 deletions controllers/profile_utils_test.go
@@ -887,10 +887,18 @@ var _ = Describe("Profile: Reconciler", func() {

c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(initObjects...).WithObjects(initObjects...).Build()

Expect(controllers.CleanClusterReports(context.TODO(), c, clusterProfile)).To(Succeed())
clusterProfileScope, err := scope.NewProfileScope(scope.ProfileScopeParams{
Client: c,
Logger: logger,
Profile: clusterProfile,
ControllerName: "clusterprofile",
})
Expect(err).To(BeNil())

Expect(controllers.CleanClusterReports(context.TODO(), c, clusterProfileScope)).To(Succeed())
// ClusterReport1 is gone
currentClusterReport := &configv1beta1.ClusterReport{}
err := c.Get(context.TODO(),
err = c.Get(context.TODO(),
types.NamespacedName{Namespace: clusterReport1.Namespace, Name: clusterReport1.Name}, currentClusterReport)
Expect(err).ToNot(BeNil())
Expect(apierrors.IsNotFound(err)).To(BeTrue())
@@ -969,10 +977,18 @@

c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(initObjects...).WithObjects(initObjects...).Build()

Expect(controllers.CleanClusterReports(context.TODO(), c, &profile)).To(Succeed())
profileScope, err := scope.NewProfileScope(scope.ProfileScopeParams{
Client: c,
Logger: logger,
Profile: &profile,
ControllerName: "profile",
})
Expect(err).To(BeNil())

Expect(controllers.CleanClusterReports(context.TODO(), c, profileScope)).To(Succeed())
// ClusterReport1 is gone
currentClusterReport := &configv1beta1.ClusterReport{}
err := c.Get(context.TODO(),
err = c.Get(context.TODO(),
types.NamespacedName{Namespace: clusterReport1.Namespace, Name: clusterReport1.Name}, currentClusterReport)
Expect(err).ToNot(BeNil())
Expect(apierrors.IsNotFound(err)).To(BeTrue())