Skip to content

Commit

Permalink
Ensure all hubble Pods are gone before proceeding with uninstall --wait.
Browse files Browse the repository at this point in the history
The last commit added foreground cascading deletion when uninstalling with --wait.
However, other issues that can occur when reusing clusters following uninstall are:

* Old endpoint state written to disk being restored upon reinstall.
* CNI deletes can be written to disk in a local queue if Cilium Agent CNI is down, resulting in potential error logs when re-installing cilium and replaying queued CNI DEL commands.

When uninstalling with --wait, put disabling Hubble into a separate uninstall step, which then blocks until there are no more Hubble Pods running.
This ensures that Hubble Pods can fully terminate via Cilium without the above situations happening.
Because Helm hubble disable uses Helm upgrade, we cannot rely on cascading foreground delete - so we just poll k8s until all Hubble Pods are gone.

Signed-off-by: Tom Hadlaw <[email protected]>
  • Loading branch information
tommyp1ckles committed Feb 28, 2024
1 parent c025a14 commit 1b96d74
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 3 deletions.
7 changes: 4 additions & 3 deletions connectivity/check/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -975,9 +975,10 @@ func (ct *ConnectivityTest) UninstallResources(ctx context.Context, wait bool) {
ct.Logf("🔥 Deleting %s namespace...", ct.params.TestNamespace)
ct.client.DeleteNamespace(ctx, ct.params.TestNamespace, metav1.DeleteOptions{})

// To avoid cases where test pods are stuck in terminating state because
// cni (cilium) pods were deleted sooner, wait until test pods are deleted
// before moving onto deleting cilium pods.
// If test Pods are not deleted prior to uninstalling Cilium then the CNI deletes
// may be queued by cilium-cni. This can cause errors to be logged when re-installing
// Cilium later.
// Thus we wait for all cilium-test Pods to fully terminate before proceeding.
if wait {
ct.Logf("⌛ Waiting for %s namespace to be terminated...", ct.params.TestNamespace)
for {
Expand Down
3 changes: 3 additions & 0 deletions hubble/hubble.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ type Parameters struct {

// UIOpenBrowser will automatically open browser if true
UIOpenBrowser bool

Wait bool
}

func (p *Parameters) Log(format string, a ...interface{}) {
Expand All @@ -55,6 +57,7 @@ func EnableWithHelm(ctx context.Context, k8sClient *k8s.Client, params Parameter
Values: vals,
ResetValues: false,
ReuseValues: true,
Wait: params.Wait,
}
_, err = helm.Upgrade(ctx, k8sClient.HelmActionConfig, upgradeParams)
return err
Expand Down
39 changes: 39 additions & 0 deletions internal/cli/cmd/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@ import (
"io"
"os"

"github.com/cilium/cilium/pkg/inctimer"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/cilium/cilium-cli/connectivity/check"
"github.com/cilium/cilium-cli/defaults"
"github.com/cilium/cilium-cli/hubble"
"github.com/cilium/cilium-cli/install"
)

Expand Down Expand Up @@ -114,6 +118,41 @@ func newCmdUninstallWithHelm() *cobra.Command {
cc.UninstallResources(ctx, params.Wait)
}
uninstaller := install.NewK8sUninstaller(k8sClient, params)
var hubbleParams = hubble.Parameters{
Writer: os.Stdout,
Wait: true,
}

if params.Wait {
// Disable Hubble, then wait for Pods to terminate before uninstalling Cilium.
// This guarantees that relay Pods are terminated fully via Cilium (rather than
// being queued for deletion) before uninstalling Cilium.
fmt.Printf("⌛ Waiting to disable Hubble before uninstalling Cilium\n")
if err := hubble.DisableWithHelm(ctx, k8sClient, hubbleParams); err != nil {
fmt.Printf("⚠ ️ Failed to disable Hubble prior to uninstalling Cilium: %s\n", err)
}
for {
ps, err := k8sClient.ListPods(ctx, hubbleParams.Namespace, metav1.ListOptions{
LabelSelector: "k8s-app=hubble-relay",
})
if err != nil {
if k8sErrors.IsNotFound(err) {
break
}
fatalf("Unable to list pods waiting for hubble-relay to stop: %s", err)
}
if len(ps.Items) == 0 {
break
}
select {
case <-inctimer.After(defaults.WaitRetryInterval):
case <-ctx.Done():
fatalf("Timed out waiting for Hubble Pods to terminate")
}
}
}

fmt.Printf("⌛ Uninstalling Cilium\n")
if err := uninstaller.UninstallWithHelm(ctx, k8sClient.HelmActionConfig); err != nil {
fatalf("Unable to uninstall Cilium: %s", err)
}
Expand Down

0 comments on commit 1b96d74

Please sign in to comment.