Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

multinode: fix control plane not ready on restart #8698

Merged
merged 8 commits into the base branch from the source branch
Jul 16, 2020
64 changes: 34 additions & 30 deletions pkg/minikube/bootstrapper/kubeadm/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,16 +396,15 @@ func (k *Bootstrapper) client(ip string, port int) (*kubernetes.Clientset, error
func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, timeout time.Duration) error {
start := time.Now()

if !n.ControlPlane {
glog.Infof("%s is not a control plane, nothing to wait for", n.Name)
return nil
}

register.Reg.SetStep(register.VerifyingKubernetes)
out.T(out.HealthCheck, "Verifying Kubernetes components...")

// TODO: #7706: for better performance we could use k.client inside minikube to avoid asking for external IP:PORT
hostname, _, port, err := driver.ControlPlaneEndpoint(&cfg, &n, cfg.Driver)
cp, err := config.PrimaryControlPlane(&cfg)
if err != nil {
return errors.Wrap(err, "get primary control plane")
}
hostname, _, port, err := driver.ControlPlaneEndpoint(&cfg, &cp, cfg.Driver)
if err != nil {
return errors.Wrap(err, "get control plane endpoint")
}
Expand All @@ -430,31 +429,33 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
return errors.Wrapf(err, "create runtme-manager %s", cfg.KubernetesConfig.ContainerRuntime)
}

if cfg.VerifyComponents[kverify.APIServerWaitKey] {
if err := kverify.WaitForAPIServerProcess(cr, k, cfg, k.c, start, timeout); err != nil {
return errors.Wrap(err, "wait for apiserver proc")
}
if n.ControlPlane {
if cfg.VerifyComponents[kverify.APIServerWaitKey] {
if err := kverify.WaitForAPIServerProcess(cr, k, cfg, k.c, start, timeout); err != nil {
return errors.Wrap(err, "wait for apiserver proc")
}

if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, client, start, hostname, port, timeout); err != nil {
return errors.Wrap(err, "wait for healthy API server")
if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, client, start, hostname, port, timeout); err != nil {
return errors.Wrap(err, "wait for healthy API server")
}
}
}

if cfg.VerifyComponents[kverify.SystemPodsWaitKey] {
if err := kverify.WaitForSystemPods(cr, k, cfg, k.c, client, start, timeout); err != nil {
return errors.Wrap(err, "waiting for system pods")
if cfg.VerifyComponents[kverify.SystemPodsWaitKey] {
if err := kverify.WaitForSystemPods(cr, k, cfg, k.c, client, start, timeout); err != nil {
return errors.Wrap(err, "waiting for system pods")
}
}
}

if cfg.VerifyComponents[kverify.DefaultSAWaitKey] {
if err := kverify.WaitForDefaultSA(client, timeout); err != nil {
return errors.Wrap(err, "waiting for default service account")
if cfg.VerifyComponents[kverify.DefaultSAWaitKey] {
if err := kverify.WaitForDefaultSA(client, timeout); err != nil {
return errors.Wrap(err, "waiting for default service account")
}
}
}

if cfg.VerifyComponents[kverify.AppsRunningKey] {
if err := kverify.WaitForAppsRunning(client, kverify.AppsRunningList, timeout); err != nil {
return errors.Wrap(err, "waiting for apps_running")
if cfg.VerifyComponents[kverify.AppsRunningKey] {
if err := kverify.WaitForAppsRunning(client, kverify.AppsRunningList, timeout); err != nil {
return errors.Wrap(err, "waiting for apps_running")
}
}
}

Expand Down Expand Up @@ -730,7 +731,7 @@ func (k *Bootstrapper) SetupCerts(k8s config.KubernetesConfig, n config.Node) er
return err
}

// UpdateCluster updates the cluster.
// UpdateCluster updates the control plane with cluster-level info.
func (k *Bootstrapper) UpdateCluster(cfg config.ClusterConfig) error {
images, err := images.Kubeadm(cfg.KubernetesConfig.ImageRepository, cfg.KubernetesConfig.KubernetesVersion)
if err != nil {
Expand All @@ -753,11 +754,14 @@ func (k *Bootstrapper) UpdateCluster(cfg config.ClusterConfig) error {
}
}

for _, n := range cfg.Nodes {
err := k.UpdateNode(cfg, n, r)
if err != nil {
return errors.Wrap(err, "updating node")
}
cp, err := config.PrimaryControlPlane(&cfg)
if err != nil {
return errors.Wrap(err, "getting control plane")
}

err = k.UpdateNode(cfg, cp, r)
if err != nil {
return errors.Wrap(err, "updating control plane")
}

return nil
Expand Down
36 changes: 35 additions & 1 deletion pkg/minikube/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ package node

import (
"fmt"
"os/exec"

"github.com/golang/glog"
"github.com/pkg/errors"
"github.com/spf13/viper"

"k8s.io/minikube/pkg/kapi"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/minikube/driver"
"k8s.io/minikube/pkg/minikube/machine"
Expand Down Expand Up @@ -66,12 +68,44 @@ func Delete(cc config.ClusterConfig, name string) (*config.Node, error) {
return n, errors.Wrap(err, "retrieve")
}

m := driver.MachineName(cc, *n)
api, err := machine.NewAPIClient()
if err != nil {
return n, err
}

err = machine.DeleteHost(api, driver.MachineName(cc, *n))
// grab control plane to use kubeconfig
host, err := machine.LoadHost(api, cc.Name)
if err != nil {
return n, err
}

runner, err := machine.CommandRunner(host)
if err != nil {
return n, err
}

// kubectl drain
kubectl := kapi.KubectlBinaryPath(cc.KubernetesConfig.KubernetesVersion)
cmd := exec.Command("sudo", "KUBECONFIG=/var/lib/minikube/kubeconfig", kubectl, "drain", m)
if _, err := runner.RunCmd(cmd); err != nil {
glog.Warningf("unable to scale coredns replicas to 1: %v", err)
} else {
glog.Infof("successfully scaled coredns replicas to 1")
}

// kubectl delete
client, err := kapi.Client(cc.Name)
if err != nil {
return n, err
}

err = client.CoreV1().Nodes().Delete(m, nil)
if err != nil {
return n, err
}

err = machine.DeleteHost(api, m)
if err != nil {
return n, err
}
Expand Down
10 changes: 5 additions & 5 deletions pkg/minikube/node/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,6 @@ func Start(starter Starter, apiServer bool) (*kubeconfig.Settings, error) {
prepareNone()
}

glog.Infof("Will wait %s for node ...", waitTimeout)
if err := bs.WaitForNode(*starter.Cfg, *starter.Node, viper.GetDuration(waitTimeout)); err != nil {
return nil, errors.Wrapf(err, "wait %s for node", viper.GetDuration(waitTimeout))
}

} else {
if err := bs.UpdateNode(*starter.Cfg, *starter.Node, cr); err != nil {
return nil, errors.Wrap(err, "update node")
Expand Down Expand Up @@ -197,6 +192,11 @@ func Start(starter Starter, apiServer bool) (*kubeconfig.Settings, error) {
}
}

glog.Infof("Will wait %s for node ...", waitTimeout)
if err := bs.WaitForNode(*starter.Cfg, *starter.Node, viper.GetDuration(waitTimeout)); err != nil {
return nil, errors.Wrapf(err, "wait %s for node", viper.GetDuration(waitTimeout))
}

glog.Infof("waiting for startup goroutines ...")
wg.Wait()

Expand Down
28 changes: 24 additions & 4 deletions test/integration/multinode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"os/exec"
"strings"
"testing"
"time"
)

func TestMultiNode(t *testing.T) {
Expand Down Expand Up @@ -179,7 +180,7 @@ func validateStartNodeAfterStop(ctx context.Context, t *testing.T, profile strin
}

func validateStopMultiNodeCluster(ctx context.Context, t *testing.T, profile string) {
// Run minikube node stop on that node
// Run minikube stop on the cluster
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "stop"))
if err != nil {
t.Errorf("node stop returned an error. args %q: %v", rr.Command(), err)
Expand Down Expand Up @@ -218,7 +219,7 @@ func validateRestartMultiNodeCluster(ctx context.Context, t *testing.T, profile
}
}
// Restart a full cluster with minikube start
startArgs := append([]string{"start", "-p", profile}, StartArgs()...)
startArgs := append([]string{"start", "-p", profile, "--wait=true"}, StartArgs()...)
rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...))
if err != nil {
t.Fatalf("failed to start cluster. args %q : %v", rr.Command(), err)
Expand All @@ -231,11 +232,30 @@ func validateRestartMultiNodeCluster(ctx context.Context, t *testing.T, profile
}

if strings.Count(rr.Stdout.String(), "host: Running") != 2 {
t.Errorf("status says both hosts are not running: args %q: %v", rr.Command(), rr.Stdout.String())
t.Errorf("status says both hosts are not running: args %q: %v", rr.Command(), rr.Output())
}

if strings.Count(rr.Stdout.String(), "kubelet: Running") != 2 {
t.Errorf("status says both kubelets are not running: args %q: %v", rr.Command(), rr.Stdout.String())
t.Errorf("status says both kubelets are not running: args %q: %v", rr.Command(), rr.Output())
}

time.Sleep(Seconds(30))

// Make sure kubectl reports that all nodes are ready
rr, err = Run(t, exec.CommandContext(ctx, "kubectl", "get", "nodes"))
if err != nil {
t.Fatalf("failed to run kubectl get nodes. args %q : %v", rr.Command(), err)
}
if strings.Count(rr.Stdout.String(), "NotReady") > 0 {
t.Errorf("expected 2 nodes to be Ready, got %v", rr.Output())
}

rr, err = Run(t, exec.CommandContext(ctx, "kubectl", "get", "nodes", "-o", `go-template='{{range .items}}{{range .status.conditions}}{{if eq .type "Ready"}} {{.status}}{{"\n"}}{{end}}{{end}}{{end}}'`))
if err != nil {
t.Fatalf("failed to run kubectl get nodes. args %q : %v", rr.Command(), err)
}
if strings.Count(rr.Stdout.String(), "True") != 2 {
t.Errorf("expected 2 nodes Ready status to be True, got %v", rr.Output())
}
}

Expand Down