From 97d96a5cc9c864fffc6a4f9e5cae107be3048e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan-Luis=20de=20Sousa-Valadas=20Casta=C3=B1o?= Date: Thu, 14 Mar 2024 13:47:38 +0100 Subject: [PATCH 1/2] Increase the default timeout to 6 minutes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We see the timeouts are quite tight in general, specially the remove APIs timeout in particular. As discussed in the call, increase the default timeout from 4 to 6 minutes Signed-off-by: Juan-Luis de Sousa-Valadas CastaƱo (cherry picked from commit e2b6eb3012596373db2203edd29ec32ded76374f) --- inttest/Makefile | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/inttest/Makefile b/inttest/Makefile index d7f77f3233d3..f3fea22931df 100644 --- a/inttest/Makefile +++ b/inttest/Makefile @@ -66,18 +66,9 @@ check-conformance: bin/sonobuoy get-conformance-results: bin/sonobuoy $(realpath bin/sonobuoy) retrieve -TIMEOUT ?= 4m +TIMEOUT ?= 6m check-ctr: TIMEOUT=10m -check-byocri: TIMEOUT=5m -# readiness check for metric tests takes between around 5 and 6 minutes. -check-metrics: TIMEOUT=6m -check-metricsscraper: TIMEOUT=6m - -check-calico: TIMEOUT=6m - -# Establishing konnectivity tunnels with the LB in place takes a while, thus a bit longer timeout for the smoke -check-customports: TIMEOUT=6m # Config change smoke runs actually many cases hence a bit longer timeout check-configchange: TIMEOUT=8m @@ -88,7 +79,6 @@ check-backup: TIMEOUT=10m # Autopilot 3x3 HA test can take a while to run check-ap-ha3x3: K0S_UPDATE_FROM_BIN ?= ../k0s check-ap-ha3x3: K0S_UPDATE_FROM_PATH ?= $(realpath $(K0S_UPDATE_FROM_BIN)) -check-ap-ha3x3: TIMEOUT=6m check-customports-dynamicconfig: export K0S_ENABLE_DYNAMIC_CONFIG=true check-customports-dynamicconfig: TEST_PACKAGE=customports From a1d7798a93e7e467bfb4bf30e8bc7afa5ce97a28 Mon Sep 17 00:00:00 2001 From: Jussi Nummelin Date: Tue, 21 May 2024 14:47:12 +0300 Subject: [PATCH 2/2] Use dedicated leaspool for worker config component With this change the first controller on new version can apply the needed versioned resources as it will always be guaranteed to become the leader. Signed-off-by: Jussi Nummelin (cherry picked from commit e5a271bc4a1b97d3000f5120413c7c611db4ea30) (cherry picked from commit a9e79f6ecd6929bc27702a4ab7080ba5bc51e90c) Signed-off-by: Tom Wieczorek --- cmd/controller/controller.go | 11 +- inttest/Makefile | 3 + inttest/Makefile.variables | 1 + .../controllerworker_test.go | 209 ++++++++++++++++++ .../controller/leaderelector/leasepool.go | 6 +- 5 files changed, 226 insertions(+), 4 deletions(-) create mode 100644 inttest/ap-controllerworker/controllerworker_test.go diff --git a/cmd/controller/controller.go b/cmd/controller/controller.go index a5f25e25e120..49fc5afe3c31 100644 --- a/cmd/controller/controller.go +++ b/cmd/controller/controller.go @@ -259,7 +259,9 @@ func (c *command) start(ctx context.Context) error { // One leader elector per controller if !c.SingleNode { - leaderElector = leaderelector.NewLeasePool(adminClientFactory) + // The name used to be hardcoded in the component itself + // At some point we need to rename this. + leaderElector = leaderelector.NewLeasePool(adminClientFactory, "k0s-endpoint-reconciler") } else { leaderElector = &leaderelector.Dummy{Leader: true} } @@ -468,7 +470,12 @@ func (c *command) start(ctx context.Context) error { } if !slices.Contains(c.DisableComponents, constant.WorkerConfigComponentName) { - reconciler, err := workerconfig.NewReconciler(c.K0sVars, nodeConfig.Spec, adminClientFactory, leaderElector, enableKonnectivity) + // Create new dedicated leasepool for worker config reconciler + leaseName := fmt.Sprintf("k0s-%s-%s", constant.WorkerConfigComponentName, constant.KubernetesMajorMinorVersion) + workerConfigLeasePool := leaderelector.NewLeasePool(adminClientFactory, leaseName) + clusterComponents.Add(ctx, workerConfigLeasePool) + + reconciler, err := workerconfig.NewReconciler(c.K0sVars, nodeConfig.Spec, adminClientFactory, workerConfigLeasePool, enableKonnectivity) if err != nil { return err } diff --git a/inttest/Makefile b/inttest/Makefile index f3fea22931df..8ae94e5a9fa7 100644 --- a/inttest/Makefile +++ b/inttest/Makefile @@ -80,6 +80,9 @@ check-backup: TIMEOUT=10m check-ap-ha3x3: K0S_UPDATE_FROM_BIN ?= ../k0s check-ap-ha3x3: K0S_UPDATE_FROM_PATH ?= $(realpath $(K0S_UPDATE_FROM_BIN)) +check-ap-controllerworker: K0S_UPDATE_FROM_BIN ?= ../k0s +check-ap-controllerworker: K0S_UPDATE_FROM_PATH ?= $(realpath $(K0S_UPDATE_FROM_BIN)) + check-customports-dynamicconfig: export K0S_ENABLE_DYNAMIC_CONFIG=true check-customports-dynamicconfig: TEST_PACKAGE=customports diff --git a/inttest/Makefile.variables b/inttest/Makefile.variables index 1040bc3b8393..a25aa93bf59e 100644 --- a/inttest/Makefile.variables +++ b/inttest/Makefile.variables @@ -3,6 +3,7 @@ smoketests := \ check-addons \ check-airgap \ check-ap-airgap \ + check-ap-controllerworker \ check-ap-ha3x3 \ check-ap-platformselect \ check-ap-quorum \ diff --git a/inttest/ap-controllerworker/controllerworker_test.go b/inttest/ap-controllerworker/controllerworker_test.go new file mode 100644 index 000000000000..bc302f11d9c2 --- /dev/null +++ b/inttest/ap-controllerworker/controllerworker_test.go @@ -0,0 +1,209 @@ +// Copyright 2024 k0s authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package controllerworker + +import ( + "fmt" + "strings" + "testing" + "time" + + "github.com/k0sproject/k0s/inttest/common" + aptest "github.com/k0sproject/k0s/inttest/common/autopilot" + + apconst "github.com/k0sproject/k0s/pkg/autopilot/constant" + appc "github.com/k0sproject/k0s/pkg/autopilot/controller/plans/core" + "github.com/k0sproject/k0s/pkg/constant" + "github.com/k0sproject/k0s/pkg/kubernetes/watch" + + "github.com/stretchr/testify/suite" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type controllerworkerSuite struct { + common.BootlooseSuite +} + +const k0sConfigWithMultiController = ` +spec: + api: + address: %s + storage: + etcd: + peerAddress: %s +` + +const oldVersion = "v1.29.4+k0s.0" + +// SetupTest prepares the controller and filesystem, getting it into a consistent +// state which we can run tests against. +func (s *controllerworkerSuite) SetupTest() { + ctx := s.Context() + // ipAddress := s.GetControllerIPAddress(0) + var joinToken string + + for idx := 0; idx < s.BootlooseSuite.ControllerCount; idx++ { + nodeName, require := s.ControllerNode(idx), s.Require() + address := s.GetControllerIPAddress(idx) + + s.Require().NoError(s.WaitForSSH(nodeName, 2*time.Minute, 1*time.Second)) + ssh, err := s.SSH(ctx, nodeName) + require.NoError(err) + defer ssh.Disconnect() + s.PutFile(nodeName, "/tmp/k0s.yaml", fmt.Sprintf(k0sConfigWithMultiController, address, address)) + // Install older version of k0s + downloadCmd := fmt.Sprintf("curl -sSfL get.k0s.sh | K0S_VERSION=%s sh", oldVersion) + out, err := ssh.ExecWithOutput(ctx, downloadCmd) + if err != nil { + s.T().Logf("error getting k0s: %s", out) + } + require.NoError(err) + s.T().Logf("downloaded succesfully: %s", out) + // Note that the token is intentionally empty for the first controller + args := []string{ + "--debug", + "--disable-components=metrics-server,helm,konnectivity-server", + "--enable-worker", + "--config=/tmp/k0s.yaml", + } + if joinToken != "" { + s.PutFile(nodeName, "/tmp/token", joinToken) + args = append(args, "--token-file=/tmp/token") + } + out, err = ssh.ExecWithOutput(ctx, "k0s install controller "+strings.Join(args, " ")) + if err != nil { + s.T().Logf("error installing k0s: %s", out) + } + require.NoError(err) + _, err = ssh.ExecWithOutput(ctx, "k0s start") + require.NoError(err) + // s.Require().NoError(s.InitController(idx, "--config=/tmp/k0s.yaml", "--disable-components=metrics-server", "--enable-worker", joinToken)) + s.Require().NoError(s.WaitJoinAPI(nodeName)) + kc, err := s.KubeClient(nodeName) + require.NoError(err) + require.NoError(s.WaitForNodeReady(nodeName, kc)) + + node, err := kc.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) + require.NoError(err) + require.Equal("v1.29.4+k0s", node.Status.NodeInfo.KubeletVersion) + + client, err := s.ExtensionsClient(s.ControllerNode(0)) + s.Require().NoError(err) + + s.Require().NoError(aptest.WaitForCRDByName(ctx, client, "plans")) + s.Require().NoError(aptest.WaitForCRDByName(ctx, client, "controlnodes")) + + // With the primary controller running, create the join token for subsequent controllers. + if idx == 0 { + token, err := s.GetJoinToken("controller") + s.Require().NoError(err) + joinToken = token + } + } + + // Final sanity -- ensure all nodes see each other according to etcd + for idx := 0; idx < s.BootlooseSuite.ControllerCount; idx++ { + s.Require().Len(s.GetMembers(idx), s.BootlooseSuite.ControllerCount) + } +} + +// TestApply applies a well-formed `plan` yaml, and asserts that +// all of the correct values across different objects + controllers are correct. +func (s *controllerworkerSuite) TestApply() { + + planTemplate := ` +apiVersion: autopilot.k0sproject.io/v1beta2 +kind: Plan +metadata: + name: autopilot +spec: + id: id123 + timestamp: now + commands: + - k0supdate: + version: v0.0.0 + forceupdate: true + platforms: + linux-amd64: + url: http://localhost/dist/k0s-new + linux-arm64: + url: http://localhost/dist/k0s-new + targets: + controllers: + discovery: + static: + nodes: + - controller1 + - controller2 + - controller0 +` + ctx := s.Context() + manifestFile := "/tmp/happy.yaml" + s.PutFileTemplate(s.ControllerNode(0), manifestFile, planTemplate, nil) + + out, err := s.RunCommandController(0, fmt.Sprintf("/usr/local/bin/k0s kubectl apply -f %s", manifestFile)) + s.T().Logf("kubectl apply output: '%s'", out) + s.Require().NoError(err) + + client, err := s.AutopilotClient(s.ControllerNode(0)) + s.Require().NoError(err) + s.NotEmpty(client) + + // The plan has enough information to perform a successful update of k0s, so wait for it. + plan, err := aptest.WaitForPlanState(s.Context(), client, apconst.AutopilotName, appc.PlanCompleted) + s.Require().NoError(err) + + s.Equal(1, len(plan.Status.Commands)) + cmd := plan.Status.Commands[0] + + s.Equal(appc.PlanCompleted, cmd.State) + s.NotNil(cmd.K0sUpdate) + s.NotNil(cmd.K0sUpdate.Controllers) + s.Empty(cmd.K0sUpdate.Workers) + + for _, node := range cmd.K0sUpdate.Controllers { + s.Equal(appc.SignalCompleted, node.State) + } + + kc, err := s.KubeClient(s.ControllerNode(0)) + s.NoError(err) + + for idx := 0; idx < s.BootlooseSuite.ControllerCount; idx++ { + nodeName, require := s.ControllerNode(idx), s.Require() + require.NoError(s.WaitForNodeReady(nodeName, kc)) + // Wait till we see kubelet reporting the expected version + err := watch.Nodes(kc.CoreV1().Nodes()). + WithObjectName(nodeName). + WithErrorCallback(common.RetryWatchErrors(s.T().Logf)). + Until(ctx, func(node *corev1.Node) (bool, error) { + return strings.Contains(node.Status.NodeInfo.KubeletVersion, fmt.Sprintf("v%s.", constant.KubernetesMajorMinorVersion)), nil + }) + require.NoError(err) + } +} + +// TestQuorumSuite sets up a suite using 3 controllers for quorum, and runs various +// autopilot upgrade scenarios against them. +func TestQuorumSuite(t *testing.T) { + suite.Run(t, &controllerworkerSuite{ + common.BootlooseSuite{ + ControllerCount: 3, + WorkerCount: 0, + LaunchMode: common.LaunchModeOpenRC, + }, + }) +} diff --git a/pkg/component/controller/leaderelector/leasepool.go b/pkg/component/controller/leaderelector/leasepool.go index 3456b88770b2..b63165250550 100644 --- a/pkg/component/controller/leaderelector/leasepool.go +++ b/pkg/component/controller/leaderelector/leasepool.go @@ -37,13 +37,14 @@ type LeasePool struct { acquiredLeaseCallbacks []func() lostLeaseCallbacks []func() + name string } var _ Interface = (*LeasePool)(nil) var _ manager.Component = (*LeasePool)(nil) // NewLeasePool creates a new leader elector using a Kubernetes lease pool. -func NewLeasePool(kubeClientFactory kubeutil.ClientFactoryInterface) *LeasePool { +func NewLeasePool(kubeClientFactory kubeutil.ClientFactoryInterface, name string) *LeasePool { d := atomic.Value{} d.Store(false) return &LeasePool{ @@ -51,6 +52,7 @@ func NewLeasePool(kubeClientFactory kubeutil.ClientFactoryInterface) *LeasePool kubeClientFactory: kubeClientFactory, log: logrus.WithFields(logrus.Fields{"component": "poolleaderelector"}), leaderStatus: d, + name: name, } } @@ -63,7 +65,7 @@ func (l *LeasePool) Start(ctx context.Context) error { if err != nil { return fmt.Errorf("can't create kubernetes rest client for lease pool: %v", err) } - leasePool, err := leaderelection.NewLeasePool(ctx, client, "k0s-endpoint-reconciler", + leasePool, err := leaderelection.NewLeasePool(ctx, client, l.name, leaderelection.WithLogger(l.log), leaderelection.WithContext(ctx)) if err != nil {