kubevirt: upgrade k3s,multus,kubevirt,cdi,longhorn #4501

Draft · wants to merge 2 commits into base: master
7 changes: 7 additions & 0 deletions pkg/kube/Dockerfile
@@ -20,13 +20,20 @@ COPY cert-gen /plugins/cert-gen
WORKDIR /plugins/cert-gen
RUN GO111MODULE=on CGO_ENABLED=0 go build -v -ldflags "-s -w" -o /out/usr/bin/cert-gen .

COPY update-component /plugins/update-component
WORKDIR /plugins/update-component
RUN GO111MODULE=on go build -v -ldflags "-s -w" -mod=vendor -o /out/usr/bin/update-component .

FROM scratch
COPY --from=build /out/ /
COPY cluster-init.sh /usr/bin/
COPY cluster-utils.sh /usr/bin/
COPY cgconfig.conf /etc

# upgrades
COPY cluster-update.sh /usr/bin/
COPY update-component/expected_versions.yaml /etc/
COPY update-component/settings_longhorn.yaml /etc/

# k3s
COPY install-etcdctl.sh /usr/bin/
3 changes: 3 additions & 0 deletions pkg/kube/cluster-init.sh
@@ -613,6 +613,8 @@ logmsg "Using ZFS persistent storage"

setup_prereqs

Update_CheckNodeComponents


if [ -f /var/lib/convert-to-single-node ]; then
logmsg "remove /var/lib and copy saved single node /var/lib"
@@ -862,6 +864,7 @@ fi
check_kubeconfig_yaml_files
check_and_remove_excessive_k3s_logs
check_and_run_vnc
Update_CheckClusterComponents
wait_for_item "wait"
sleep 15
done
247 changes: 247 additions & 0 deletions pkg/kube/cluster-update.sh
@@ -2,7 +2,254 @@
#
# Copyright (c) 2024 Zededa, Inc.
# SPDX-License-Identifier: Apache-2.0
K3S_VERSION=v1.28.5+k3s1
EdgeNodeInfoPath="/persist/status/zedagent/EdgeNodeInfo/global.json"

#
# Handle any migrations needed due to updated cluster-init.sh
# This is expected to be bumped any time:
# - a migration is needed (new path for something)
# - a version bump of: K3s, multus, kubevirt, cdi, longhorn
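# For example, a future K3S_VERSION change must come with a KUBE_VERSION bump
# so that already-provisioned nodes re-enter this update path on their next boot.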
#
KUBE_VERSION=1
APPLIED_KUBE_VERSION_PATH="/var/lib/applied-kube-version"
update_Version_Set() {
version=$1
echo "$version" > "$APPLIED_KUBE_VERSION_PATH"
}

update_Version_Get() {
if [ ! -f "$APPLIED_KUBE_VERSION_PATH" ]; then
# First Boot
echo "0"
return
fi
cat "$APPLIED_KUBE_VERSION_PATH"
}

#
# update_Failed()
# Returns 0 (failed) if Status == COMP_STATUS_FAILED and DestinationKubeUpdateVersion == KUBE_VERSION
# This allows:
# - update retry control for a given version
# - a recovery update if the eve-os version is updated to another release (with a different cluster-init.sh)
#
UPDATE_STATUS_PATH=/persist/status/zedkube/KubeClusterUpdateStatus/global.json
update_Failed() {
if [ -f $UPDATE_STATUS_PATH ]; then
if [ "$(jq --arg gen $KUBE_VERSION '.Status==4 and .DestinationKubeUpdateVersion==$gen' < $UPDATE_STATUS_PATH)" = "true" ]; then
return 0
fi
fi
return 1
}
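For reference, update_Failed treats a persisted status shaped roughly like the following as a failed attempt at the current KUBE_VERSION (a sketch: only the two fields the jq filter consults are shown, other fields zedkube publishes are omitted, and the version is assumed to be serialized as a number):

{
  "Status": 4,
  "DestinationKubeUpdateVersion": 1
}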

trigger_k3s_selfextraction() {
# Run some k3s cli command so that binaries are self-extracted
/usr/bin/k3s check-config >> "$INSTALL_LOG" 2>&1
}

link_multus_into_k3s() {
ln -s /var/lib/cni/bin/multus /var/lib/rancher/k3s/data/current/bin/multus
}

update_k3s() {
logmsg "Installing K3S version $K3S_VERSION on $HOSTNAME"
mkdir -p /var/lib/k3s/bin
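# INSTALL_K3S_SKIP_ENABLE and INSTALL_K3S_SKIP_START tell the upstream get.k3s.io
# installer not to enable or start a k3s service (EVE manages the k3s lifecycle
# itself); INSTALL_K3S_BIN_DIR redirects the binaries under /var/lib/k3s/bin.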
/usr/bin/curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${K3S_VERSION} INSTALL_K3S_SKIP_ENABLE=true INSTALL_K3S_SKIP_START=true INSTALL_K3S_BIN_DIR=/var/lib/k3s/bin sh -
sleep 5
logmsg "Initializing K3S version $K3S_VERSION"
ln -s /var/lib/k3s/bin/* /usr/bin
trigger_k3s_selfextraction
link_multus_into_k3s
touch /var/lib/k3s_installed_unpacked
}

# k3s_get_version: return version in form "vW.X.Y+k3sZ"
k3s_get_version() {
if [ ! -f /var/lib/k3s/bin/k3s ]; then
echo "v0.0.0+k3s0"
return
fi
/var/lib/k3s/bin/k3s --version | awk '$1=="k3s" {print $3}' | tr -d '\n'
}
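For context, k3s --version typically prints two lines like the sample below (an assumption about the usual upstream output format); the awk filter above selects the third field of the line whose first field is "k3s":

k3s version v1.28.5+k3s1 (5b2d1271)
go version go1.20.12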

# Run on every boot before k3s starts
Update_CheckNodeComponents() {
applied_version=$(update_Version_Get)
if [ "$KUBE_VERSION" = "$applied_version" ]; then
return
fi

if update_Failed; then
return
fi
logmsg "update_HandleNode: version:$KUBE_VERSION appliedversion:$applied_version continuing"

# Handle version specific node migrations here

# Handle node specific updates, just k3s for now
if [ "$(k3s_get_version)" != "$K3S_VERSION" ]; then
publishUpdateStatus "k3s" "download"
update_k3s
current_k3s_version=$(k3s_get_version)
if [ "$current_k3s_version" != "$K3S_VERSION" ]; then
logmsg "k3s version mismatch after install:$current_k3s_version"
publishUpdateStatus "k3s" "failed" "version mismatch after install:$current_k3s_version"
else
logmsg "k3s installed and unpacked or copied"
publishUpdateStatus "k3s" "completed"
fi
fi
}

# Run on every boot after k3s is started
Update_CheckClusterComponents() {
wait_for_item "update_cluster_pre"

applied_version=$(update_Version_Get)
if [ "$KUBE_VERSION" = "$applied_version" ]; then
return
fi

if update_Failed; then
return
fi

if ! update_isClusterReady; then
return
fi
logmsg "update_HandleCluster: version:$KUBE_VERSION appliedversion:$applied_version continuing"

# Handle cluster wide component updates
for comp in multus kubevirt cdi longhorn; do
while ! update_Component_CheckReady "$comp"; do
logmsg "Component: $comp not ready on existing version"
sleep 60
done
logmsg "Component: $comp ready on existing version"
if update_Component_IsRunningExpectedVersion "$comp"; then
logmsg "Component:$comp running expected version, continuing"
publishUpdateStatus "$comp" "completed"
continue
fi
if ! update_Component "$comp"; then
logmsg "Not continuing with further updates after component:${comp} update failed"
break
fi
done

update_Version_Set "$KUBE_VERSION"
wait_for_item "update_cluster_post"
}

Update_RunDescheduler() {
# Don't run unless it has been installed
if [ ! -f /var/lib/descheduler_initialized ]; then
return
fi
# Only run once per boot
if [ -f /tmp/descheduler-ran ]; then
return
fi

if [ ! -f $EdgeNodeInfoPath ]; then
return
fi
# is api ready
if ! update_isClusterReady; then
return
fi
# node ready and allowing scheduling
node=$(jq -r '.DeviceName' < $EdgeNodeInfoPath | tr -d '\n' | tr '[:upper:]' '[:lower:]')
node_count_ready=$(kubectl get "node/${node}" | grep -v SchedulingDisabled | grep -cw Ready )
if [ "$node_count_ready" -ne 1 ]; then
return
fi
# Job lives persistently in cluster, cleanup after old runs
if kubectl -n kube-system get job/descheduler-job; then
kubectl -n kube-system delete job/descheduler-job
fi
kubectl apply -f /etc/descheduler-job.yaml
touch /tmp/descheduler-ran
}

update_isClusterReady() {
if ! kubectl cluster-info; then
return 1
fi

if ! update_Helper_APIResponding; then
return 1
fi
return 0
}

#
# Handle kube component updates
#
COMP_UPDATE_PATH="/usr/bin/update-component"

update_Helper_APIResponding() {
if $COMP_UPDATE_PATH --check-api-ready; then
return 0
fi
return 1
}
update_Component_CheckReady() {
comp=$1
if $COMP_UPDATE_PATH --versions-file /etc/expected_versions.yaml --component "$comp" --check-comp-ready; then
return 0
fi
return 1
}
update_Component_Uptime() {
comp=$1
$COMP_UPDATE_PATH --versions-file /etc/expected_versions.yaml --component "$comp" --get-uptime
}
update_Component_IsRunningExpectedVersion() {
comp=$1
if $COMP_UPDATE_PATH --versions-file /etc/expected_versions.yaml --component "$comp" --compare; then
return 0
fi
return 1
}

update_Component() {
comp=$1
# Run go app to check and apply updates and block until new version is ready
publishUpdateStatus "$comp" "in_progress"
if $COMP_UPDATE_PATH --versions-file /etc/expected_versions.yaml --component "$comp" --upgrade; then
publishUpdateStatus "$comp" "completed"
return 0
fi
upgrade_log_path="/persist/kubelog/upgrade-component.log"
logmsg "update_Component comp:${comp} error starting update, see $upgrade_log_path"
publishUpdateStatus "$comp" "failed" "error in $upgrade_log_path"
return 1
}

publishUpdateStatus() {
component=$1
status=$2
errorstr=""
if [ -n "$3" ]; then
errorstr=$3
fi

# If gen==0 we are not updating but installing first versions, most likely on the
# device's first boot. Don't publish, as that would trigger zedagent to claim baseos_updating.
cur_version=$(update_Version_Get)
if [ "$cur_version" = "0" ]; then
return
fi

node=$(jq -r '.DeviceName' < $EdgeNodeInfoPath | tr -d '\n')
logmsg "publishUpdateStatus() $node $component $status"

pillarRootfs=/hostfs/containers/services/pillar/rootfs
LD_LIBRARY_PATH=${pillarRootfs}/usr/lib/ ${pillarRootfs}/opt/zededa/bin/zedkube pubKubeClusterUpdateStatus "$node" "$component" "$status" "$KUBE_VERSION" "$errorstr"
rc=$?
if [ $rc -ne 0 ]; then
logmsg "publishUpdateStatus() $node $component $status in error:$rc"
fi
}
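As a usage note, the optional third argument carries an error string: the script above calls publishUpdateStatus "k3s" "download" when an install begins, and publishUpdateStatus "$comp" "failed" "error in $upgrade_log_path" when a component upgrade fails.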
1 change: 1 addition & 0 deletions pkg/kube/update-component/README
@@ -0,0 +1 @@
./upgrades -c -d -n kube3 -l kubevirt,cdi,longhorn -f upgrades.yaml -u
89 changes: 89 additions & 0 deletions pkg/kube/update-component/cdi.go
@@ -0,0 +1,89 @@
package main

import (
"context"
"fmt"
"strings"
"time"

"github.com/Masterminds/semver"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
cdiNamespace = "cdi"
compCdiDeploymentOperator = "cdi-operator"
)

type cdiComponent struct {
commonComponent
}

func (ctx *cdiComponent) GetVersion() (string, error) {
cs := ctx.cs
cdiDeployment, err := cs.AppsV1().Deployments(cdiNamespace).Get(context.Background(), compCdiDeploymentOperator, metav1.GetOptions{})
if cdiDeployment == nil || err != nil {
return "", fmt.Errorf("failed to get cdi deployment/%s: %v", compCdiDeploymentOperator, err)
}
env := cdiDeployment.Spec.Template.Spec.Containers[0].Env
for _, e := range env {
if e.Name == "OPERATOR_VERSION" {
return e.Value, nil
}
}
return "", nil
}

func (ctx *cdiComponent) UpgradeSupported(sourceVer string, destVer string) error {
destV, err := semver.NewVersion(destVer)
if err != nil {
return err
}
c, err := semver.NewConstraint(">=" + sourceVer)
if err != nil {
return err
}
if !c.Check(destV) {
return fmt.Errorf("version constraints deny %s->%s", sourceVer, destVer)
}
return nil
}

func (ctx *cdiComponent) Uptime(version string) (time.Time, error) {
cs := ctx.cs
cdiDeployment, err := cs.AppsV1().Deployments(cdiNamespace).Get(context.Background(), compCdiDeploymentOperator, metav1.GetOptions{})
if cdiDeployment == nil || err != nil {
return time.Time{}, fmt.Errorf("failed to get cdi deployment/%s: %v", compCdiDeploymentOperator, err)
}
for _, condition := range cdiDeployment.Status.Conditions {
if condition.Reason == "MinimumReplicasAvailable" && condition.Status == v1.ConditionTrue {
return condition.LastTransitionTime.Time, nil
}
}
return time.Time{}, fmt.Errorf("failed to get uptime for cdi deployment/%s", compCdiDeploymentOperator)
}

func (ctx *cdiComponent) Ready(version string) error {
cs := ctx.cs
cdiDeployment, err := cs.AppsV1().Deployments(cdiNamespace).Get(context.Background(), compCdiDeploymentOperator, metav1.GetOptions{})
if cdiDeployment == nil || err != nil {
return fmt.Errorf("failed to get deployment/%s: %v", compCdiDeploymentOperator, err)
}
for _, cdiContainer := range cdiDeployment.Spec.Template.Spec.Containers {
imageTagParts := strings.Split(cdiContainer.Image, ":")
imageVersion := imageTagParts[len(imageTagParts)-1]
if version != imageVersion {
return fmt.Errorf("CDI not yet online at version: %s", version)
}
}
if cdiDeployment.Status.ReadyReplicas != cdiDeployment.Status.Replicas {
return fmt.Errorf("insufficient cdi readiness for deployment: %s", compCdiDeploymentOperator)
}
return nil
}

func (ctx *cdiComponent) UpgradeStart(version string) error {
yamlPath := "https://github.com/kubevirt/containerized-data-importer/releases/download/" + version + "/cdi-operator.yaml"
return ctx.KubectlApply(yamlPath)
}
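cdiComponent embeds commonComponent and appears to satisfy a per-component contract shared with the multus, kubevirt, and longhorn implementations elsewhere in update-component. A minimal sketch of that assumed interface, reconstructed purely from the method set above (the real definition lives in files not included in this truncated diff):

// Hypothetical reconstruction of the contract cdiComponent satisfies;
// the actual interface is defined elsewhere in update-component.
type component interface {
	GetVersion() (string, error)
	UpgradeSupported(sourceVer string, destVer string) error
	Uptime(version string) (time.Time, error)
	Ready(version string) error
	UpgradeStart(version string) error
}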
5 changes: 5 additions & 0 deletions pkg/kube/update-component/expected_versions.yaml
@@ -0,0 +1,5 @@
---
multus: "v3.9.3"
kubevirt: "v1.1.0-dirty"
cdi: "v1.57.1"
longhorn: "v1.6.3"
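For illustration, a file like this decodes directly into a map from component name to expected version. A minimal, self-contained sketch of such a loader (a hypothetical helper using gopkg.in/yaml.v3; the actual parsing code inside update-component is not among the files shown here):

package main

import (
	"fmt"
	"os"

	"gopkg.in/yaml.v3"
)

// loadExpectedVersions decodes a component->version map such as
// /etc/expected_versions.yaml (hypothetical helper for illustration).
func loadExpectedVersions(path string) (map[string]string, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	versions := map[string]string{}
	if err := yaml.Unmarshal(data, &versions); err != nil {
		return nil, err
	}
	return versions, nil
}

func main() {
	versions, err := loadExpectedVersions("/etc/expected_versions.yaml")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(versions["cdi"]) // prints v1.57.1 with the file above
}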