Fix issues with high availability detection
* Add the missing "watch" permission for nodes.
* Make the status field a pointer to bool, to distinguish between false
  and the field not being set.
* On the mutation webhook, fix the eviction strategy if an SNO cluster
  became highly available.
* Fix an issue where the status field was not set if no node was added
  or deleted.
* Fix a unit test that asserted on the stale in-memory HCO instead of
  the HCO read back from the client.
* Fix an issue in the node controller: ignore the error if the HCO CR
  is not present.

Signed-off-by: Nahshon Unna-Tsameret <[email protected]>
nunnatsa committed Dec 15, 2024
1 parent 70de1bc commit fca9890
Showing 14 changed files with 148 additions and 88 deletions.
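
The heart of the change is switching the `infrastructureHighlyAvailable` status field from `bool` to `*bool`. With `omitempty`, a plain `bool` cannot represent an explicit `false` in the serialized status. A minimal standalone sketch (not part of this commit) of the difference:

```go
package main

import (
	"encoding/json"
	"fmt"

	"k8s.io/utils/ptr"
)

type plainStatus struct {
	InfrastructureHighlyAvailable bool `json:"infrastructureHighlyAvailable,omitempty"`
}

type pointerStatus struct {
	InfrastructureHighlyAvailable *bool `json:"infrastructureHighlyAvailable,omitempty"`
}

func main() {
	// With a plain bool, omitempty drops the zero value, so an explicit
	// "false" serializes to nothing and is indistinguishable from "unset".
	a, _ := json.Marshal(plainStatus{InfrastructureHighlyAvailable: false})
	fmt.Println(string(a)) // {}

	// With *bool, nil really means "not set"...
	b, _ := json.Marshal(pointerStatus{})
	fmt.Println(string(b)) // {}

	// ...while a pointer to false is an explicit, serialized "false".
	c, _ := json.Marshal(pointerStatus{InfrastructureHighlyAvailable: ptr.To(false)})
	fmt.Println(string(c)) // {"infrastructureHighlyAvailable":false}
}
```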
2 changes: 1 addition & 1 deletion api/v1beta1/hyperconverged_types.go
@@ -736,7 +736,7 @@ type HyperConvergedStatus struct {
// InfrastructureHighlyAvailable describes whether the cluster has only one worker node
// (false) or more (true).
// +optional
- InfrastructureHighlyAvailable bool `json:"infrastructureHighlyAvailable,omitempty"`
+ InfrastructureHighlyAvailable *bool `json:"infrastructureHighlyAvailable,omitempty"`
}

type Version struct {
5 changes: 5 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Generated file; diff not rendered.

4 changes: 4 additions & 0 deletions controllers/hyperconverged/hyperconverged_controller.go
@@ -32,6 +32,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/handler"
@@ -423,6 +424,9 @@ func (r *ReconcileHyperConverged) doReconcile(req *common.HcoRequest) (reconcile
if init {
r.eventEmitter.EmitEvent(req.Instance, corev1.EventTypeNormal, "InitHCO", "Initiating the HyperConverged")
r.setInitialConditions(req)

+ req.Instance.Status.InfrastructureHighlyAvailable = ptr.To(hcoutil.GetClusterInfo().IsInfrastructureHighlyAvailable())
+ req.StatusDirty = true
}

r.setLabels(req)
23 changes: 16 additions & 7 deletions controllers/nodes/nodes_controller.go
@@ -3,22 +3,22 @@ package nodes
import (
"context"

operatorhandler "github.com/operator-framework/operator-lib/handler"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/workqueue"
"sigs.k8s.io/controller-runtime/pkg/event"

hcov1beta1 "github.com/kubevirt/hyperconverged-cluster-operator/api/v1beta1"

operatorhandler "github.com/operator-framework/operator-lib/handler"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

hcov1beta1 "github.com/kubevirt/hyperconverged-cluster-operator/api/v1beta1"
hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util"
)

@@ -113,11 +113,20 @@ func (r *ReconcileNodeCounter) Reconcile(ctx context.Context, _ reconcile.Reques
}
err = r.client.Get(ctx, hcoKey, hco)
if err != nil {
+ if errors.IsNotFound(err) {
+ return reconcile.Result{}, nil
+ }
return reconcile.Result{}, err
}

- if hco.Status.InfrastructureHighlyAvailable != clusterInfo.IsInfrastructureHighlyAvailable() {
- hco.Status.InfrastructureHighlyAvailable = clusterInfo.IsInfrastructureHighlyAvailable()
+ if !hco.ObjectMeta.DeletionTimestamp.IsZero() {
+ return reconcile.Result{}, nil
+ }
+
+ if hco.Status.InfrastructureHighlyAvailable == nil ||
+ *hco.Status.InfrastructureHighlyAvailable != clusterInfo.IsInfrastructureHighlyAvailable() {
+
+ hco.Status.InfrastructureHighlyAvailable = ptr.To(clusterInfo.IsInfrastructureHighlyAvailable())
err = r.client.Status().Update(ctx, hco)
if err != nil {
return reconcile.Result{}, err
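
Three guards now drive the reconcile flow above: a reconciler triggered by Node events must tolerate the HyperConverged CR being absent (returning the NotFound error would only retry pointlessly), it must not touch the status of a CR that is being deleted, and it should only write when the stored value is unset or stale. A condensed sketch of that flow (not the repo's literal code; `haNow` stands in for `clusterInfo.IsInfrastructureHighlyAvailable()`):

```go
package nodes

import (
	"context"

	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/utils/ptr"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	hcov1beta1 "github.com/kubevirt/hyperconverged-cluster-operator/api/v1beta1"
)

// reconcileNodeCount restates the three guards: tolerate a missing CR,
// skip CRs being deleted, and update the status only when it changed.
func reconcileNodeCount(ctx context.Context, c client.Client, key types.NamespacedName, haNow bool) (reconcile.Result, error) {
	hco := &hcov1beta1.HyperConverged{}
	if err := c.Get(ctx, key, hco); err != nil {
		if errors.IsNotFound(err) {
			// No HyperConverged CR: nothing to update, no reason to requeue.
			return reconcile.Result{}, nil
		}
		return reconcile.Result{}, err
	}

	if !hco.DeletionTimestamp.IsZero() {
		// The CR is being deleted; leave its status alone.
		return reconcile.Result{}, nil
	}

	if hco.Status.InfrastructureHighlyAvailable == nil || *hco.Status.InfrastructureHighlyAvailable != haNow {
		hco.Status.InfrastructureHighlyAvailable = ptr.To(haNow)
		return reconcile.Result{}, c.Status().Update(ctx, hco)
	}
	return reconcile.Result{}, nil
}
```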
45 changes: 33 additions & 12 deletions controllers/nodes/nodes_controller_test.go
@@ -37,9 +37,6 @@ var _ = Describe("NodesController", func() {
Describe("Reconcile NodesController", func() {

BeforeEach(func() {
- hcoutil.GetClusterInfo = func() hcoutil.ClusterInfo {
- return commontestutils.ClusterInfoMock{}
- }
_ = os.Setenv(hcoutil.OperatorNamespaceEnv, commontestutils.Namespace)
})

@@ -49,6 +46,9 @@ var _ = Describe("NodesController", func() {

Context("Node Count Change", func() {
It("Should update InfrastructureHighlyAvailable to true if there are two or more worker nodes", func() {
+ hcoutil.GetClusterInfo = func() hcoutil.ClusterInfo {
+ return commontestutils.ClusterInfoMock{}
+ }
hco := commontestutils.NewHco()
numWorkerNodes := 3
var nodesArray []client.Object
@@ -66,8 +66,6 @@ var _ = Describe("NodesController", func() {

resources := []client.Object{hco, nodesArray[0], nodesArray[1], nodesArray[2]}
cl := commontestutils.InitClient(resources)
- logger := zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)).WithName("nodes_controller_test")
- Expect(hcoutil.GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed())
r := &ReconcileNodeCounter{
client: cl,
}
@@ -82,12 +80,14 @@ var _ = Describe("NodesController", func() {
cl.Get(context.TODO(),
types.NamespacedName{Name: commontestutils.Name, Namespace: commontestutils.Namespace},
latestHCO),
- ).ToNot(HaveOccurred())
+ ).To(Succeed())

- Expect(latestHCO.Status.InfrastructureHighlyAvailable).To(BeTrue())
Expect(res).To(Equal(reconcile.Result{}))
+ Expect(latestHCO.Status.InfrastructureHighlyAvailable).To(HaveValue(BeTrue()))
})
It("Should update InfrastructureHighlyAvailable to false if there is only one worker node", func() {
+ hcoutil.GetClusterInfo = func() hcoutil.ClusterInfo {
+ return commontestutils.ClusterInfoSNOMock{}
+ }
hco := commontestutils.NewHco()
workerNode := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
@@ -99,8 +99,6 @@ var _ = Describe("NodesController", func() {
}
resources := []client.Object{hco, workerNode}
cl := commontestutils.InitClient(resources)
- logger := zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)).WithName("nodes_controller_test")
- Expect(hcoutil.GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed())
r := &ReconcileNodeCounter{
client: cl,
}
@@ -115,11 +113,34 @@ var _ = Describe("NodesController", func() {
cl.Get(context.TODO(),
types.NamespacedName{Name: commontestutils.Name, Namespace: commontestutils.Namespace},
latestHCO),
- ).ToNot(HaveOccurred())
+ ).To(Succeed())

- Expect(hco.Status.InfrastructureHighlyAvailable).To(BeFalse())
+ Expect(latestHCO.Status.InfrastructureHighlyAvailable).To(HaveValue(BeFalse()))
Expect(res).To(Equal(reconcile.Result{}))
})

It("Should not return error if the HyperConverged CR is not exist", func() {
workerNode := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "worker",
Labels: map[string]string{
"node-role.kubernetes.io/worker": "",
},
},
}
resources := []client.Object{workerNode}
cl := commontestutils.InitClient(resources)
logger := zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)).WithName("nodes_controller_test")
Expect(hcoutil.GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed())
r := &ReconcileNodeCounter{
client: cl,
}

// Reconcile to update HCO's status with the correct InfrastructureHighlyAvailable value
res, err := r.Reconcile(context.TODO(), request)
Expect(err).ToNot(HaveOccurred())
Expect(res.Requeue).To(BeFalse())
})
})

})
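
The assertions also change shape: with a `*bool` field, `BeTrue()` would be applied to the pointer itself, so the tests wrap the inner matcher in Gomega's `HaveValue`, which dereferences first and fails cleanly when the field is nil. A standalone illustration (hypothetical test, not from the repo):

```go
package example_test

import (
	"testing"

	. "github.com/onsi/gomega"
	"k8s.io/utils/ptr"
)

func TestPointerBoolAssertions(t *testing.T) {
	g := NewWithT(t)

	v := ptr.To(true)

	// Without HaveValue the pointer must be dereferenced by hand
	// (and nil-checked first to avoid a panic).
	g.Expect(*v).To(BeTrue())

	// HaveValue dereferences the pointer before applying the inner matcher,
	// so the assertion reads naturally and fails instead of panicking on nil.
	g.Expect(v).To(HaveValue(BeTrue()))
	g.Expect(ptr.To(false)).To(HaveValue(BeFalse()))

	// The third state stays distinguishable: unset is neither true nor false.
	var unset *bool
	g.Expect(unset).To(BeNil())
}
```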
1 change: 1 addition & 0 deletions deploy/cluster_role.yaml
@@ -857,6 +857,7 @@ rules:
verbs:
- get
- list
+ - watch
- apiGroups:
- ""
resources:
@@ -327,6 +327,7 @@ spec:
verbs:
- get
- list
+ - watch
- apiGroups:
- ""
resources:
@@ -9,7 +9,7 @@ metadata:
certified: "false"
console.openshift.io/disable-operand-delete: "true"
containerImage: quay.io/kubevirt/hyperconverged-cluster-operator:1.14.0-unstable
createdAt: "2024-12-13 05:05:26"
createdAt: "2024-12-15 07:54:32"
description: A unified operator deploying and controlling KubeVirt and its supporting
operators with opinionated defaults
features.operators.openshift.io/cnf: "false"
@@ -327,6 +327,7 @@ spec:
verbs:
- get
- list
+ - watch
- apiGroups:
- ""
resources:
2 changes: 1 addition & 1 deletion docs/api.md
@@ -249,7 +249,7 @@ HyperConvergedStatus defines the observed state of HyperConverged
| dataImportSchedule | DataImportSchedule is the cron expression that is used in for the hard-coded data import cron templates. HCO generates the value of this field once and stored in the status field, so will survive restart. | string | | false |
| dataImportCronTemplates | DataImportCronTemplates is a list of the actual DataImportCronTemplates as HCO update in the SSP CR. The list contains both the common and the custom templates, including any modification done by HCO. | [][DataImportCronTemplateStatus](#dataimportcrontemplatestatus) | | false |
| systemHealthStatus | SystemHealthStatus reflects the health of HCO and its secondary resources, based on the aggregated conditions. | string | | false |
- | infrastructureHighlyAvailable | InfrastructureHighlyAvailable describes whether the cluster has only one worker node (false) or more (true). | bool | | false |
+ | infrastructureHighlyAvailable | InfrastructureHighlyAvailable describes whether the cluster has only one worker node (false) or more (true). | *bool | | false |

[Back to TOC](#table-of-contents)

2 changes: 1 addition & 1 deletion pkg/components/components.go
@@ -516,7 +516,7 @@ func GetClusterPermissions() []rbacv1.PolicyRule {
{
APIGroups: emptyAPIGroup,
Resources: stringListToSlice("nodes"),
Verbs: stringListToSlice("get", "list"),
Verbs: stringListToSlice("get", "list", "watch"),
},
{
APIGroups: emptyAPIGroup,
45 changes: 26 additions & 19 deletions pkg/webhooks/mutator/hyperConvergedMutator.go
@@ -5,8 +5,6 @@ import (
"fmt"
"net/http"

- hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util"

"gomodules.xyz/jsonpatch/v2"
admissionv1 "k8s.io/api/admission/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -79,23 +77,7 @@ func (hcm *HyperConvergedMutator) mutateHyperConverged(_ context.Context, req ad
}
}

- if hc.Spec.EvictionStrategy == nil {
- ci := hcoutil.GetClusterInfo()
- if ci.IsInfrastructureHighlyAvailable() {
- patches = append(patches, jsonpatch.JsonPatchOperation{
- Operation: "add",
- Path: "/spec/evictionStrategy",
- Value: kubevirtcorev1.EvictionStrategyLiveMigrate,
- })
- } else {
- patches = append(patches, jsonpatch.JsonPatchOperation{
- Operation: "add",
- Path: "/spec/evictionStrategy",
- Value: kubevirtcorev1.EvictionStrategyNone,
- })
- }
-
- }
+ patches = mutateEvictionStrategy(hc, patches)

if hc.Spec.MediatedDevicesConfiguration != nil {
if len(hc.Spec.MediatedDevicesConfiguration.MediatedDevicesTypes) > 0 && len(hc.Spec.MediatedDevicesConfiguration.MediatedDeviceTypes) == 0 { //nolint SA1019
@@ -122,3 +104,28 @@

return admission.Allowed("")
}

+ func mutateEvictionStrategy(hc *hcov1beta1.HyperConverged, patches []jsonpatch.JsonPatchOperation) []jsonpatch.JsonPatchOperation {
+ if hc.Status.InfrastructureHighlyAvailable == nil { // New HyperConverged CR
+ return patches
+ }
+
+ var value = kubevirtcorev1.EvictionStrategyNone
+ if hc.Spec.EvictionStrategy == nil {
+ if *hc.Status.InfrastructureHighlyAvailable {
+ value = kubevirtcorev1.EvictionStrategyLiveMigrate
+ }
+ } else if *hc.Status.InfrastructureHighlyAvailable && *hc.Spec.EvictionStrategy == kubevirtcorev1.EvictionStrategyNone {
+ value = kubevirtcorev1.EvictionStrategyLiveMigrate
+ } else {
+ return patches
+ }
+
+ patches = append(patches, jsonpatch.JsonPatchOperation{
+ Operation: "replace",
+ Path: "/spec/evictionStrategy",
+ Value: value,
+ })
+
+ return patches
+ }
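
The branching in `mutateEvictionStrategy` is dense. The sketch below (illustrative only, with plain strings standing in for the KubeVirt `EvictionStrategy` constants) enumerates the patch produced for each combination of the stored status and the spec:

```go
package main

import "fmt"

// decide mirrors mutateEvictionStrategy's branching: statusHA is
// Status.InfrastructureHighlyAvailable, spec is Spec.EvictionStrategy.
func decide(statusHA *bool, spec *string) string {
	if statusHA == nil {
		// New CR: the status is not populated yet, so leave the spec
		// alone and let the controller fill in the status first.
		return "no patch"
	}
	if spec == nil {
		if *statusHA {
			return "replace /spec/evictionStrategy -> LiveMigrate"
		}
		return "replace /spec/evictionStrategy -> None"
	}
	if *statusHA && *spec == "None" {
		// The cluster grew from a single worker (SNO) to highly
		// available: upgrade the old default from None to LiveMigrate.
		return "replace /spec/evictionStrategy -> LiveMigrate"
	}
	return "no patch" // any other explicit user choice is preserved
}

func main() {
	t, f := true, false
	none, lm := "None", "LiveMigrate"
	cases := []struct {
		name string
		ha   *bool
		spec *string
	}{
		{"status unset", nil, nil},
		{"not HA, spec unset", &f, nil},
		{"HA, spec unset", &t, nil},
		{"HA, spec None", &t, &none},
		{"not HA, spec None", &f, &none},
		{"HA, spec LiveMigrate", &t, &lm},
	}
	for _, c := range cases {
		fmt.Printf("%-22s => %s\n", c.name, decide(c.ha, c.spec))
	}
}
```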
41 changes: 19 additions & 22 deletions pkg/webhooks/mutator/hyperConvergedMutator_test.go
@@ -47,9 +47,11 @@ var _ = Describe("test HyperConverged mutator", func() {
Name: util.HyperConvergedName,
Namespace: HcoValidNamespace,
},
- Spec: v1beta1.HyperConvergedSpec{},
+ Spec: v1beta1.HyperConvergedSpec{
+ EvictionStrategy: ptr.To(kubevirtcorev1.EvictionStrategyLiveMigrate),
+ },
}
- cr.Spec.EvictionStrategy = ptr.To(kubevirtcorev1.EvictionStrategyLiveMigrate)

cli = commontestutils.InitClient(nil)
mutator = initHCMutator(s, cli)
})
@@ -267,13 +269,9 @@ var _ = Describe("test HyperConverged mutator", func() {

DescribeTable("check EvictionStrategy default", func(SNO bool, strategy *kubevirtcorev1.EvictionStrategy, patches []jsonpatch.JsonPatchOperation) {
if SNO {
- hcoutil.GetClusterInfo = func() hcoutil.ClusterInfo {
- return &commontestutils.ClusterInfoSNOMock{}
- }
+ cr.Status.InfrastructureHighlyAvailable = ptr.To(false)
} else {
- hcoutil.GetClusterInfo = func() hcoutil.ClusterInfo {
- return &commontestutils.ClusterInfoMock{}
- }
+ cr.Status.InfrastructureHighlyAvailable = ptr.To(true)
}

cr.Spec.EvictionStrategy = strategy
@@ -289,48 +287,52 @@
true,
nil,
[]jsonpatch.JsonPatchOperation{jsonpatch.JsonPatchOperation{
Operation: "add",
Operation: "replace",
Path: "/spec/evictionStrategy",
Value: kubevirtcorev1.EvictionStrategyNone,
}},
),
Entry("should not override EvictionStrategy if set and on SNO - 1",
true,
- pointerEvictionStrategy(kubevirtcorev1.EvictionStrategyNone),
+ ptr.To(kubevirtcorev1.EvictionStrategyNone),
nil,
),
Entry("should not override EvictionStrategy if set and on SNO - 2",
true,
- pointerEvictionStrategy(kubevirtcorev1.EvictionStrategyLiveMigrate),
+ ptr.To(kubevirtcorev1.EvictionStrategyLiveMigrate),
nil,
),
Entry("should not override EvictionStrategy if set and on SNO - 3",
true,
- pointerEvictionStrategy(kubevirtcorev1.EvictionStrategyExternal),
+ ptr.To(kubevirtcorev1.EvictionStrategyExternal),
nil,
),
Entry("should set EvictionStrategyLiveMigrate if not set and not on SNO",
false,
nil,
[]jsonpatch.JsonPatchOperation{jsonpatch.JsonPatchOperation{
Operation: "add",
Operation: "replace",
Path: "/spec/evictionStrategy",
Value: kubevirtcorev1.EvictionStrategyLiveMigrate,
}},
),
Entry("should not override EvictionStrategy if set and not on SNO - 1",
false,
- pointerEvictionStrategy(kubevirtcorev1.EvictionStrategyNone),
- nil,
+ ptr.To(kubevirtcorev1.EvictionStrategyNone),
+ []jsonpatch.JsonPatchOperation{jsonpatch.JsonPatchOperation{
+ Operation: "replace",
+ Path: "/spec/evictionStrategy",
+ Value: kubevirtcorev1.EvictionStrategyLiveMigrate,
+ }},
),
Entry("should not override EvictionStrategy if set and not on SNO - 2",
false,
- pointerEvictionStrategy(kubevirtcorev1.EvictionStrategyLiveMigrate),
+ ptr.To(kubevirtcorev1.EvictionStrategyLiveMigrate),
nil,
),
Entry("should not override EvictionStrategy if set and not on SNO - 3",
false,
- pointerEvictionStrategy(kubevirtcorev1.EvictionStrategyExternal),
+ ptr.To(kubevirtcorev1.EvictionStrategyExternal),
nil,
),
)
@@ -468,8 +470,3 @@ func initHCMutator(s *runtime.Scheme, testClient client.Client) *HyperConvergedM

return mutator
}

- func pointerEvictionStrategy(strategy kubevirtcorev1.EvictionStrategy) *kubevirtcorev1.EvictionStrategy {
- str := strategy
- return &str
- }