Skip to content

Commit

Permalink
Add alert for rhel6 running vms
Browse files Browse the repository at this point in the history
Add a VM controller that watches for RHEL 6 VMs.
The RHEL 6 template puts a label on the VM with the format:
rhel6-<workload>-<flavor>.
This label is checked to determine whether a vm is rhel6 or not.

Signed-off-by: fossedihelm <[email protected]>
  • Loading branch information
fossedihelm committed Jan 20, 2023
1 parent a8898a1 commit 253fb74
Show file tree
Hide file tree
Showing 9 changed files with 211 additions and 7 deletions.
11 changes: 11 additions & 0 deletions controllers/reconciler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package controllers

import (
"context"
ctrl "sigs.k8s.io/controller-runtime"
)

// ControllerReconciler is the common contract for controllers that are
// started through a manager runnable: the runnable logs Name and calls Start.
type ControllerReconciler interface {
// Start starts the controller using the given manager and reports any
// error encountered while doing so.
Start(ctx context.Context, mgr ctrl.Manager) error
// Name returns the controller's name, used in log and error messages.
Name() string
}
9 changes: 7 additions & 2 deletions controllers/services_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ import (
)

const (
MetricsServiceName = "ssp-operator-metrics"
OperatorName = "ssp-operator"
MetricsServiceName = "ssp-operator-metrics"
OperatorName = "ssp-operator"
// serviceControllerName is the value returned by serviceReconciler.Name().
serviceControllerName = "service-controller"
)

func ServiceObject(namespace string) *v1.Service {
Expand Down Expand Up @@ -65,6 +66,10 @@ func CreateServiceController(ctx context.Context, mgr ctrl.Manager) (*serviceRec
return newServiceReconciler(ctx, mgr)
}

// Name returns the name of the service controller (serviceControllerName).
func (r *serviceReconciler) Name() string {
return serviceControllerName
}

func (r *serviceReconciler) Start(ctx context.Context, mgr ctrl.Manager) error {
err := r.createMetricsService(ctx)
if err != nil && !errors.IsAlreadyExists(err) {
Expand Down
18 changes: 14 additions & 4 deletions controllers/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,19 +115,29 @@ func setupManager(ctx context.Context, cancel context.CancelFunc, mgr controller
return fmt.Errorf("error adding service controller: %w", err)
}

vmController, err := CreateVmController(mgr)
if err != nil {
return fmt.Errorf("failed to create vm controller: %w", err)
}

if err = mgr.Add(getRunnable(mgr, vmController)); err != nil {
return fmt.Errorf("error adding vm controller: %w", err)
}

reconciler := NewSspReconciler(mgr.GetClient(), mgr.GetAPIReader(), infrastructureTopology, sspOperands, crdWatch)

return reconciler.setupController(mgr)
}

func getRunnable(mgr controllerruntime.Manager, serviceController *serviceReconciler) manager.Runnable {
func getRunnable(mgr controllerruntime.Manager, ctrl ControllerReconciler) manager.Runnable {
return manager.RunnableFunc(func(ctx context.Context) error {
err := serviceController.Start(ctx, mgr)
mgr.GetLogger().Info(fmt.Sprintf("Starting %s", ctrl.Name()))
err := ctrl.Start(ctx, mgr)
if err != nil {
return fmt.Errorf("error starting serviceController: %w", err)
return fmt.Errorf("error starting %s: %w", ctrl.Name(), err)
}

mgr.GetLogger().Info("Services Controller started")
mgr.GetLogger().Info(fmt.Sprintf("%s started", ctrl.Name()))

return nil
})
Expand Down
111 changes: 111 additions & 0 deletions controllers/vm_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package controllers

import (
"context"
"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"k8s.io/apimachinery/pkg/api/errors"
kubevirtv1 "kubevirt.io/api/core/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"strings"
)

const (
// vmControllerName is the value returned by vmReconciler.Name().
vmControllerName = "vm-controller"
// rhel6MetricName is the Prometheus metric name of the VmRhel6 gauge.
rhel6MetricName = "kubevirt_vm_rhel6"
)

var (
// VmRhel6 is a gauge vector, labelled by VM namespace and name, that the
// vm controller sets to 1 while a RHEL 6 template based VirtualMachine
// reports the Running printable status and to 0 otherwise.
//
// promauto registers the collector with the default Prometheus
// registerer when this package is initialised.
VmRhel6 = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: rhel6MetricName,
Help: "Indication for a VirtualMachine that is based on rhel6 template",
},
[]string{
"namespace", "name",
},
)
)

// Annotation to generate RBAC roles to read virtualmachines
// +kubebuilder:rbac:groups=kubevirt.io,resources=virtualmachines,verbs=get;list;watch

// CreateVmController builds the VirtualMachine reconciler for the given
// manager by delegating to newVmReconciler.
func CreateVmController(mgr ctrl.Manager) (*vmReconciler, error) {
return newVmReconciler(mgr)
}

// Name returns the name of the vm controller (vmControllerName).
func (r *vmReconciler) Name() string {
return vmControllerName
}

// Start registers the reconciler with mgr by calling setupController and
// returns its error. The ctx argument is not used.
func (r *vmReconciler) Start(ctx context.Context, mgr ctrl.Manager) error {
return r.setupController(mgr)
}

// setupController registers r with mgr as the reconciler for
// kubevirt VirtualMachine objects.
//
// The controller is named after vmControllerName so that the registered name
// and the value returned by Name() cannot drift apart. Events are filtered
// with hasRhel6TemplateLabel, so only VirtualMachines carrying a RHEL 6
// template label are reconciled.
//
// NOTE(review): the predicate only sees the current object, so a VM whose
// RHEL 6 template label is removed stops producing reconcile requests and its
// gauge keeps the last value — confirm whether that case matters.
func (r *vmReconciler) setupController(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		Named(vmControllerName).
		For(&kubevirtv1.VirtualMachine{}, builder.WithPredicates(
			predicate.NewPredicateFuncs(hasRhel6TemplateLabel),
		)).
		Complete(r)
}

// vmReconciler watches the VirtualMachines in the cluster and reports, through
// the VmRhel6 gauge, which RHEL 6 template based ones are running.
type vmReconciler struct {
// client is taken from the manager and used to fetch the VirtualMachine
// being reconciled.
client client.Client
// log is the logger used for reconciliation messages.
log logr.Logger
}

// newVmReconciler returns a vmReconciler wired to the manager's client and to
// a logger named "controllers.VirtualMachines". The returned error is always
// nil.
func newVmReconciler(mgr ctrl.Manager) (*vmReconciler, error) {
	return &vmReconciler{
		client: mgr.GetClient(),
		log:    ctrl.Log.WithName("controllers").WithName("VirtualMachines"),
	}, nil
}

// Reconcile refreshes the VmRhel6 gauge for the VirtualMachine named in req.
//
// The gauge for the VM's namespace/name is set to 1 when the VM's printable
// status is Running and to 0 for any other status. When the VM no longer
// exists its series is removed and the request is not requeued. Any other
// error from the client is returned so that the request is retried.
func (r *vmReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	r.log.V(1).Info("Starting vm reconciliation...", "request", req.String())

	vm := kubevirtv1.VirtualMachine{}
	if err := r.client.Get(ctx, req.NamespacedName, &vm); err != nil {
		if errors.IsNotFound(err) {
			// The VM was deleted: drop its series instead of pinning it at 0,
			// otherwise every deleted VM would leave a stale series behind
			// for the lifetime of the operator process.
			VmRhel6.DeleteLabelValues(req.Namespace, req.Name)
			r.log.Info("VM not found", "vm", req.NamespacedName)
			// Nothing left to do for a deleted object; do not requeue.
			return ctrl.Result{}, nil
		}
		r.log.V(1).Info("Error retrieving the VM", "vm", req.NamespacedName)
		// Error reading the object - requeue the request.
		return ctrl.Result{}, err
	}

	gauge := VmRhel6.WithLabelValues(vm.GetNamespace(), vm.GetName())
	if vm.Status.PrintableStatus == kubevirtv1.VirtualMachineStatusRunning {
		gauge.Set(1)
	} else {
		gauge.Set(0)
	}

	return ctrl.Result{}, nil
}

// hasRhel6TemplateLabel reports whether the object carries a
// "vm.kubevirt.io/template" label whose value starts with "rhel6".
func hasRhel6TemplateLabel(vm client.Object) bool {
	templateName, found := vm.GetLabels()["vm.kubevirt.io/template"]
	return found && strings.HasPrefix(templateName, "rhel6")
}

// Compile-time check that *vmReconciler implements reconcile.Reconciler.
var _ reconcile.Reconciler = &vmReconciler{}
2 changes: 2 additions & 0 deletions internal/common/scheme.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
kubevirt "kubevirt.io/api/core/v1"
instancetypev1alpha2 "kubevirt.io/api/instancetype/v1alpha2"
sspv1beta1 "kubevirt.io/ssp-operator/api/v1beta1"
)
Expand All @@ -23,4 +24,5 @@ func init() {
utilruntime.Must(sspv1beta1.AddToScheme(Scheme))
utilruntime.Must(osconfv1.Install(Scheme))
utilruntime.Must(instancetypev1alpha2.AddToScheme(Scheme))
utilruntime.Must(kubevirt.AddToScheme(Scheme))
}
15 changes: 15 additions & 0 deletions internal/operands/metrics/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const (
PrometheusClusterRoleName = "prometheus-k8s-ssp"
PrometheusServiceAccountName = "prometheus-k8s"
MetricsPortName = "metrics"
Rhel6AlertName = "DeprecatedRHEL6Vm"
)

const (
Expand Down Expand Up @@ -152,6 +153,20 @@ var alertRulesList = []promv1.Rule{
componentAlertLabelKey: componentAlertLabelValue,
},
},
{
Alert: Rhel6AlertName,
Expr: intstr.FromString("sum by (namespace, name) (kubevirt_vm_rhel6) > 0"),
Annotations: map[string]string{
"summary": "VM {{ $labels.namespace }}/{{ $labels.name }} is based on RHEL6 template, and this will not be supported in the next release",
//"runbook_url": runbookURLBasePath + Rhel6AlertName,
},
Labels: map[string]string{
severityAlertLabelKey: "warning",
healthImpactAlertLabelKey: "none",
partOfAlertLabelKey: partOfAlertLabelValue,
componentAlertLabelKey: componentAlertLabelValue,
},
},
}

func getRecordRules() []promv1.Rule {
Expand Down
1 change: 1 addition & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ func runPrometheusServer(metricsAddr string, tlsOptions common.SSPTLSOptions) er
setupLog.Info("Starting Prometheus metrics endpoint server with TLS")
metrics.Registry.MustRegister(common_templates.CommonTemplatesRestored)
metrics.Registry.MustRegister(common.SSPOperatorReconcilingProperly)
metrics.Registry.MustRegister(controllers.VmRhel6)
handler := promhttp.HandlerFor(metrics.Registry, promhttp.HandlerOpts{})
mux := http.NewServeMux()
mux.Handle("/metrics", handler)
Expand Down
2 changes: 1 addition & 1 deletion tests/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ var _ = Describe("Metrics", func() {
It("[test_id:7851]should have all the required annotations", func() {
for _, group := range promRule.Spec.Groups {
for _, rule := range group.Rules {
if rule.Alert != "" {
if rule.Alert != "" && rule.Alert != metrics.Rhel6AlertName {
Expect(rule.Annotations).To(HaveKeyWithValue("summary", Not(BeEmpty())),
fmt.Sprintf("%s summary is missing or empty", rule.Alert))
Expect(rule.Annotations).To(HaveKeyWithValue("runbook_url", Not(BeEmpty())),
Expand Down
49 changes: 49 additions & 0 deletions tests/monitoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"

"k8s.io/utils/pointer"
kubevirtv1 "kubevirt.io/api/core/v1"
sspv1beta1 "kubevirt.io/ssp-operator/api/v1beta1"
"kubevirt.io/ssp-operator/internal/operands/metrics"
"kubevirt.io/ssp-operator/tests/env"
Expand Down Expand Up @@ -154,6 +156,44 @@ var _ = Describe("Prometheus Alerts", func() {
waitForAlertToActivate("SSPDown")
})
})

// Verifies that the DeprecatedRHEL6Vm alert follows the running state of a VM
// labelled as created from a RHEL 6 template.
Context("DeprecatedRHEL6Vm Alert", func() {
	var (
		vm  *kubevirtv1.VirtualMachine
		vmi *kubevirtv1.VirtualMachineInstance
	)

	BeforeEach(func() {
		vmi = NewRandomVMIWithBridgeInterface(strategy.GetNamespace())
		vm = NewVirtualMachine(vmi)
		// The vm controller identifies RHEL 6 VMs by this template label.
		vm.ObjectMeta.Labels = map[string]string{
			"vm.kubevirt.io/template": "rhel6-desktop-large",
		}
		vm.Spec.Running = pointer.Bool(true)
		eventuallyCreateVm(vm)
	})

	AfterEach(func() {
		Expect(apiClient.Delete(ctx, vm)).ToNot(HaveOccurred(), "Failed to delete vm: %s", vm.Name)
	})

	It("Should fire the DeprecatedRHEL6Vm alert if there is a rhel6 running vm", func() {
		waitForAlertToActivate(metrics.Rhel6AlertName)
	})

	It("Should deactivate the DeprecatedRHEL6Vm alert if the rhel6 running vm is stopped", func() {
		waitForAlertToActivate(metrics.Rhel6AlertName)
		Eventually(func() error {
			foundVm := &kubevirtv1.VirtualMachine{}
			// Return the error instead of asserting on it: an assertion made
			// inside the polled function would fail the spec on the first
			// transient Get error rather than letting Eventually retry.
			if err := apiClient.Get(ctx, client.ObjectKeyFromObject(vm), foundVm); err != nil {
				return err
			}
			foundVm.Spec.Running = pointer.Bool(false)
			return apiClient.Update(ctx, foundVm)
		}, env.Timeout(), time.Second).Should(Succeed())

		waitForAlertToDeactivate(metrics.Rhel6AlertName)
	})
})
})

func waitForAlertToActivate(alertName string) {
Expand All @@ -165,6 +205,15 @@ func waitForAlertToActivate(alertName string) {
}, env.Timeout(), time.Second).ShouldNot(BeNil())
}

func waitForAlertToDeactivate(alertName string) {
Eventually(func() *promApiv1.Alert {
alerts, err := getPrometheusClient().Alerts(context.TODO())
Expect(err).ShouldNot(HaveOccurred())
alert := getAlertByName(alerts, alertName)
return alert
}, env.Timeout(), time.Second).Should(BeNil())
}

func waitForSeriesToBeDetected(seriesName string) {
Eventually(func() bool {
results, _, err := getPrometheusClient().Query(context.TODO(), seriesName, time.Now())
Expand Down

0 comments on commit 253fb74

Please sign in to comment.