diff --git a/bundle/manifests/monitoring.rhobs_monitoringstacks.yaml b/bundle/manifests/monitoring.rhobs_monitoringstacks.yaml index 278a94575..d308979b8 100644 --- a/bundle/manifests/monitoring.rhobs_monitoringstacks.yaml +++ b/bundle/manifests/monitoring.rhobs_monitoringstacks.yaml @@ -53,6 +53,53 @@ spec: - info - warning type: string + namespaceSelector: + description: Namespace selector for Monitoring Stack Resources. If + left empty the Monitoring Stack will only match resources in the + namespace it was created in. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic prometheusConfig: default: replicas: 2 diff --git a/bundle/manifests/observability-operator.clusterserviceversion.yaml b/bundle/manifests/observability-operator.clusterserviceversion.yaml index 26bae4d0c..20212db65 100644 --- a/bundle/manifests/observability-operator.clusterserviceversion.yaml +++ b/bundle/manifests/observability-operator.clusterserviceversion.yaml @@ -17,7 +17,7 @@ metadata: "logLevel": "debug", "resourceSelector": { "matchLabels": { - "system": "foo" + "app": "demo" } }, "retention": "1d" @@ -328,6 +328,8 @@ spec: - apiGroups: - rbac.authorization.k8s.io resources: + - clusterrolebindings + - clusterroles - rolebindings - roles verbs: diff --git a/deploy/crds/common/monitoring.rhobs_monitoringstacks.yaml b/deploy/crds/common/monitoring.rhobs_monitoringstacks.yaml index 588cbce31..153063814 100644 --- a/deploy/crds/common/monitoring.rhobs_monitoringstacks.yaml +++ b/deploy/crds/common/monitoring.rhobs_monitoringstacks.yaml @@ -54,6 +54,53 @@ spec: - info - warning type: string + namespaceSelector: + description: Namespace selector for Monitoring Stack Resources. If + left empty the Monitoring Stack will only match resources in the + namespace it was created in. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. 
+ If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic prometheusConfig: default: replicas: 2 diff --git a/deploy/operator/observability-operator-cluster-role.yaml b/deploy/operator/observability-operator-cluster-role.yaml index 3a03f66bd..cad4c79c8 100644 --- a/deploy/operator/observability-operator-cluster-role.yaml +++ b/deploy/operator/observability-operator-cluster-role.yaml @@ -148,6 +148,8 @@ rules: - apiGroups: - rbac.authorization.k8s.io resources: + - clusterrolebindings + - clusterroles - rolebindings - roles verbs: diff --git a/deploy/samples/monitoring-stack.yaml b/deploy/samples/monitoring-stack.yaml index 8801cd60b..e285cfeed 100644 --- a/deploy/samples/monitoring-stack.yaml +++ b/deploy/samples/monitoring-stack.yaml @@ -9,4 +9,4 @@ spec: retention: 1d resourceSelector: matchLabels: - system: foo + app: demo diff --git a/deploy/samples/multi-ns-stack.yaml b/deploy/samples/multi-ns-stack.yaml new file mode 100644 index 000000000..9046c2e73 --- /dev/null +++ b/deploy/samples/multi-ns-stack.yaml @@ -0,0 +1,15 @@ +apiVersion: monitoring.rhobs/v1alpha1 +kind: MonitoringStack +metadata: + name: multi-ns + labels: + thanos-querier: the-querier +spec: + logLevel: debug + retention: 2h + resourceSelector: + matchLabels: + app: demo + namespaceSelector: + matchLabels: + monitoring.rhobs/stack: multi-ns diff --git a/docs/api.md b/docs/api.md index 55db3488c..994df8028 100644 --- a/docs/api.md +++ 
b/docs/api.md @@ -104,6 +104,13 @@ MonitoringStackSpec is the specification for desired Monitoring Stack Default: info
false + + namespaceSelector + object + + Namespace selector for Monitoring Stack Resources. If left empty the Monitoring Stack will only match resources in the namespace it was created in.
+ + false prometheusConfig object @@ -171,6 +178,81 @@ Define Alertmanager config +### MonitoringStack.spec.namespaceSelector +[↩ Parent](#monitoringstackspec) + + + +Namespace selector for Monitoring Stack Resources. If left empty the Monitoring Stack will only match resources in the namespace it was created in. + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
matchExpressions[]object + matchExpressions is a list of label selector requirements. The requirements are ANDed.
+
false
matchLabelsmap[string]string + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
+
false
+ + +### MonitoringStack.spec.namespaceSelector.matchExpressions[index] +[↩ Parent](#monitoringstackspecnamespaceselector) + + + +A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
keystring + key is the label key that the selector applies to.
+
true
operatorstring + operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
+
true
values[]string + values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
+
false
+ + ### MonitoringStack.spec.prometheusConfig [↩ Parent](#monitoringstackspec) diff --git a/pkg/apis/monitoring/v1alpha1/types.go b/pkg/apis/monitoring/v1alpha1/types.go index 8c45d8bbb..3a85e48db 100644 --- a/pkg/apis/monitoring/v1alpha1/types.go +++ b/pkg/apis/monitoring/v1alpha1/types.go @@ -58,6 +58,11 @@ type MonitoringStackSpec struct { // +optional ResourceSelector *metav1.LabelSelector `json:"resourceSelector,omitempty"` + // Namespace selector for Monitoring Stack Resources. + // If left empty the Monitoring Stack will only match resources in the namespace it was created in. + // +optional + NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty"` + // Time duration to retain data for. Default is '120h', // and must match the regular expression `[0-9]+(ms|s|m|h|d|w|y)` (milliseconds seconds minutes hours days weeks years). // +kubebuilder:default="120h" diff --git a/pkg/apis/monitoring/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/monitoring/v1alpha1/zz_generated.deepcopy.go index 80fb079c2..4c649c74e 100644 --- a/pkg/apis/monitoring/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/monitoring/v1alpha1/zz_generated.deepcopy.go @@ -126,6 +126,11 @@ func (in *MonitoringStackSpec) DeepCopyInto(out *MonitoringStackSpec) { *out = new(v1.LabelSelector) (*in).DeepCopyInto(*out) } + if in.NamespaceSelector != nil { + in, out := &in.NamespaceSelector, &out.NamespaceSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } in.Resources.DeepCopyInto(&out.Resources) if in.PrometheusConfig != nil { in, out := &in.PrometheusConfig, &out.PrometheusConfig diff --git a/pkg/controllers/monitoring/monitoring-stack/alertmanager.go b/pkg/controllers/monitoring/monitoring-stack/alertmanager.go index b8a13ada0..9e81cbf41 100644 --- a/pkg/controllers/monitoring/monitoring-stack/alertmanager.go +++ b/pkg/controllers/monitoring/monitoring-stack/alertmanager.go @@ -72,6 +72,7 @@ func newAlertmanager( RunAsNonRoot: pointer.Bool(true), RunAsUser: 
pointer.Int64(AlertmanagerUserFSGroupID), }, + AlertmanagerConfigNamespaceSelector: ms.Spec.NamespaceSelector, }, } } @@ -127,23 +128,20 @@ func newAlertmanagerPDB(ms *stack.MonitoringStack, instanceSelectorKey string, i } } -func newAlertManagerRole(ms *stack.MonitoringStack, rbacResourceName string, rbacVerbs []string) *rbacv1.Role { - return &rbacv1.Role{ +func newAlertManagerClusterRole(ms *stack.MonitoringStack, rbacResourceName string, rbacVerbs []string) *rbacv1.ClusterRole { + return &rbacv1.ClusterRole{ TypeMeta: metav1.TypeMeta{ APIVersion: rbacv1.SchemeGroupVersion.String(), - Kind: "Role", + Kind: "ClusterRole", }, ObjectMeta: metav1.ObjectMeta{ Name: rbacResourceName, - Namespace: ms.Namespace, }, - Rules: []rbacv1.PolicyRule{ - { - APIGroups: []string{"security.openshift.io"}, - Resources: []string{"securitycontextconstraints"}, - ResourceNames: []string{"nonroot", "nonroot-v2"}, - Verbs: []string{"use"}, - }, - }, + Rules: []rbacv1.PolicyRule{{ + APIGroups: []string{"security.openshift.io"}, + Resources: []string{"securitycontextconstraints"}, + ResourceNames: []string{"nonroot", "nonroot-v2"}, + Verbs: []string{"use"}, + }}, } } diff --git a/pkg/controllers/monitoring/monitoring-stack/components.go b/pkg/controllers/monitoring/monitoring-stack/components.go index cbe128e10..508863133 100644 --- a/pkg/controllers/monitoring/monitoring-stack/components.go +++ b/pkg/controllers/monitoring/monitoring-stack/components.go @@ -23,62 +23,67 @@ const PrometheusUserFSGroupID = 65534 const AlertmanagerUserFSGroupID = 65535 func stackComponentReconcilers(ms *stack.MonitoringStack, instanceSelectorKey string, instanceSelectorValue string) []reconciler.Reconciler { - prometheusRBACResourceName := ms.Name + "-prometheus" - alertmanagerRBACResourceName := ms.Name + "-alertmanager" + prometheusName := ms.Name + "-prometheus" + alertmanagerName := ms.Name + "-alertmanager" rbacVerbs := []string{"get", "list", "watch"} additionalScrapeConfigsSecretName := ms.Name + 
"-prometheus-additional-scrape-configs" + hasNsSelector := ms.Spec.NamespaceSelector != nil + deployAlertmanager := !ms.Spec.AlertmanagerConfig.Disabled + return []reconciler.Reconciler{ - reconciler.NewUpdater(newServiceAccount(prometheusRBACResourceName, ms.Namespace), ms), - reconciler.NewUpdater(newPrometheusRole(ms, prometheusRBACResourceName, rbacVerbs), ms), - reconciler.NewUpdater(newRoleBinding(ms, prometheusRBACResourceName), ms), + // Prometheus Deployment + reconciler.NewUpdater(newServiceAccount(prometheusName, ms.Namespace), ms), + reconciler.NewUpdater(newPrometheusClusterRole(ms, prometheusName, rbacVerbs), ms), reconciler.NewUpdater(newAdditionalScrapeConfigsSecret(ms, additionalScrapeConfigsSecretName), ms), - reconciler.NewUpdater(newServiceAccount(alertmanagerRBACResourceName, ms.Namespace), ms), - reconciler.NewOptionalUpdater(newAlertManagerRole(ms, alertmanagerRBACResourceName, rbacVerbs), ms, - !ms.Spec.AlertmanagerConfig.Disabled), - reconciler.NewOptionalUpdater(newRoleBinding(ms, alertmanagerRBACResourceName), ms, - !ms.Spec.AlertmanagerConfig.Disabled), - reconciler.NewOptionalUpdater(newAlertmanager(ms, alertmanagerRBACResourceName, instanceSelectorKey, instanceSelectorValue), ms, - !ms.Spec.AlertmanagerConfig.Disabled), - reconciler.NewOptionalUpdater(newAlertmanagerService(ms, instanceSelectorKey, instanceSelectorValue), ms, - !ms.Spec.AlertmanagerConfig.Disabled), - reconciler.NewOptionalUpdater(newAlertmanagerPDB(ms, instanceSelectorKey, instanceSelectorValue), ms, - !ms.Spec.AlertmanagerConfig.Disabled), - reconciler.NewUpdater(newPrometheus(ms, prometheusRBACResourceName, additionalScrapeConfigsSecretName, instanceSelectorKey, instanceSelectorValue), ms), + reconciler.NewUpdater(newPrometheus(ms, prometheusName, + additionalScrapeConfigsSecretName, + instanceSelectorKey, instanceSelectorValue), ms), reconciler.NewUpdater(newPrometheusService(ms, instanceSelectorKey, instanceSelectorValue), ms), 
reconciler.NewUpdater(newThanosSidecarService(ms, instanceSelectorKey, instanceSelectorValue), ms), reconciler.NewOptionalUpdater(newPrometheusPDB(ms, instanceSelectorKey, instanceSelectorValue), ms, *ms.Spec.PrometheusConfig.Replicas > 1), + + // Alertmanager Deployment + reconciler.NewOptionalUpdater(newServiceAccount(alertmanagerName, ms.Namespace), ms, deployAlertmanager), + // create clusterrolebinding if nsSelector's present otherwise a rolebinding + reconciler.NewOptionalUpdater(newClusterRoleBinding(ms, prometheusName), ms, hasNsSelector), + reconciler.NewOptionalUpdater(newRoleBindingForClusterRole(ms, prometheusName), ms, !hasNsSelector), + + reconciler.NewOptionalUpdater(newAlertManagerClusterRole(ms, alertmanagerName, rbacVerbs), ms, deployAlertmanager), + + // create clusterrolebinding if alertmanager is enabled and namespace selector is also present in MonitoringStack + reconciler.NewOptionalUpdater(newClusterRoleBinding(ms, alertmanagerName), ms, deployAlertmanager && hasNsSelector), + reconciler.NewOptionalUpdater(newRoleBindingForClusterRole(ms, alertmanagerName), ms, deployAlertmanager && !hasNsSelector), + + reconciler.NewOptionalUpdater(newAlertmanager(ms, alertmanagerName, instanceSelectorKey, instanceSelectorValue), ms, deployAlertmanager), + reconciler.NewOptionalUpdater(newAlertmanagerService(ms, instanceSelectorKey, instanceSelectorValue), ms, deployAlertmanager), + reconciler.NewOptionalUpdater(newAlertmanagerPDB(ms, instanceSelectorKey, instanceSelectorValue), ms, deployAlertmanager), } } -func newPrometheusRole(ms *stack.MonitoringStack, rbacResourceName string, rbacVerbs []string) *rbacv1.Role { - return &rbacv1.Role{ +func newPrometheusClusterRole(ms *stack.MonitoringStack, rbacResourceName string, rbacVerbs []string) *rbacv1.ClusterRole { + return &rbacv1.ClusterRole{ TypeMeta: metav1.TypeMeta{ APIVersion: rbacv1.SchemeGroupVersion.String(), - Kind: "Role", + Kind: "ClusterRole", }, ObjectMeta: metav1.ObjectMeta{ - Name: 
rbacResourceName, - Namespace: ms.Namespace, - }, - Rules: []rbacv1.PolicyRule{ - { - APIGroups: []string{""}, - Resources: []string{"services", "endpoints", "pods"}, - Verbs: rbacVerbs, - }, - { - APIGroups: []string{"extensions", "networking.k8s.io"}, - Resources: []string{"ingresses"}, - Verbs: rbacVerbs, - }, - { - APIGroups: []string{"security.openshift.io"}, - Resources: []string{"securitycontextconstraints"}, - ResourceNames: []string{"nonroot", "nonroot-v2"}, - Verbs: []string{"use"}, - }, + Name: rbacResourceName, }, + Rules: []rbacv1.PolicyRule{{ + APIGroups: []string{""}, + Resources: []string{"services", "endpoints", "pods"}, + Verbs: rbacVerbs, + }, { + APIGroups: []string{"extensions", "networking.k8s.io"}, + Resources: []string{"ingresses"}, + Verbs: rbacVerbs, + }, { + APIGroups: []string{"security.openshift.io"}, + Resources: []string{"securitycontextconstraints"}, + ResourceNames: []string{"nonroot", "nonroot-v2"}, + Verbs: []string{"use"}, + }}, } } @@ -136,9 +141,9 @@ func newPrometheus( ServiceAccountName: rbacResourceName, ServiceMonitorSelector: prometheusSelector, - ServiceMonitorNamespaceSelector: nil, + ServiceMonitorNamespaceSelector: ms.Spec.NamespaceSelector, PodMonitorSelector: prometheusSelector, - PodMonitorNamespaceSelector: nil, + PodMonitorNamespaceSelector: ms.Spec.NamespaceSelector, Affinity: &corev1.Affinity{ PodAntiAffinity: &corev1.PodAntiAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ @@ -173,7 +178,7 @@ func newPrometheus( }, Retention: ms.Spec.Retention, RuleSelector: prometheusSelector, - RuleNamespaceSelector: nil, + RuleNamespaceSelector: ms.Spec.NamespaceSelector, Thanos: &monv1.ThanosSpec{ BaseImage: stringPtr("quay.io/thanos/thanos"), Version: stringPtr("v0.24.0"), @@ -214,7 +219,7 @@ func storageForPVC(pvc *corev1.PersistentVolumeClaimSpec) *monv1.StorageSpec { } } -func newRoleBinding(ms *stack.MonitoringStack, rbacResourceName string) *rbacv1.RoleBinding { +func 
newRoleBindingForClusterRole(ms *stack.MonitoringStack, rbacResourceName string) *rbacv1.RoleBinding { roleBinding := &rbacv1.RoleBinding{ TypeMeta: metav1.TypeMeta{ APIVersion: rbacv1.SchemeGroupVersion.String(), @@ -224,17 +229,39 @@ func newRoleBinding(ms *stack.MonitoringStack, rbacResourceName string) *rbacv1. Name: rbacResourceName, Namespace: ms.Namespace, }, - Subjects: []rbacv1.Subject{ - { - APIGroup: corev1.SchemeGroupVersion.Group, - Kind: "ServiceAccount", - Name: rbacResourceName, - Namespace: ms.Namespace, - }, + Subjects: []rbacv1.Subject{{ + APIGroup: corev1.SchemeGroupVersion.Group, + Kind: "ServiceAccount", + Name: rbacResourceName, + Namespace: ms.Namespace, + }}, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.SchemeGroupVersion.Group, + Kind: "ClusterRole", + Name: rbacResourceName, + }, + } + return roleBinding +} + +func newClusterRoleBinding(ms *stack.MonitoringStack, rbacResourceName string) *rbacv1.ClusterRoleBinding { + roleBinding := &rbacv1.ClusterRoleBinding{ + TypeMeta: metav1.TypeMeta{ + APIVersion: rbacv1.SchemeGroupVersion.String(), + Kind: "ClusterRoleBinding", }, + ObjectMeta: metav1.ObjectMeta{ + Name: rbacResourceName, + }, + Subjects: []rbacv1.Subject{{ + APIGroup: corev1.SchemeGroupVersion.Group, + Kind: "ServiceAccount", + Name: rbacResourceName, + Namespace: ms.Namespace, + }}, RoleRef: rbacv1.RoleRef{ APIGroup: rbacv1.SchemeGroupVersion.Group, - Kind: "Role", + Kind: "ClusterRole", Name: rbacResourceName, }, } diff --git a/pkg/controllers/monitoring/monitoring-stack/controller.go b/pkg/controllers/monitoring/monitoring-stack/controller.go index a74f79afb..4c80d98fb 100644 --- a/pkg/controllers/monitoring/monitoring-stack/controller.go +++ b/pkg/controllers/monitoring/monitoring-stack/controller.go @@ -63,7 +63,7 @@ type Options struct { // RBAC for managing Prometheus Operator CRs //+kubebuilder:rbac:groups=monitoring.rhobs,resources=alertmanagers;prometheuses;servicemonitors,verbs=list;watch;create;update;delete;patch 
-//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings,verbs=list;watch;create;update;delete;patch +//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings;clusterroles;clusterrolebindings,verbs=list;watch;create;update;delete;patch //+kubebuilder:rbac:groups="",resources=serviceaccounts;services;secrets,verbs=list;watch;create;update;delete;patch //+kubebuilder:rbac:groups="policy",resources=poddisruptionbudgets,verbs=list;watch;create;update;delete;patch diff --git a/test/e2e/monitoring_stack_controller_test.go b/test/e2e/monitoring_stack_controller_test.go index f0c45dabb..4ccbb222c 100644 --- a/test/e2e/monitoring_stack_controller_test.go +++ b/test/e2e/monitoring_stack_controller_test.go @@ -45,62 +45,64 @@ func assertCRDExists(t *testing.T, crds ...string) { func TestMonitoringStackController(t *testing.T) { assertCRDExists(t, "prometheuses.monitoring.rhobs", + "alertmanagers.monitoring.rhobs", + "podmonitors.monitoring.rhobs", + "servicemonitors.monitoring.rhobs", "monitoringstacks.monitoring.rhobs", ) - ts := []testCase{ - { - name: "Defaults are applied to Monitoring CR", - scenario: promConfigDefaultsAreApplied, - }, { - name: "Empty stack spec must create a Prometheus", - scenario: emptyStackCreatesPrometheus, - }, { - name: "stack spec are reflected in Prometheus", - scenario: reconcileStack, - }, { - name: "invalid loglevels are rejected", - scenario: validateStackLogLevel, - }, { - name: "invalid retention is rejected", - scenario: validateStackRetention, - }, { - name: "Controller reverts back changes to Prometheus", - scenario: reconcileRevertsManualChanges, - }, { - name: "single prometheus replica has no pdb", - scenario: singlePrometheusReplicaHasNoPDB, - }, { - name: "Prometheus stacks can scrape themselves", - scenario: assertPrometheusScrapesItself, - }, { - name: "Alertmanager receives alerts from the Prometheus instance", - scenario: assertAlertmanagerReceivesAlerts, - }, { - name: 
"Alertmanager runs in HA mode", - scenario: func(t *testing.T) { - stackName := "alerting" - assertAlertmanagerCreated(t, stackName) - pods, err := f.GetStatefulSetPods("alertmanager-"+stackName, e2eTestNamespace) - if err != nil { - t.Fatal(err) - } - assertAlertmanagersAreOnDifferentNodes(t, pods) - assertAlertmanagersAreResilientToDisruption(t, pods) - }, - }, { - name: "Alertmanager disabled", - scenario: assertAlertmanagerNotDeployed, - }, - { - name: "Alertmanager deployed and removed", - scenario: assertAlertmanagerDeployedAndRemoved, - }, - { - name: "invalid Prometheus replicas numbers", - scenario: validatePrometheusConfig, + ts := []testCase{{ + name: "Defaults are applied to Monitoring CR", + scenario: promConfigDefaultsAreApplied, + }, { + name: "Empty stack spec must create a Prometheus", + scenario: emptyStackCreatesPrometheus, + }, { + name: "stack spec are reflected in Prometheus", + scenario: reconcileStack, + }, { + name: "invalid loglevels are rejected", + scenario: validateStackLogLevel, + }, { + name: "invalid retention is rejected", + scenario: validateStackRetention, + }, { + name: "Controller reverts back changes to Prometheus", + scenario: reconcileRevertsManualChanges, + }, { + name: "single prometheus replica has no pdb", + scenario: singlePrometheusReplicaHasNoPDB, + }, { + name: "Prometheus stacks can scrape themselves", + scenario: assertPrometheusScrapesItself, + }, { + name: "Alertmanager receives alerts from the Prometheus instance", + scenario: assertAlertmanagerReceivesAlerts, + }, { + name: "Alertmanager runs in HA mode", + scenario: func(t *testing.T) { + stackName := "alerting" + assertAlertmanagerCreated(t, stackName) + pods, err := f.GetStatefulSetPods("alertmanager-"+stackName, e2eTestNamespace) + if err != nil { + t.Fatal(err) + } + assertAlertmanagersAreOnDifferentNodes(t, pods) + assertAlertmanagersAreResilientToDisruption(t, pods) }, - } + }, { + name: "invalid Prometheus replicas numbers", + scenario: 
validatePrometheusConfig, + }, { + name: "Alertmanager disabled", + scenario: assertAlertmanagerNotDeployed, + }, { + name: "Alertmanager deployed and removed", + scenario: assertAlertmanagerDeployedAndRemoved, + }, { + name: "Verify multi-namespace support", + scenario: namespaceSelectorTest, + }} for _, tc := range ts { t.Run(tc.name, tc.scenario) @@ -568,15 +570,28 @@ func newAlerts(t *testing.T) *monv1.PrometheusRule { return rule } -func newMonitoringStack(t *testing.T, name string, options ...func(*stack.MonitoringStack)) *stack.MonitoringStack { +type stackModifier func(*stack.MonitoringStack) + +func msResourceSelector(labels map[string]string) stackModifier { + return func(ms *stack.MonitoringStack) { + ms.Spec.ResourceSelector = &metav1.LabelSelector{MatchLabels: labels} + } +} +func msNamespaceSelector(labels map[string]string) stackModifier { + return func(ms *stack.MonitoringStack) { + ms.Spec.NamespaceSelector = &metav1.LabelSelector{MatchLabels: labels} + } +} + +func newMonitoringStack(t *testing.T, name string, mods ...stackModifier) *stack.MonitoringStack { ms := &stack.MonitoringStack{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: e2eTestNamespace, }, } - for _, opt := range options { - opt(ms) + for _, mod := range mods { + mod(ms) } f.CleanUp(t, func() { f.K8sClient.Delete(context.Background(), ms) @@ -594,3 +609,182 @@ func waitForStackDeletion(name string) error { return errors.IsNotFound(err), nil }) } + +// tests if a stack with a namespace selector is able to monitor +// resources from multiple namespaces +func namespaceSelectorTest(t *testing.T) { + // as a convention, add labels to ns to indicate the stack responsible for + // monitoring the namespaces + // while resourceSelector uses both stack and an app label + stackName := "multi-ns" + nsLabels := map[string]string{"monitoring.rhobs/stack": stackName} + resourceLabels := map[string]string{ + "monitoring.rhobs/stack": stackName, + "app": "demo", + } + + ms := 
newMonitoringStack(t, stackName, + msResourceSelector(resourceLabels), + msNamespaceSelector(nsLabels)) + + err := f.K8sClient.Create(context.Background(), ms) + assert.NilError(t, err, "failed to create a monitoring stack") + + namespaces := []string{"test-ns-1", "test-ns-2", "test-ns-3"} + + for _, ns := range namespaces { + err := deployDemoApp(t, ns, nsLabels, resourceLabels) + assert.NilError(t, err, "%s: deploying demo app failed", ns) + } + + stopChan := make(chan struct{}) + defer close(stopChan) + if pollErr := wait.Poll(5*time.Second, 2*time.Minute, func() (bool, error) { + err := f.StartServicePortForward(ms.Name+"-prometheus", e2eTestNamespace, "9090", stopChan) + return err == nil, nil + }); pollErr != nil { + t.Fatal(pollErr) + } + + promClient := framework.NewPrometheusClient("http://localhost:9090") + if pollErr := wait.Poll(5*time.Second, 5*time.Minute, func() (bool, error) { + query := `prometheus_build_info{namespace=~"test-ns-.*"}` + result, err := promClient.Query(query) + if err != nil { + return false, nil + } + + if len(result.Data.Result) != len(namespaces) { + return false, nil + } + + return true, nil + }); pollErr != nil { + t.Fatal(pollErr) + } +} + +// Deploys a prometheus instance and a service pointing to the prometheus's port - 9090 +// and a service-monitor to nsName namespace. nsLabels are applied to the namespace +// so that it can be monitored. 
resourceLabels are applied to the service monitor +func deployDemoApp(t *testing.T, nsName string, nsLabels, resourceLabels map[string]string) error { + + ns := newNamespace(t, nsName) + ns.SetLabels(nsLabels) + if err := f.K8sClient.Create(context.Background(), ns); err != nil { + return fmt.Errorf("failed to create namespace %s: %w", nsName, err) + } + + // deploy a pod, service, service-monitor into that namespace + prom := newPrometheusPod(t, "prometheus", ns.Name) + if err := f.K8sClient.Create(context.Background(), prom); err != nil { + return fmt.Errorf("failed to create demo app %s/%s: %w", nsName, prom.Name, err) + } + + svcLabels := map[string]string{ + "app.kubernetes.io/name": prom.Name, + "app.kubernetes.io/part-of": "prometheus", + } + svc := newService(t, prom.Name, ns.Name, svcLabels, prom.Labels) + // these are prometheus ports + svc.Spec.Ports = []corev1.ServicePort{{ + Name: "metrics", + Port: 9090, + TargetPort: intstr.FromInt(9090), + }} + + if err := f.K8sClient.Create(context.Background(), svc); err != nil { + return fmt.Errorf("failed to create service for demo app %s/%s: %w", nsName, svc.Name, err) + } + + svcMon := newServiceMonitor(t, ns.Name, "prometheus", resourceLabels, svcLabels, "metrics") + if err := f.K8sClient.Create(context.Background(), svcMon); err != nil { + return fmt.Errorf("failed to create servicemonitor for demo service %s/%s: %w", nsName, svcMon.Name, err) + } + return nil +} + +func newServiceMonitor(t *testing.T, ns, name string, stackSelector, serviceSelector map[string]string, endpoint string) *monv1.ServiceMonitor { + svcMon := &monv1.ServiceMonitor{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monv1.SchemeGroupVersion.String(), + Kind: "ServiceMonitor", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + Labels: stackSelector, + }, + Spec: monv1.ServiceMonitorSpec{ + Selector: metav1.LabelSelector{MatchLabels: serviceSelector}, + Endpoints: []monv1.Endpoint{{Port: endpoint}}, + }, + } + f.CleanUp(t, 
func() { f.K8sClient.Delete(context.Background(), svcMon) }) + return svcMon +} + +func newNamespace(t *testing.T, name string) *corev1.Namespace { + ns := &corev1.Namespace{ + TypeMeta: metav1.TypeMeta{ + APIVersion: corev1.SchemeGroupVersion.String(), + Kind: "Namespace", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + + f.CleanUp(t, func() { f.K8sClient.Delete(context.Background(), ns) }) + return ns +} + +func newService(t *testing.T, name, namespace string, labels, selector map[string]string) *corev1.Service { + svc := &corev1.Service{ + TypeMeta: metav1.TypeMeta{ + APIVersion: corev1.SchemeGroupVersion.String(), + Kind: "Service", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: labels, + }, + Spec: corev1.ServiceSpec{ + Selector: selector, + }, + } + + f.CleanUp(t, func() { f.K8sClient.Delete(context.Background(), svc) }) + return svc +} + +func newPrometheusPod(t *testing.T, name, ns string) *corev1.Pod { + pod := &corev1.Pod{ + TypeMeta: metav1.TypeMeta{ + APIVersion: corev1.SchemeGroupVersion.String(), + Kind: "Pod", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + Labels: map[string]string{ + "app.kubernetes.io/name": "prometheus", + "app.kubernetes.io/version": "2.39.1", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "prometheus", + Image: "quay.io/prometheus/prometheus:v2.39.1", + Ports: []corev1.ContainerPort{{ + Name: "metrics", + ContainerPort: 9090, + }}, + }}, + }, + } + + f.CleanUp(t, func() { f.K8sClient.Delete(context.Background(), pod) }) + return pod +}