Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(alerting): magic alerting severities #127

Merged
merged 13 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions .github/release-drafter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Release Drafter templates: control how the drafted release's section
# headings, name, tag, version string and changelog lines are rendered.
category-template: '## $TITLE'
name-template: 'v$RESOLVED_VERSION'
tag-template: 'v$RESOLVED_VERSION'
tag-prefix: ''
version-template: $MAJOR.$MINOR.$PATCH
# Format of a single changelog entry (one per pull request).
change-template: '* $TITLE (#$NUMBER) by @$AUTHOR'
change-title-escapes: ''
# Text used in place of the changelog when there are no changes to list.
no-changes-template: 'No changes were made in this version. Stay tuned for upcoming updates!'
# Changelog sections. Each entry pairs a section title with the pull request
# labels that place a change under that title.
categories:
- title: '⚡ Breaking Changes'
labels:
- 'breaking-change'
- title: '🌟 New Features'
labels:
- 'feature'
- title: '🔧 Improvements'
labels:
- 'enhancement'
- title: '📜 Documentation Updates'
labels:
- 'documentation'
- title: '🐛 Bug Fixes'
labels:
- 'bug'
- title: '🚒 Deprecations'
labels:
- 'deprecation'
- title: '🔧 Maintenance'
labels:
- 'chore'
- title: '📦 Dependency Updates'
# NOTE(review): presumably collapses this section once it holds more than
# 10 entries; confirm against the Release Drafter documentation.
collapse-after: 10
labels:
- 'dependencies'
# Maps pull request labels to the semantic-version component to bump for the
# next release. `default: patch` is the bump used when none of the listed
# labels apply.
version-resolver:
major:
labels:
- 'major'
- 'breaking-change'
minor:
labels:
- 'minor'
- 'feature'
- 'enhancement'
- 'deprecation'
patch:
labels:
- 'patch'
- 'documentation'
- 'bug'
- 'bugfix'
- 'fix'
- 'chore'
- 'internal'
- 'dependencies'
default: patch
# Rules for labelling pull requests automatically from their title, branch
# name, or changed files. Values wrapped in /.../ are regular expressions.
autolabeler:
# A "!:" anywhere in the title marks the pull request as breaking.
- label: 'breaking-change'
title:
- '/.*!:.*/'
# Titles such as "feat: ..." or "feat(scope): ..." (case-insensitive).
- label: 'feature'
title:
- '/feat.*: /i'
# Titles such as "fix: ..." or "bug: ..." (case-insensitive).
- label: 'bug'
title:
- '/fix.*: /i'
- '/bug.*: /i'
# Branches whose name contains "dependabot/".
- label: 'dependencies'
branch:
- '/dependabot\/.*/'
- label: 'documentation'
files:
- '*.md'
# NOTE(review): this rule uses the same '*.md' pattern as 'documentation'
# above, so a matching pull request gets both labels; confirm this is intended.
- label: 'chore'
files:
- '*.md'
# Pull requests carrying any of these labels are left out of the drafted release notes.
exclude-labels:
- 'skip-changelog'
# Body of the drafted release. $CHANGES and $CONTRIBUTORS are placeholders
# substituted by Release Drafter; the bracketed summary line is meant to be
# replaced by hand before publishing.
template: |
## Summary

**[Human readable summary of changes]**

## Changes

$CHANGES

## This release was made possible by the following contributors:

$CONTRIBUTORS
41 changes: 41 additions & 0 deletions .github/workflows/release-drafter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Workflow that runs the release-drafter action on pushes to main and on
# pull request events (the latter are needed for the autolabeler).
name: Release Drafter

on:
push:
# branches to consider in the event; optional, defaults to all
branches:
- main
# pull_request event is required only for autolabeler
pull_request:
# Only the following types are handled by the action, but one can default to all as well
types: [opened, reopened, synchronize]
# pull_request_target event is required for autolabeler to support PRs from forks
# pull_request_target:
# types: [opened, reopened, synchronize]

# Workflow-wide default is read-only access to repository contents; the job
# below requests the additional write scopes it needs.
permissions:
contents: read

jobs:
update_release_draft:
permissions:
# write permission is required to create a github release
contents: write
# write permission is required for autolabeler
# otherwise, read permission is required at least
pull-requests: write
runs-on: ubuntu-latest
steps:
# (Optional) GitHub Enterprise requires GHE_HOST variable set
#- name: Set GHE_HOST
# run: |
# echo "GHE_HOST=${GITHUB_SERVER_URL##https:\/\/}" >> $GITHUB_ENV

# Drafts your next Release notes as Pull Requests are merged into "main"
- uses: release-drafter/release-drafter@v6
# (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml
# with:
# config-name: my-config.yml
# disable-autolabeler: true
env:
# Passes the workflow's automatically provided token to the action.
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
12 changes: 10 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,17 @@ $(ENVTEST): $(LOCALBIN)

.PHONY: deploydev
deploydev:
@$(KUBECTL) apply -R -f devel/
@$(KUBECTL) apply -k devel/
@echo "Waiting for services to come online for the port-forwards..."
@sleep 5
# Block until a Grafana pod exists and its first container reports ready.
# The first test guards the second: indexing .items[0] on an empty pod list
# makes kubectl fail, so only look at readiness once the list is non-empty.
# `=` (not `==`) keeps the comparison valid for any POSIX sh used as SHELL.
	@until [ "$$($(KUBECTL) get pods -l app=grafana -o jsonpath='{.items}')" != "[]" ] && \
		[ "$$($(KUBECTL) get pods -l app=grafana -o jsonpath='{.items[0].status.containerStatuses[0].ready}')" = "true" ]; do \
		echo "Waiting for Grafana to be ready..." && sleep 2; \
	done
# Same readiness wait for the Mimir pod.
	@until [ "$$($(KUBECTL) get pods -l app=mimir -o jsonpath='{.items}')" != "[]" ] && \
		[ "$$($(KUBECTL) get pods -l app=mimir -o jsonpath='{.items[0].status.containerStatuses[0].ready}')" = "true" ]; do \
		echo "Waiting for Mimir to be ready..." && sleep 2; \
	done
@echo "Services are ready. Setting up port-forwards..."
@$(KUBECTL) port-forward svc/grafana 3000:3000 > /dev/null 2>&1 &
@$(KUBECTL) port-forward svc/mimir-service 9009:9009 >/dev/null 2>&1 &
@echo "Port-forwards activated. Reach Grafana on port 3000 and Mimir on port 9009."
Expand Down
2 changes: 1 addition & 1 deletion api/osko/v1alpha1/alertmanagerconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (

// AlertManagerConfigSpec defines the desired state of AlertManagerConfig
type AlertManagerConfigSpec struct {
SecretRef v1.SecretReference `json:"secretRef,omitempty"`
ConfigSecretRef v1.SecretReference `json:"configSecretRef,omitempty"`
}

// AlertManagerConfigStatus defines the observed state of AlertManagerConfig
Expand Down
15 changes: 8 additions & 7 deletions api/osko/v1alpha1/mimirrule_types.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package v1alpha1

import (
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/prometheus/common/model"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
Expand Down Expand Up @@ -33,13 +34,13 @@ type RuleGroup struct {
}

type Rule struct {
Record string `json:"record,omitempty"`
Alert string `json:"alert,omitempty"`
Expr string `json:"expr"`
For model.Duration `json:"for,omitempty"`
KeepFiringFor model.Duration `json:"keep_firing_for,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
Record string `json:"record,omitempty"`
Alert string `json:"alert,omitempty"`
Expr string `json:"expr"`
For *monitoringv1.Duration `json:"for,omitempty"`
KeepFiringFor model.Duration `json:"keep_firing_for,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
}

//+kubebuilder:object:root=true
Expand Down
8 changes: 7 additions & 1 deletion api/osko/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func main() {
opts.BindFlags(flag.CommandLine)

flag.Parse()
cfg := config.NewConfig()
config.NewConfig()

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

Expand Down Expand Up @@ -141,7 +141,7 @@ func main() {
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("mimirrule-controller"),
RequeueAfterPeriod: cfg.MimirRuleRequeuePeriod,
RequeueAfterPeriod: config.Cfg.MimirRuleRequeuePeriod,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "MimirRule")
os.Exit(1)
Expand Down
2 changes: 1 addition & 1 deletion config/crd/bases/osko.dev_alertmanagerconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
spec:
description: AlertManagerConfigSpec defines the desired state of AlertManagerConfig
properties:
secretRef:
configSecretRef:
description: |-
SecretReference represents a Secret Reference. It has enough information to retrieve secret
in any namespace
Expand Down
10 changes: 5 additions & 5 deletions config/crd/bases/osko.dev_mimirrules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ spec:
type: string
for:
description: |-
Duration wraps time.Duration. It is used to parse the custom duration format
from YAML.
This type should not propagate beyond the scope of input/output processing.
format: int64
type: integer
Duration is a valid time duration that can be parsed by Prometheus model.ParseDuration() function.
Supported units: y, w, d, h, m, s, ms
Examples: `30s`, `1m`, `1h20m15s`, `15d`
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
keep_firing_for:
description: |-
Duration wraps time.Duration. It is used to parse the custom duration format
Expand Down
5 changes: 4 additions & 1 deletion config/samples/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
## Append samples of your project ##
resources:
- osko_v1alpha1_alertmanagerconfig.yaml
- openslo_v1_datasource.yaml
- openslo_v1_slo.yaml
- config_secret.yaml
- osko_v1alpha1_alertmanagerconfig.yaml
# +kubebuilder:scaffold:manifestskustomizesamples
2 changes: 0 additions & 2 deletions config/samples/openslo_v1_datasource.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@ spec:
description: Mimir Datasource for logging tenant
type: mimir
connectionDetails:
address: https://mimir.monitoring.dev.heu.group/
address: http://localhost:9009/
sourceTenants:
- gatekeeper-system
targetTenant: gatekeeper-system
- monitoring
targetTenant: monitoring
28 changes: 14 additions & 14 deletions config/samples/openslo_v1_slo.yaml
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
apiVersion: openslo.com/v1
kind: SLO
metadata:
name: mimir-ingestion-latency
labels:
label.osko.dev/team: "infrastructure"
label.osko.dev/system: "gatekeeper"
label.osko.dev/domain: "security"
label.osko.dev/team: "infra"
label.osko.dev/system: "monitoring"
label.osko.dev/domain: "observability"
label.osko.dev/service: "mimir"
annotations:
osko.dev/datasourceRef: "mimir-infra-ds"
osko.dev/magicAlerting: "true"
name: gatekeeper-webhook-response-time
spec:
budgetingMethod: Occurrences
description: 99% of Gatekeeper webhook requests return in less than 0.5s
description: 95% of all queries should have a latency of less than 500 milliseconds
indicator:
metadata:
name: gatekeeper-webhook-less-than-05s
name: distributor-query-success-latency
spec:
description: 99% of Gatekeeper webhook requests return in less than 0.5s
description: 95% of all queries should have a latency of less than 500 milliseconds
ratioMetric:
good:
metricSource:
metricSourceRef: mimir-infra-ds
type: Mimir
spec:
query: controller_runtime_webhook_latency_seconds_bucket{le="0.5", job="gatekeeper-metrics"}
query: cortex_distributor_query_duration_seconds_bucket{le="0.5", method="Distributor.QueryStream", status_code="200"}
total:
metricSource:
metricSourceRef: mimir-infra-ds
type: Mimir
spec:
query: controller_runtime_webhook_latency_seconds_count{job="gatekeeper-metrics"}
query: cortex_distributor_query_duration_seconds_count{method="Distributor.QueryStream"}
objectives:
- displayName: gatekeeper-webhook-less-than-05s
target: '0.99'
service: testing
- target: "0.99"
service: mimir
timeWindow:
- duration: 28d
isRolling: true
- duration: 28d
isRolling: true
11 changes: 10 additions & 1 deletion devel/grafana-agent/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,19 @@ data:
}
}
}
prometheus.scrape "static" {
prometheus.relabel "cluster" {
rule {
target_label = "cluster"
replacement = "local"
}
forward_to = [
prometheus.remote_write.local.receiver,
]
}
prometheus.scrape "static" {
forward_to = [
prometheus.relabel.cluster.receiver,
]
targets = [
{
"__address__" = "mimir-service:9009",
Expand Down
13 changes: 8 additions & 5 deletions devel/grafana-agent/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ spec:
- name: grafana-agent
image: grafana/agent:latest
args:
- 'run'
- '/etc/agent/agent.river'
- '--storage.path=/tmp/agent'
- '--server.http.listen-addr=127.0.0.1:80'
- '--server.http.ui-path-prefix=/'
- "run"
- "/etc/agent/agent.river"
- "--storage.path=/tmp/agent"
- "--server.http.listen-addr=127.0.0.1:12345"
- "--server.http.ui-path-prefix=/"
volumeMounts:
- name: config-volume
mountPath: /etc/agent
Expand All @@ -31,6 +31,9 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
ports:
- containerPort: 12345
name: http-agent

volumes:
- name: config-volume
Expand Down
Loading
Loading